Fix math_server_zero.py to support CLI OpenAI arguments

Change ServerBaseline to APIServerConfig in config_init() so that
--openai.base_url and other CLI arguments work for on-policy distillation.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Jai Suphavadeeprasit 2026-02-16 17:18:01 -05:00
parent cc9b891eba
commit becadb54b0

View file

@@ -18,6 +18,7 @@ from pydantic import Field
from tqdm.asyncio import tqdm_asyncio
from atroposlib.envs.base import (
APIServerConfig,
BaseEnv,
BaseEnvConfig,
EvalHandlingEnum,
@@ -119,7 +120,7 @@ class MathEnv(BaseEnv):
def __init__(
self,
config: RSConfig,
server_configs: ServerBaseline,
server_configs: APIServerConfig | ServerBaseline,
slurm=True,
testing=False,
):
@@ -137,7 +138,7 @@
self.iter = 0
@classmethod
def config_init(cls) -> Tuple[RSConfig, ServerBaseline]:
def config_init(cls) -> Tuple[RSConfig, APIServerConfig]:
env_config = RSConfig(
tokenizer_name="Qwen/Qwen2.5-7B",
group_size=16,
@@ -152,10 +153,11 @@
eval_limit_ratio=0.1,
max_num_workers_per_node=24,
)
server_configs = ServerBaseline(
server_configs = APIServerConfig(
model_name="Qwen/Qwen2.5-7B",
num_requests_for_eval=256, # since evaling only on one...
server_type="vllm",
base_url="", # Override via CLI: --openai.base_url
)
return env_config, server_configs