mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-19 12:57:58 +00:00
59 lines
1.7 KiB
Python
59 lines
1.7 KiB
Python
"""
|
|
SkyRL Training Environment for Atropos
|
|
|
|
Integrates Berkeley SkyRL-gym with Atropos orchestration.
|
|
Supports Step-wise Process Rewards (PRM) and Zero-Copy SHM transport.
|
|
|
|
Usage:
|
|
python environments/skyrl_server.py serve \
|
|
--env.skyrl_repo_id "NovaSky-AI/Sky-AIME-5K" \
|
|
--openai.base_url http://localhost:9101/v1
|
|
"""
|
|
|
|
import logging
|
|
from typing import Any, Dict, List, Optional, Tuple
|
|
|
|
from atroposlib.envs.server_handling.server_baseline import APIServerConfig
|
|
from atroposlib.envs.skyrl_adapter import SkyRLAdapter, SkyRLConfig
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class SkyRLServerEnv(SkyRLAdapter):
    """
    Front-end environment that exposes SkyRL reasoning tasks.

    This class only supplies a default configuration and a log line after
    setup; all other behavior comes from ``SkyRLAdapter``.
    """

    @classmethod
    def config_init(cls) -> Tuple[SkyRLConfig, List[APIServerConfig]]:
        """
        Build the default configuration for this environment.

        Returns:
            A pair ``(env_config, server_configs)``: the ``SkyRLConfig``
            populated with the defaults below, and a one-element list
            holding the ``APIServerConfig`` for the API server.
        """
        # The tokenizer and the served model use the same checkpoint name.
        default_model = "Qwen/Qwen2.5-1.5B-Instruct"

        env_defaults = {
            "tokenizer_name": default_model,
            "group_size": 8,
            "use_wandb": True,
            "rollout_server_url": "http://localhost:8000",
            "total_steps": 1000,
            "batch_size": 4,
            "max_token_length": 4096,
            "wandb_name": "skyrl-reasoning",
            "enable_process_rewards": True,
        }
        # The environment config is constructed before the server config.
        settings = SkyRLConfig(**env_defaults)

        api_server = APIServerConfig(
            model_name=default_model,
            base_url="http://localhost:9001/v1",
            api_key="x",
            server_type="sglang",
        )
        return settings, [api_server]

    async def setup(self):
        """
        Run the parent adapter's setup, then log that it finished.
        """
        await super().setup()
        logger.info("SkyRL environment setup complete.")
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: delegate to the class's cli() method, which is
    # not defined on SkyRLServerEnv itself and so comes from a base class.
    SkyRLServerEnv.cli()
|