diff --git a/example_trainer/README.md b/example_trainer/README.md index a32b552b..7531538c 100644 --- a/example_trainer/README.md +++ b/example_trainer/README.md @@ -249,10 +249,11 @@ run-api --port 8001 python -u environments/gsm8k_server.py serve \ --env.tokenizer_name $MODEL \ --env.use_wandb=False \ + --env.rollout_server_url http://localhost:8001 \ --openai.model_name $MODEL \ --openai.base_url http://localhost:9001/v1 \ --openai.server_type vllm \ - --server.port 8001 + --slurm false # Terminal 3: Trainer (manages its own vLLM) CUDA_VISIBLE_DEVICES=0,1 python -m example_trainer.grpo \ @@ -280,10 +281,11 @@ CUDA_VISIBLE_DEVICES=2 python example_trainer/vllm_api_server.py \ python -u environments/gsm8k_server.py serve \ --env.tokenizer_name $MODEL \ --env.use_wandb=False \ + --env.rollout_server_url http://localhost:8002 \ --openai.model_name $MODEL \ --openai.base_url http://localhost:9002/v1 \ --openai.server_type vllm \ - --server.port 8002 + --slurm false # Terminal 7: Trainer (attaches to vLLM) CUDA_VISIBLE_DEVICES=2 python -m example_trainer.grpo \ @@ -312,10 +314,11 @@ CUDA_VISIBLE_DEVICES=4 python example_trainer/vllm_api_server.py \ python -u environments/gsm8k_server.py serve \ --env.tokenizer_name $MODEL \ --env.use_wandb=False \ + --env.rollout_server_url http://localhost:8003 \ --openai.model_name $MODEL \ --openai.base_url http://localhost:9003/v1 \ --openai.server_type vllm \ - --server.port 8003 + --slurm false # Terminal 11: Trainer CUDA_VISIBLE_DEVICES=5 python -m example_trainer.grpo \ diff --git a/example_trainer/scripts/run_comparison.sh b/example_trainer/scripts/run_comparison.sh index c23351af..7f9ebb8b 100755 --- a/example_trainer/scripts/run_comparison.sh +++ b/example_trainer/scripts/run_comparison.sh @@ -192,10 +192,11 @@ echo " Starting environment server..." python -u environments/gsm8k_server.py serve \ --env.tokenizer_name "$MODEL" \ --env.use_wandb=False \ + --env.rollout_server_url "http://localhost:8001" \ --openai.model_name "$MODEL" \ --openai.base_url "http://localhost:9001/v1" \ --openai.server_type vllm \ - --server.port 8001 \ + --slurm false \ > $LOGDIR/env_legacy.log 2>&1 & LEGACY_ENV_PID=$! echo " ✓ Environment server started (PID: $LEGACY_ENV_PID)" @@ -233,10 +234,11 @@ echo " Starting environment server..." python -u environments/gsm8k_server.py serve \ --env.tokenizer_name "$MODEL" \ --env.use_wandb=False \ + --env.rollout_server_url "http://localhost:8002" \ --openai.model_name "$MODEL" \ --openai.base_url "http://localhost:9002/v1" \ --openai.server_type vllm \ - --server.port 8002 \ + --slurm false \ > $LOGDIR/env_shared.log 2>&1 & SHARED_ENV_PID=$! echo " ✓ Environment server started (PID: $SHARED_ENV_PID)" @@ -293,10 +295,11 @@ echo " Starting environment server..." python -u environments/gsm8k_server.py serve \ --env.tokenizer_name "$MODEL" \ --env.use_wandb=False \ + --env.rollout_server_url "http://localhost:8003" \ --openai.model_name "$MODEL" \ --openai.base_url "http://localhost:9003/v1" \ --openai.server_type vllm \ - --server.port 8003 \ + --slurm false \ > $LOGDIR/env_lora.log 2>&1 & LORA_ENV_PID=$! echo " ✓ Environment server started (PID: $LORA_ENV_PID)" diff --git a/example_trainer/scripts/run_concurrent_tests.sh b/example_trainer/scripts/run_concurrent_tests.sh index 3b8b96a4..c9779b02 100644 --- a/example_trainer/scripts/run_concurrent_tests.sh +++ b/example_trainer/scripts/run_concurrent_tests.sh @@ -139,10 +139,11 @@ echo "[3/6] Starting LoRA GSM8k environment..." python -u environments/gsm8k_server.py serve \ --env.tokenizer_name "$MODEL" \ --env.use_wandb=False \ + --env.rollout_server_url "http://localhost:${LORA_GSM8K_PORT}" \ --openai.model_name "$MODEL" \ --openai.base_url "http://localhost:${LORA_VLLM_PORT}/v1" \ --openai.server_type vllm \ - --server.port $LORA_GSM8K_PORT \ + --slurm false \ > "${LOG_DIR}/lora_gsm8k.log" 2>&1 & LORA_GSM8K_PID=$! echo " PID: $LORA_GSM8K_PID" @@ -152,10 +153,11 @@ echo "[4/6] Starting Single-Copy GSM8k environment..." python -u environments/gsm8k_server.py serve \ --env.tokenizer_name "$MODEL" \ --env.use_wandb=False \ + --env.rollout_server_url "http://localhost:${SINGLE_COPY_GSM8K_PORT}" \ --openai.model_name "$MODEL" \ --openai.base_url "http://localhost:${SINGLE_COPY_VLLM_PORT}/v1" \ --openai.server_type vllm \ - --server.port $SINGLE_COPY_GSM8K_PORT \ + --slurm false \ > "${LOG_DIR}/single_copy_gsm8k.log" 2>&1 & SINGLE_COPY_GSM8K_PID=$! echo " PID: $SINGLE_COPY_GSM8K_PID" diff --git a/example_trainer/scripts/test_single_copy_mode.sh b/example_trainer/scripts/test_single_copy_mode.sh index 08bbc630..0a76e1e7 100644 --- a/example_trainer/scripts/test_single_copy_mode.sh +++ b/example_trainer/scripts/test_single_copy_mode.sh @@ -82,10 +82,11 @@ echo "[2/4] Starting GSM8k environment..." python -u environments/gsm8k_server.py serve \ --env.tokenizer_name "$MODEL" \ --env.use_wandb=False \ + --env.rollout_server_url "http://localhost:${GSM8K_PORT}" \ --openai.model_name "$MODEL" \ --openai.base_url "http://localhost:${VLLM_PORT}/v1" \ --openai.server_type vllm \ - --server.port $GSM8K_PORT \ + --slurm false \ > "${LOG_DIR}/gsm8k.log" 2>&1 & echo "Waiting for GSM8k (10s)..."