command change

This commit is contained in:
Jai Suphavadeeprasit 2026-03-07 17:50:14 -05:00
parent 530fed2877
commit d5ca760f36
2 changed files with 9 additions and 8 deletions

View file

@@ -550,7 +550,7 @@ python -m example_trainer.vllm_api_server # NOT direct vllm commands
### WandB Logging
```bash
```bash
--use-wandb \
--wandb-project "my-grpo-training" \
--wandb-group "hermes-8b-gsm8k"

View file

@@ -34,6 +34,7 @@ ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
LAUNCH_DIR="$PWD"
cd "$ROOT_DIR"
PYTHON_BIN="${PYTHON_BIN:-python3}"
STUDENT_MODEL="${STUDENT_MODEL:-Qwen/Qwen3-4B-Instruct-2507-FP8}"
TEACHER_MODEL="${TEACHER_MODEL:-Qwen/Qwen3-30B-A3B-Instruct-2507}"
@@ -159,15 +160,15 @@ log " saves=${SAVE_DIR}"
# 1) Atropos API
start_process "run_api" "${LOG_DIR}/run_api.log" \
uv run python -m atroposlib.cli.run_api --port "$API_PORT"
run-api --port "$API_PORT"
if [[ "$DRY_RUN" == "0" ]]; then
wait_for_http "http://localhost:${API_PORT}/info" 60 "run-api"
wait_for_http "http://localhost:${API_PORT}/info" 180 "run-api"
fi
# 2) Student vLLM server
start_process "student_vllm" "${LOG_DIR}/student_vllm.log" \
env CUDA_VISIBLE_DEVICES="$STUDENT_GPUS" \
uv run python -m example_trainer.vllm_api_server \
"$PYTHON_BIN" -m example_trainer.vllm_api_server \
--model "$STUDENT_MODEL" \
--port "$STUDENT_PORT" \
--tensor-parallel-size "$STUDENT_TP" \
@@ -181,7 +182,7 @@ fi
# 3) Teacher vLLM server
start_process "teacher_vllm" "${LOG_DIR}/teacher_vllm.log" \
env CUDA_VISIBLE_DEVICES="$TEACHER_GPUS" \
uv run python -m example_trainer.vllm_api_server \
"$PYTHON_BIN" -m example_trainer.vllm_api_server \
--model "$TEACHER_MODEL" \
--port "$TEACHER_PORT" \
--tensor-parallel-size "$TEACHER_TP" \
@@ -194,7 +195,7 @@ fi
# 4) Teacher-distill GSM8K env
start_process "gsm8k_teacher_env" "${LOG_DIR}/env.log" \
uv run python environments/gsm8k_server_teacher_distill.py serve \
"$PYTHON_BIN" environments/gsm8k_server_teacher_distill.py serve \
--env.group_size "$ENV_GROUP_SIZE" \
--env.batch_size "$ENV_BATCH_SIZE" \
--env.total_steps "$ENV_TOTAL_STEPS" \
@@ -227,7 +228,7 @@ if [[ "$DRY_RUN" == "1" ]]; then
log "[DRY RUN] trainer command:"
printf ' '
printf '%q ' env CUDA_VISIBLE_DEVICES="$TRAINER_GPU" \
uv run python -m example_trainer.grpo \
"$PYTHON_BIN" -m example_trainer.grpo \
--model-name "$STUDENT_MODEL" \
--weight-bridge-mode none \
--device cuda:0 \
@@ -248,7 +249,7 @@ fi
log "Starting trainer in foreground..."
env CUDA_VISIBLE_DEVICES="$TRAINER_GPU" \
uv run python -m example_trainer.grpo \
"$PYTHON_BIN" -m example_trainer.grpo \
--model-name "$STUDENT_MODEL" \
--weight-bridge-mode none \
--device cuda:0 \