training ideas

This commit is contained in:
Jai Suphavadeeprasit 2026-03-12 13:19:34 -04:00
parent c26432b963
commit 7ec622a098

View file

@@ -251,59 +251,48 @@ log "  ${LOG_DIR}/student_vllm.log"
log " ${LOG_DIR}/teacher_vllm.log"
log " ${LOG_DIR}/env.log"
# 5) Trainer (background)
# Build the trainer invocation as a bash array so the exact same argv can be
# pretty-printed for --dry-run and executed verbatim, with no re-quoting bugs.
TRAINER_CMD=(
  env
  CUDA_VISIBLE_DEVICES="$TRAINER_GPUS"
  PYTHONUNBUFFERED=1
  "$PYTHON_BIN"
  -u
  -m
  example_trainer.grpo
  --model-name "$STUDENT_MODEL"
  --weight-bridge-mode shared_vllm
  --device cuda:0
  --save-path "$SAVE_DIR"
  --atropos-url "http://localhost:${API_PORT}"
  --vllm-port "$STUDENT_PORT"
  --vllm-config-path "${BRIDGE_DIR}/vllm_bridge_config.json"
  --training-steps "$TRAINING_STEPS"
  --batch-size "$BATCH_SIZE"
  --gradient-accumulation-steps "$GRAD_ACCUM"
  --warmup-steps "$WARMUP_STEPS"
  --lr "$LR"
  --clip-eps "$CLIP_EPS"
  --seq-len "$TRAINER_SEQ_LEN"
  --distill-enabled
  --distill-coef "$DISTILL_COEF"
  --distill-temperature "$DISTILL_TEMPERATURE"
  --use-wandb
  --wandb-project "$WANDB_PROJECT"
)
# --wandb-group is optional: only append it when a group name was provided.
if [[ -n "$WANDB_GROUP" ]]; then
  TRAINER_CMD+=(--wandb-group "$WANDB_GROUP")
fi
# Dry run: print the fully shell-quoted trainer command and exit without
# starting anything. Printing from TRAINER_CMD (rather than a hand-maintained
# copy of the flags) guarantees the printed command matches what would run.
if [[ "$DRY_RUN" == "1" ]]; then
  log "[DRY RUN] trainer command:"
  printf ' '
  printf '%q ' "${TRAINER_CMD[@]}"
  printf '\n'
  exit 0
fi
# Launch the trainer in the background via the same argv array used by the
# dry-run path; stdout/stderr go to trainer.log.
start_process "trainer" "${LOG_DIR}/trainer.log" "${TRAINER_CMD[@]}"

log "All processes running in background."
log ""