diff --git a/example_trainer/run_gsm8k_teacher_distill_single_terminal.sh b/example_trainer/run_gsm8k_teacher_distill_single_terminal.sh
index 94021717..197599d0 100755
--- a/example_trainer/run_gsm8k_teacher_distill_single_terminal.sh
+++ b/example_trainer/run_gsm8k_teacher_distill_single_terminal.sh
@@ -65,6 +65,9 @@
 DISTILL_COEF="${DISTILL_COEF:-0.2}"
 DISTILL_TEMPERATURE="${DISTILL_TEMPERATURE:-1.0}"
 TEACHER_TOP_K="${TEACHER_TOP_K:-8}"
+WANDB_PROJECT="${WANDB_PROJECT:-gsm8k-teacher-distill}"
+WANDB_GROUP="${WANDB_GROUP:-}"
+
 STUDENT_GPU_MEMORY_UTILIZATION="${STUDENT_GPU_MEMORY_UTILIZATION:-0.95}"
 TEACHER_GPU_MEMORY_UTILIZATION="${TEACHER_GPU_MEMORY_UTILIZATION:-0.95}"
 DTYPE="${DTYPE:-bfloat16}"
@@ -166,6 +169,7 @@ log " logs=${LOG_DIR}"
 log " saves=${SAVE_DIR}"
 log " bridge=${BRIDGE_DIR}"
 log " env max_token_length=${ENV_MAX_TOKEN_LENGTH}, env workers=${ENV_MAX_WORKERS_PER_NODE}, env worker_timeout=${ENV_WORKER_TIMEOUT}"
+log " wandb project=${WANDB_PROJECT}${WANDB_GROUP:+, group=${WANDB_GROUP}}"
 
 # Shared-vLLM attach path currently expects the student server to expose
 # unsharded weights. Keep the student on TP=1 and the trainer on the same GPU set.
@@ -269,7 +273,10 @@ if [[ "$DRY_RUN" == "1" ]]; then
     --seq-len "$TRAINER_SEQ_LEN" \
     --distill-enabled \
     --distill-coef "$DISTILL_COEF" \
-    --distill-temperature "$DISTILL_TEMPERATURE"
+    --distill-temperature "$DISTILL_TEMPERATURE" \
+    --use-wandb \
+    --wandb-project "$WANDB_PROJECT" \
+    ${WANDB_GROUP:+--wandb-group "$WANDB_GROUP"}
   printf '\n'
   exit 0
 fi
@@ -293,7 +300,10 @@ start_process "trainer" "${LOG_DIR}/trainer.log" \
   --seq-len "$TRAINER_SEQ_LEN" \
   --distill-enabled \
   --distill-coef "$DISTILL_COEF" \
-  --distill-temperature "$DISTILL_TEMPERATURE"
+  --distill-temperature "$DISTILL_TEMPERATURE" \
+  --use-wandb \
+  --wandb-project "$WANDB_PROJECT" \
+  ${WANDB_GROUP:+--wandb-group "$WANDB_GROUP"}
 
 log "All processes running in background."
 log ""