diff --git a/example_trainer/run_gsm8k_lora_matrix.sh b/example_trainer/run_gsm8k_lora_matrix.sh
new file mode 100755
index 00000000..2b77669c
--- /dev/null
+++ b/example_trainer/run_gsm8k_lora_matrix.sh
@@ -0,0 +1,408 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Runs three GSM8K test trainings with separate infra/ports:
+# 1) shared_vllm
+# 2) lora_only (+ layer filtering support)
+# 3) lora_restart (+ layer filtering support)
+#
+# Usage:
+# chmod +x example_trainer/run_gsm8k_lora_matrix.sh
+# ./example_trainer/run_gsm8k_lora_matrix.sh
+#
+# Optional environment overrides:
+# MODEL_NAME="NousResearch/Hermes-3-Llama-3.1-8B"
+# TRAINING_STEPS=10
+# LORA_LAYER_INDICES="20-31"
+# WANDB_PROJECT="gsm8k-grpo-smoke"
+# WANDB_GROUP="gsm8k-$(date +%Y%m%d-%H%M%S)"
+# START_API_PORT=8002
+# START_VLLM_PORT=9001
+# PYTHON_BIN=python3
+# OUTPUT_BASE_DIR="$PWD" # logs/saves base (defaults to launch directory)
+# SHARED_GPU=0
+# LORA_ONLY_TRAINER_GPU=1
+# LORA_ONLY_VLLM_GPU=2
+# LORA_RESTART_TRAINER_GPU=3
+# LORA_RESTART_VLLM_GPU=4
+# DRY_RUN=1 # print commands only, do not execute
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"  # repo root = parent of this script's directory
+LAUNCH_DIR="$PWD"  # remembered before cd so OUTPUT_BASE_DIR can default to where the user launched from
+cd "$ROOT_DIR"  # all relative paths below (environments/, example_trainer.*) resolve from repo root
+
+PYTHON_BIN="${PYTHON_BIN:-python3}"
+MODEL_NAME="${MODEL_NAME:-NousResearch/Hermes-3-Llama-3.1-8B}"
+TRAINING_STEPS="${TRAINING_STEPS:-10}"
+BATCH_SIZE="${BATCH_SIZE:-4}"
+GRAD_ACCUM="${GRAD_ACCUM:-4}"
+LR="${LR:-1e-5}"
+KL_COEF="${KL_COEF:-0.1}"
+CLIP_EPS="${CLIP_EPS:-0.2}"
+GPU_MEMORY_UTILIZATION="${GPU_MEMORY_UTILIZATION:-0.45}"
+MAX_MODEL_LEN="${MAX_MODEL_LEN:-4096}"
+DTYPE="${DTYPE:-bfloat16}"
+LORA_R="${LORA_R:-16}"
+LORA_ALPHA="${LORA_ALPHA:-32}"
+LORA_DROPOUT="${LORA_DROPOUT:-0.05}"
+LORA_TARGET_MODULES="${LORA_TARGET_MODULES:-q_proj v_proj}"  # space-separated list; intentionally word-split where used
+LORA_LAYER_INDICES="${LORA_LAYER_INDICES:-}"  # empty = apply LoRA to all matching layers
+WANDB_PROJECT="${WANDB_PROJECT:-gsm8k-grpo-smoke}"
+WANDB_GROUP="${WANDB_GROUP:-gsm8k-$(date +%Y%m%d-%H%M%S)}"
+START_API_PORT="${START_API_PORT:-8002}"
+START_VLLM_PORT="${START_VLLM_PORT:-9001}"
+OUTPUT_BASE_DIR="${OUTPUT_BASE_DIR:-$LAUNCH_DIR}"
+
+# GPU pinning (one process per GPU preference)
+SHARED_GPU="${SHARED_GPU:-0}"
+LORA_ONLY_TRAINER_GPU="${LORA_ONLY_TRAINER_GPU:-1}"
+LORA_ONLY_VLLM_GPU="${LORA_ONLY_VLLM_GPU:-2}"
+LORA_RESTART_TRAINER_GPU="${LORA_RESTART_TRAINER_GPU:-3}"
+LORA_RESTART_VLLM_GPU="${LORA_RESTART_VLLM_GPU:-4}"
+DRY_RUN="${DRY_RUN:-0}"
+
+# Each run gets its own api/vllm port pair, offset from the start ports.
+SHARED_API_PORT="$START_API_PORT"
+SHARED_VLLM_PORT="$START_VLLM_PORT"
+LORA_ONLY_API_PORT="$((START_API_PORT + 1))"
+LORA_ONLY_VLLM_PORT="$((START_VLLM_PORT + 1))"
+LORA_RESTART_API_PORT="$((START_API_PORT + 2))"
+LORA_RESTART_VLLM_PORT="$((START_VLLM_PORT + 2))"
+
+run_pids=()  # PIDs of background processes belonging to the current run
+run_ports=()  # ports to force-free during cleanup of the current run
+
+log() {  # timestamped message to stdout
+  echo "[$(date '+%H:%M:%S')] $*"
+}
+
+kill_port() {  # force-kill any process listening on the given TCP port (NOTE(review): SIGKILL as first resort — no graceful TERM)
+  local port="$1"
+  if lsof -i ":${port}" -sTCP:LISTEN >/dev/null 2>&1; then
+    lsof -ti ":${port}" | xargs -r kill -9 || true
+  fi
+}
+
+wait_for_http() {  # poll url with curl until it answers, or fail after timeout seconds (default 180)
+  local url="$1"
+  local timeout="${2:-180}"
+  local name="${3:-endpoint}"
+  local start
+  start="$(date +%s)"
+  while true; do
+    if curl -fsS "$url" >/dev/null 2>&1; then
+      log "Ready: ${name} (${url})"
+      return 0
+    fi
+    if (( "$(date +%s)" - start > timeout )); then
+      log "Timeout waiting for ${name}: ${url}"
+      return 1  # NOTE: callers invoke this bare, so under 'set -e' a timeout aborts the whole script
+    fi
+    sleep 2
+  done
+}
+
+start_process() {  # launch "$@" in the background with output to logfile, record PID; in DRY_RUN just print the command
+  local name="$1"
+  local logfile="$2"
+  shift 2
+  if [[ "$DRY_RUN" == "1" ]]; then
+    log "[DRY RUN] start ${name} (log: ${logfile})"
+    printf ' '
+    printf '%q ' "$@"
+    printf '\n'
+    return 0
+  fi
+  log "Starting ${name} (log: ${logfile})"
+  "$@" >"$logfile" 2>&1 &
+  local pid=$!
+  run_pids+=("$pid")  # tracked so cleanup_run can reap it
+  log "${name} PID=${pid}"
+}
+
+cleanup_run() {  # TERM then KILL all recorded PIDs, then force-free recorded ports; also the EXIT/INT/TERM trap handler
+  log "Cleaning up run processes..."
+  for pid in "${run_pids[@]:-}"; do
+    kill "$pid" >/dev/null 2>&1 || true
+  done
+  sleep 1  # grace period before escalating to SIGKILL
+  for pid in "${run_pids[@]:-}"; do
+    kill -9 "$pid" >/dev/null 2>&1 || true
+  done
+  for port in "${run_ports[@]:-}"; do
+    kill_port "$port"
+  done
+  run_pids=()  # reset so the trap firing after a normal run is a no-op
+  run_ports=()
+}
+
+add_lora_layer_flag() {  # echoes the optional layer-filter flag; output is word-split (unquoted) at call sites on purpose
+  if [[ -n "$LORA_LAYER_INDICES" ]]; then
+    echo "--lora-layer-indices" "$LORA_LAYER_INDICES"
+  fi
+}
+
+common_trainer_flags() {  # echoes shared trainer CLI flags; expanded unquoted at call sites, so values must not contain spaces
+  echo \
+    --model-name "$MODEL_NAME" \
+    --training-steps "$TRAINING_STEPS" \
+    --batch-size "$BATCH_SIZE" \
+    --gradient-accumulation-steps "$GRAD_ACCUM" \
+    --lr "$LR" \
+    --kl-coef "$KL_COEF" \
+    --clip-eps "$CLIP_EPS" \
+    --use-wandb \
+    --wandb-project "$WANDB_PROJECT" \
+    --wandb-group "$WANDB_GROUP"
+}
+
+start_gsm8k_env() {  # start the GSM8K environment server in the background, pointed at the given vLLM port
+  local vllm_port="$1"
+  local logfile="$2"
+  start_process "gsm8k_env" "$logfile" \
+    "$PYTHON_BIN" environments/gsm8k_server.py serve \
+    --env.group_size 4 \
+    --env.max_num 200 \
+    --slurm.num_requests_per_time_interval 16 \
+    --slurm.time_interval 10 \
+    --openai.api_key "dummy" \
+    --openai.base_url "http://localhost:${vllm_port}/v1" \
+    --openai.model_name "$MODEL_NAME" \
+    --openai.server_type vllm
+}
+
+run_shared_vllm() {  # run 1/3: trainer and vLLM share one GPU and exchange weights via the bridge dir
+  log "========== RUN: shared_vllm =========="
+  local api_port="$SHARED_API_PORT"
+  local vllm_port="$SHARED_VLLM_PORT"
+  local mode_dir="${OUTPUT_BASE_DIR}/logs/gsm8k_shared_vllm"
+  local save_dir="${OUTPUT_BASE_DIR}/saves/gsm8k_shared_vllm"
+  local bridge_dir="${mode_dir}/bridge"
+  mkdir -p "$mode_dir"
+  mkdir -p "$save_dir"
+  mkdir -p "$bridge_dir"
+
+  run_ports+=("$api_port" "$vllm_port")
+  kill_port "$api_port"  # free ports from any stale earlier run
+  kill_port "$vllm_port"
+
+  start_process "run_api" "$mode_dir/run_api.log" run-api --port "$api_port"  # NOTE(review): assumes 'run-api' is on PATH — confirm
+  if [[ "$DRY_RUN" == "1" ]]; then
+    log "[DRY RUN] wait for http://localhost:${api_port}/info"
+  else
+    wait_for_http "http://localhost:${api_port}/info" 60 "run-api"
+  fi
+
+  start_process "vllm_shared" "$mode_dir/vllm.log" \
+    env CUDA_VISIBLE_DEVICES="$SHARED_GPU" VLLM_ENABLE_SHARED_WEIGHTS=1 LOGDIR="$bridge_dir" \
+    "$PYTHON_BIN" -m example_trainer.vllm_api_server \
+    --model "$MODEL_NAME" \
+    --port "$vllm_port" \
+    --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
+    --max-model-len "$MAX_MODEL_LEN" \
+    --dtype "$DTYPE" \
+    --enforce-eager
+  if [[ "$DRY_RUN" == "1" ]]; then
+    log "[DRY RUN] wait for http://localhost:${vllm_port}/health"
+  else
+    wait_for_http "http://localhost:${vllm_port}/health" 300 "shared vLLM"
+  fi
+
+  start_gsm8k_env "$vllm_port" "$mode_dir/env.log"
+
+  log "Starting trainer: shared_vllm"
+  if [[ "$DRY_RUN" == "1" ]]; then
+    log "[DRY RUN] trainer command (shared_vllm):"
+    printf ' '
+    printf '%q ' env CUDA_VISIBLE_DEVICES="$SHARED_GPU" "$PYTHON_BIN" -m example_trainer.grpo \
+      $(common_trainer_flags) \
+      --weight-bridge-mode shared_vllm \
+      --device cuda:0 \
+      --save-path "$save_dir" \
+      --vllm-port "$vllm_port" \
+      --vllm-config-path "${bridge_dir}/vllm_bridge_config.json" \
+      --atropos-url "http://localhost:${api_port}"
+    printf '\n'
+    log "[DRY RUN] trainer log path: $mode_dir/trainer.log"
+  else
+    env CUDA_VISIBLE_DEVICES="$SHARED_GPU" "$PYTHON_BIN" -m example_trainer.grpo \
+      $(common_trainer_flags) \
+      --weight-bridge-mode shared_vllm \
+      --device cuda:0 \
+      --save-path "$save_dir" \
+      --vllm-port "$vllm_port" \
+      --vllm-config-path "${bridge_dir}/vllm_bridge_config.json" \
+      --atropos-url "http://localhost:${api_port}" | tee "$mode_dir/trainer.log"
+  fi
+
+  cleanup_run
+}
+
+run_lora_only() {  # run 2/3: trainer and vLLM on separate GPUs; vLLM serves LoRA adapters (--enable-lora)
+  log "========== RUN: lora_only =========="
+  local api_port="$LORA_ONLY_API_PORT"
+  local vllm_port="$LORA_ONLY_VLLM_PORT"
+  local mode_dir="${OUTPUT_BASE_DIR}/logs/gsm8k_lora_only"
+  local save_dir="${OUTPUT_BASE_DIR}/saves/gsm8k_lora_only"
+  mkdir -p "$mode_dir"
+  mkdir -p "$save_dir"
+
+  run_ports+=("$api_port" "$vllm_port")
+  kill_port "$api_port"  # free ports from any stale earlier run
+  kill_port "$vllm_port"
+
+  start_process "run_api" "$mode_dir/run_api.log" run-api --port "$api_port"
+  if [[ "$DRY_RUN" == "1" ]]; then
+    log "[DRY RUN] wait for http://localhost:${api_port}/info"
+  else
+    wait_for_http "http://localhost:${api_port}/info" 60 "run-api"
+  fi
+
+  start_process "vllm_lora_only" "$mode_dir/vllm.log" \
+    env CUDA_VISIBLE_DEVICES="$LORA_ONLY_VLLM_GPU" \
+    "$PYTHON_BIN" -m example_trainer.vllm_api_server \
+    --model "$MODEL_NAME" \
+    --port "$vllm_port" \
+    --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
+    --max-model-len "$MAX_MODEL_LEN" \
+    --dtype "$DTYPE" \
+    --enable-lora \
+    --enforce-eager
+  if [[ "$DRY_RUN" == "1" ]]; then
+    log "[DRY RUN] wait for http://localhost:${vllm_port}/health"
+  else
+    wait_for_http "http://localhost:${vllm_port}/health" 300 "lora_only vLLM"
+  fi
+
+  start_gsm8k_env "$vllm_port" "$mode_dir/env.log"
+
+  log "Starting trainer: lora_only"
+  if [[ "$DRY_RUN" == "1" ]]; then
+    log "[DRY RUN] trainer command (lora_only):"
+    printf ' '
+    printf '%q ' env CUDA_VISIBLE_DEVICES="$LORA_ONLY_TRAINER_GPU" "$PYTHON_BIN" -m example_trainer.grpo \
+      $(common_trainer_flags) \
+      --weight-bridge-mode lora_only \
+      --device cuda:0 \
+      --save-path "$save_dir" \
+      --vllm-port "$vllm_port" \
+      --atropos-url "http://localhost:${api_port}" \
+      --lora-r "$LORA_R" \
+      --lora-alpha "$LORA_ALPHA" \
+      --lora-dropout "$LORA_DROPOUT" \
+      --lora-target-modules $LORA_TARGET_MODULES \
+      $(add_lora_layer_flag)
+    printf '\n'
+    log "[DRY RUN] trainer log path: $mode_dir/trainer.log"
+  else
+    env CUDA_VISIBLE_DEVICES="$LORA_ONLY_TRAINER_GPU" "$PYTHON_BIN" -m example_trainer.grpo \
+      $(common_trainer_flags) \
+      --weight-bridge-mode lora_only \
+      --device cuda:0 \
+      --save-path "$save_dir" \
+      --vllm-port "$vllm_port" \
+      --atropos-url "http://localhost:${api_port}" \
+      --lora-r "$LORA_R" \
+      --lora-alpha "$LORA_ALPHA" \
+      --lora-dropout "$LORA_DROPOUT" \
+      --lora-target-modules $LORA_TARGET_MODULES \
+      $(add_lora_layer_flag) | tee "$mode_dir/trainer.log"
+  fi
+
+  cleanup_run
+}
+
+run_lora_restart() {  # run 3/3: trainer launches its own vLLM, so this script waits for its health endpoint instead of starting one
+  log "========== RUN: lora_restart =========="
+  local api_port="$LORA_RESTART_API_PORT"
+  local vllm_port="$LORA_RESTART_VLLM_PORT"
+  local mode_dir="${OUTPUT_BASE_DIR}/logs/gsm8k_lora_restart"
+  local save_dir="${OUTPUT_BASE_DIR}/saves/gsm8k_lora_restart"
+  mkdir -p "$mode_dir"
+  mkdir -p "$save_dir"
+
+  run_ports+=("$api_port" "$vllm_port")
+  kill_port "$api_port"  # free ports from any stale earlier run
+  kill_port "$vllm_port"
+
+  start_process "run_api" "$mode_dir/run_api.log" run-api --port "$api_port"
+  if [[ "$DRY_RUN" == "1" ]]; then
+    log "[DRY RUN] wait for http://localhost:${api_port}/info"
+  else
+    wait_for_http "http://localhost:${api_port}/info" 60 "run-api"
+  fi
+
+  log "Starting trainer: lora_restart (it launches its own vLLM)"
+  if [[ "$DRY_RUN" == "1" ]]; then
+    log "[DRY RUN] trainer command (lora_restart):"
+    printf ' '
+    printf '%q ' env CUDA_VISIBLE_DEVICES="$LORA_RESTART_TRAINER_GPU" "$PYTHON_BIN" -m example_trainer.grpo \
+      $(common_trainer_flags) \
+      --weight-bridge-mode lora_restart \
+      --device cuda:0 \
+      --save-path "$save_dir" \
+      --vllm-port "$vllm_port" \
+      --vllm-gpu "$LORA_RESTART_VLLM_GPU" \
+      --vllm-restart-interval 3 \
+      --atropos-url "http://localhost:${api_port}" \
+      --lora-r "$LORA_R" \
+      --lora-alpha "$LORA_ALPHA" \
+      --lora-dropout "$LORA_DROPOUT" \
+      --lora-target-modules $LORA_TARGET_MODULES \
+      $(add_lora_layer_flag)
+    printf '\n'
+    log "[DRY RUN] then wait for http://localhost:${vllm_port}/health"
+    log "[DRY RUN] then start GSM8K env pointed at http://localhost:${vllm_port}/v1"
+    log "[DRY RUN] trainer log path: $mode_dir/trainer.log"
+  else
+    env CUDA_VISIBLE_DEVICES="$LORA_RESTART_TRAINER_GPU" "$PYTHON_BIN" -m example_trainer.grpo \
+      $(common_trainer_flags) \
+      --weight-bridge-mode lora_restart \
+      --device cuda:0 \
+      --save-path "$save_dir" \
+      --vllm-port "$vllm_port" \
+      --vllm-gpu "$LORA_RESTART_VLLM_GPU" \
+      --vllm-restart-interval 3 \
+      --atropos-url "http://localhost:${api_port}" \
+      --lora-r "$LORA_R" \
+      --lora-alpha "$LORA_ALPHA" \
+      --lora-dropout "$LORA_DROPOUT" \
+      --lora-target-modules $LORA_TARGET_MODULES \
+      $(add_lora_layer_flag) >"$mode_dir/trainer.log" 2>&1 &
+    trainer_pid=$!  # trainer runs in background so we can wait for the vLLM it spawns
+    run_pids+=("$trainer_pid")
+
+    wait_for_http "http://localhost:${vllm_port}/health" 420 "lora_restart vLLM"
+    start_gsm8k_env "$vllm_port" "$mode_dir/env.log"
+
+    wait "$trainer_pid"  # propagates trainer exit status; under 'set -e' a failure aborts the script
+    cat "$mode_dir/trainer.log"
+  fi
+
+  cleanup_run
+}
+
+trap cleanup_run EXIT INT TERM  # ensure background children and ports are released on any exit path
+
+log "Model: $MODEL_NAME"
+log "W&B project/group: $WANDB_PROJECT / $WANDB_GROUP"
+log "Dry run mode: $DRY_RUN"
+log "Output base directory (logs + saves): $OUTPUT_BASE_DIR"
+log "Port plan:"
+log " shared_vllm: run-api=${SHARED_API_PORT}, vllm=${SHARED_VLLM_PORT}"
+log " lora_only: run-api=${LORA_ONLY_API_PORT}, vllm=${LORA_ONLY_VLLM_PORT}"
+log " lora_restart: run-api=${LORA_RESTART_API_PORT}, vllm=${LORA_RESTART_VLLM_PORT}"
+log "GPU plan:"
+log " shared_vllm: trainer+vllm on GPU ${SHARED_GPU} (required for shared weights)"
+log " lora_only: trainer GPU ${LORA_ONLY_TRAINER_GPU}, vllm GPU ${LORA_ONLY_VLLM_GPU}"
+log " lora_restart: trainer GPU ${LORA_RESTART_TRAINER_GPU}, vllm GPU ${LORA_RESTART_VLLM_GPU}"
+if [[ -n "$LORA_LAYER_INDICES" ]]; then
+  log "LoRA layer indices: $LORA_LAYER_INDICES"
+else
+  log "LoRA layer indices: all matching layers"
+fi
+
+# The three runs execute sequentially; each cleans up its own processes/ports.
+run_shared_vllm
+run_lora_only
+run_lora_restart
+
+log "All GSM8K mode runs completed."