diff --git a/example_trainer/test_multi_model.py b/example_trainer/test_multi_model.py index ee02d0d3..088deb21 100644 --- a/example_trainer/test_multi_model.py +++ b/example_trainer/test_multi_model.py @@ -12,23 +12,24 @@ With --auto-env, each model gets its own isolated stack: - trainer Usage: - # RECOMMENDED: Fully automated parallel test (each model gets isolated stack) + # RECOMMENDED: Fully automated parallel test with W&B logging python -m example_trainer.test_multi_model \ --models qwen3-4b hermes-8b nemotron-14b devstral-24b \ --parallel \ --gpus 0 1 2 3 \ - --auto-env + --auto-env \ + --use-wandb \ + --wandb-project multi-model-test # Sequential test on one GPU python -m example_trainer.test_multi_model \ --models qwen3-4b hermes-8b \ --sequential \ --gpu 0 \ - --auto-env + --auto-env \ + --use-wandb # Manual mode (you must start run-api and gsm8k_server yourself) - # First start: run-api --port 8002 & - # Then start gsm8k for your model python -m example_trainer.test_multi_model \ --models qwen3-4b \ --sequential \ @@ -36,9 +37,9 @@ Usage: --atropos-url http://localhost:8002 Port allocation with --auto-env: - Model 0: run-api:8002, vLLM:9001 - Model 1: run-api:8003, vLLM:9002 - Model 2: run-api:8004, vLLM:9003 + Model 0: run-api:8002, vLLM:9001, GPU from --gpus[0] + Model 1: run-api:8003, vLLM:9002, GPU from --gpus[1] + Model 2: run-api:8004, vLLM:9003, GPU from --gpus[2] ... """ @@ -195,6 +196,8 @@ def run_model_test( training_steps: int, vllm_port_offset: int = 0, auto_env: bool = False, + use_wandb: bool = False, + wandb_project: str = "multi-model-test", ) -> Dict: """ Run a complete training test for a single model. @@ -288,13 +291,15 @@ def run_model_test( # which is required for CUDA IPC with ptrace_scope=1 run_script = script_dir / "run.py" + # Don't use CUDA_VISIBLE_DEVICES - use --device instead + # run.py sets CUDA_VISIBLE_DEVICES internally based on --device run_env = os.environ.copy() - run_env["CUDA_VISIBLE_DEVICES"] = str(gpu_id) run_env["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True" run_cmd = [ sys.executable, "-u", str(run_script), "--model", model_config.model_id, + "--device", f"cuda:{gpu_id}", # This controls GPU selection "--vllm-port", str(vllm_port), "--gpu-memory-utilization", str(model_config.gpu_memory_utilization), "--max-model-len", str(model_config.max_model_len), @@ -307,6 +312,10 @@ def run_model_test( "--log-dir", str(log_dir), ] + # Add wandb flags if enabled + if use_wandb: + run_cmd.extend(["--use-wandb", "--wandb-project", wandb_project]) + print(f"[{model_name}] Starting unified trainer (vLLM + GRPO) for {training_steps} steps...") with open(trainer_log, "w") as tlog: trainer_process = subprocess.Popen( @@ -386,6 +395,8 @@ def run_parallel_tests( base_dir: str, training_steps: int, auto_env: bool = False, + use_wandb: bool = False, + wandb_project: str = "multi-model-test", ) -> List[Dict]: """Run tests for multiple models in parallel.""" timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") @@ -396,7 +407,7 @@ def run_parallel_tests( def run_and_store(model, gpu, port_offset): result = run_model_test( model, gpu, atropos_url, atropos_port, base_dir, timestamp, - training_steps, port_offset, auto_env + training_steps, port_offset, auto_env, use_wandb, wandb_project ) with result_lock: results.append(result) @@ -423,6 +434,8 @@ def run_sequential_tests( base_dir: str, training_steps: int, auto_env: bool = False, + use_wandb: bool = False, + wandb_project: str = "multi-model-test", ) -> List[Dict]: """Run tests for multiple models sequentially on one GPU.""" timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") @@ -431,7 +444,8 @@ def run_sequential_tests( for i, model in enumerate(models): result = run_model_test( model, gpu_id, atropos_url, atropos_port, base_dir, timestamp, - training_steps, port_offset=0, auto_env=auto_env + training_steps, port_offset=0, auto_env=auto_env, + use_wandb=use_wandb, wandb_project=wandb_project ) results.append(result) @@ -548,7 +562,18 @@ Available models: """ + ", ".join(TEST_MODELS.keys()) parser.add_argument( "--auto-env", action="store_true", - help="Automatically start gsm8k environment for each model (requires run-api to be running)", + help="Automatically start run-api and gsm8k environment for each model", + ) + parser.add_argument( + "--use-wandb", + action="store_true", + help="Enable Weights & Biases logging for training runs", + ) + parser.add_argument( + "--wandb-project", + type=str, + default="multi-model-test", + help="W&B project name for logging", ) args = parser.parse_args() @@ -580,19 +605,27 @@ Available models: """ + ", ".join(TEST_MODELS.keys()) gpus = gpus * (len(models) // len(gpus) + 1) print(f"Using GPUs: {gpus[:len(models)]}") + if args.use_wandb: + print(f"W&B logging enabled (project: {args.wandb_project})") results = run_parallel_tests( models, gpus[:len(models)], args.atropos_url, args.atropos_port, args.output_dir, args.training_steps, - auto_env=args.auto_env + auto_env=args.auto_env, + use_wandb=args.use_wandb, + wandb_project=args.wandb_project, ) else: print(f"Using GPU: {args.gpu}") + if args.use_wandb: + print(f"W&B logging enabled (project: {args.wandb_project})") results = run_sequential_tests( models, args.gpu, args.atropos_url, args.atropos_port, args.output_dir, args.training_steps, - auto_env=args.auto_env + auto_env=args.auto_env, + use_wandb=args.use_wandb, + wandb_project=args.wandb_project, ) # Print summary