# Patch: add two math-zero environment configs under example_trainer/configs/.
# Lines starting with '#' sit outside the diff hunks and are not part of
# either YAML file.
#
# File 1 of 2: math_zero_lora.yaml
#   - env.rollout_server_url points at localhost:8002
#   - openai.base_url points at a vLLM server on localhost:9002
# NOTE(review): start_tok_length equals max_token_length (both 8192) --
#   confirm that is intended.
# NOTE(review): api_key "x" looks like a placeholder for a local server --
#   confirm no real credential is expected here.
diff --git a/example_trainer/configs/math_zero_lora.yaml b/example_trainer/configs/math_zero_lora.yaml
new file mode 100644
index 00000000..650d9d82
--- /dev/null
+++ b/example_trainer/configs/math_zero_lora.yaml
@@ -0,0 +1,21 @@
+env:
+  tokenizer_name: "Qwen/Qwen3-4B-Instruct-2507"
+  rollout_server_url: "http://localhost:8002"
+  max_token_length: 8192
+  start_tok_length: 8192
+  group_size: 8
+  batch_size: 64
+  total_steps: 120
+  steps_per_eval: 20
+  use_wandb: true
+  wandb_name: "math-zero-lora-env"
+  eval_limit_ratio: 0.1
+  max_num_workers_per_node: 24
+
+openai:
+  base_url: "http://localhost:9002/v1"
+  model_name: "Qwen/Qwen3-4B-Instruct-2507"
+  server_type: "vllm"
+  api_key: "x"
+  num_requests_for_eval: 256
+  weight: 1.0
#
# File 2 of 2: math_zero_shared.yaml
#   Same keys and values as math_zero_lora.yaml except for three entries:
#   env.rollout_server_url (port 8001), env.wandb_name
#   ("math-zero-shared-env") and openai.base_url (port 9001).
diff --git a/example_trainer/configs/math_zero_shared.yaml b/example_trainer/configs/math_zero_shared.yaml
new file mode 100644
index 00000000..35979498
--- /dev/null
+++ b/example_trainer/configs/math_zero_shared.yaml
@@ -0,0 +1,21 @@
+env:
+  tokenizer_name: "Qwen/Qwen3-4B-Instruct-2507"
+  rollout_server_url: "http://localhost:8001"
+  max_token_length: 8192
+  start_tok_length: 8192
+  group_size: 8
+  batch_size: 64
+  total_steps: 120
+  steps_per_eval: 20
+  use_wandb: true
+  wandb_name: "math-zero-shared-env"
+  eval_limit_ratio: 0.1
+  max_num_workers_per_node: 24
+
+openai:
+  base_url: "http://localhost:9001/v1"
+  model_name: "Qwen/Qwen3-4B-Instruct-2507"
+  server_type: "vllm"
+  api_key: "x"
+  num_requests_for_eval: 256
+  weight: 1.0