# atropos/example_trainer/configs/math_zero_shared.yaml

# Environment settings.
# NOTE(review): the grouping comments below are inferred from key names;
# confirm exact semantics against the consumer's config schema.
env:
  # Tokenizer identifier; same string as `openai.model_name` in this file.
  tokenizer_name: 'Qwen/Qwen3-4B-Instruct-2507'
  rollout_server_url: 'http://localhost:8001'

  # Token-length settings; both are set to the same value (8192) here.
  max_token_length: 8192
  start_tok_length: 8192

  # Batching.
  group_size: 8
  batch_size: 64

  # Schedule and evaluation: 120 total steps, steps_per_eval set to 20.
  total_steps: 120
  steps_per_eval: 20
  eval_limit_ratio: 0.1

  # Weights & Biases logging.
  use_wandb: true
  wandb_name: 'math-zero-shared-env'

  max_num_workers_per_node: 24

# Inference endpoint settings (server_type is set to vllm below).
openai:
  # Endpoint and model; model_name is the same string as
  # `env.tokenizer_name` in this file.
  base_url: 'http://localhost:9001/v1'
  model_name: 'Qwen/Qwen3-4B-Instruct-2507'
  server_type: 'vllm'

  # Placeholder-looking value.
  # NOTE(review): presumably the local endpoint does not validate keys - confirm.
  api_key: 'x'

  num_requests_for_eval: 256
  weight: 1.0