---
# Run configuration: environment/rollout settings, the inference endpoint(s),
# and two top-level run flags.
#
# NOTE(review): this file was reconstructed from a version in which every
# newline and indent had been collapsed onto a single line (which does not
# parse as YAML). The nesting below is inferred from key order and the single
# `-` list marker -- confirm it against the consumer's config schema.

# Environment settings.
env:
  # Worker and batching limits.
  group_size: 8
  max_num_workers: -1
  max_eval_workers: 16
  max_num_workers_per_node: 24
  steps_per_eval: 20
  max_token_length: 4096
  eval_handling: LIMIT_TRAIN
  eval_limit_ratio: 0.1
  inference_weight: 1.0
  batch_size: 64
  max_batches_offpolicy: 3

  tokenizer_name: 'Qwen/Qwen3-4B-Instruct-2507'
  use_wandb: true
  rollout_server_url: 'http://localhost:8000'
  total_steps: 120
  wandb_name: math-zero-env

  # Logging and persistence of rollouts / eval results.
  num_rollouts_to_keep: 32
  num_rollouts_per_group_for_logging: 1
  ensure_scores_are_not_same: true
  data_path_to_save_groups: null
  data_dir_to_save_evals: ./eval_results
  min_items_sent_before_logging: 2
  include_messages: false
  min_batch_allocation: null
  worker_timeout: 600.0

  # Thinking / reasoning options (all disabled or unset here).
  thinking_mode: false
  reasoning_effort: null
  max_reasoning_tokens: null
  custom_thinking_prompt: null

  run_evaluation: true
  mask_too_long_completions: true
  percent_length_penalty: 0.0
  # NOTE(review): start_tok_length (8192) is larger than max_token_length
  # (4096) above -- confirm this is intentional.
  start_tok_length: 8192

# Inference server endpoints (a list; one entry is configured).
openai:
  - timeout: 1200
    num_max_requests_at_once: 32
    num_requests_for_eval: 32
    model_name: 'tinker://d43e769f-dfd3-5a83-81d2-21ac97a656ad:train:0/sampler_weights/step_78'
    rolling_buffer_length: 1000
    server_type: openai
    # NOTE(review): this looks like a live credential stored in plain text.
    # Rotate it and supply it from a secret store or environment variable
    # instead of committing it. The value is left unchanged here only so that
    # this edit does not alter behavior.
    api_key: 'tml-N5fCuvsPh08em1BPcHnY6oaq0uvSrXOqUpveAmCLl7Ow9NpnTqoIl10Yr2kpfBUnFAAAA'
    base_url: 'https://tinker.thinkingmachines.dev/services/tinker-prod/oai/api/v1'
    n_kwarg_is_ignored: false
    health_check: true

# Top-level run flags.
slurm: false
testing: false