# Mirror of https://github.com/NousResearch/atropos.git
# Synced 2026-04-19 12:57:58 +00:00
# 47 lines, 1.4 KiB, YAML
# Environment settings for this run.
# NOTE(review): nesting under `env:` was reconstructed from a flattened
# copy of this file; every key/value pair is unchanged.
env:
  group_size: 8
  max_num_workers: -1
  max_eval_workers: 16
  max_num_workers_per_node: 24
  steps_per_eval: 20
  max_token_length: 4096
  eval_handling: LIMIT_TRAIN
  eval_limit_ratio: 0.1
  inference_weight: 1.0
  batch_size: 64
  max_batches_offpolicy: 3
  tokenizer_name: Qwen/Qwen3-4B-Instruct-2507

  use_wandb: true
  rollout_server_url: http://localhost:8000
  total_steps: 120
  wandb_name: math-zero-env
  num_rollouts_to_keep: 32
  num_rollouts_per_group_for_logging: 1
  ensure_scores_are_not_same: true
  data_path_to_save_groups: null
  data_dir_to_save_evals: ./eval_results
  min_items_sent_before_logging: 2
  include_messages: false
  min_batch_allocation: null
  worker_timeout: 600.0

  thinking_mode: false
  reasoning_effort: null
  max_reasoning_tokens: null
  custom_thinking_prompt: null

  run_evaluation: true
  mask_too_long_completions: true
  percent_length_penalty: 0.0
  # NOTE(review): start_tok_length (8192) is larger than max_token_length
  # (4096) above -- confirm this combination is intended.
  start_tok_length: 8192
# Inference endpoint definitions: a list with a single entry.
# NOTE(review): the extent of the list item was reconstructed from a
# flattened copy of this file -- confirm all keys below belong to it.
openai:
  - timeout: 1200
    num_max_requests_at_once: 32
    num_requests_for_eval: 32
    # Quoted because the value contains several ':' characters.
    model_name: 'tinker://d43e769f-dfd3-5a83-81d2-21ac97a656ad:train:0/sampler_weights/step_78'
    rolling_buffer_length: 1000
    server_type: openai
    # SECURITY: a literal API key was previously committed on this line.
    # Treat that key as compromised and rotate it. Do not commit the real
    # value; supply it out-of-band (for example an untracked local override
    # file, or environment substitution if the config loader supports it --
    # TODO confirm which mechanism the consumer provides).
    api_key: 'REPLACE_WITH_TINKER_API_KEY'
    base_url: https://tinker.thinkingmachines.dev/services/tinker-prod/oai/api/v1
    n_kwarg_is_ignored: false
    health_check: true
# Top-level run flags; both are disabled in this configuration.
slurm: false
testing: false