---
# Training/rollout configuration (tokenizer, batching, reward functions,
# dataset selection). Reconstructed from a scrape-garbled mirror copy:
# line-number gutter artifacts removed and indentation restored.
# NOTE(review): nesting of the `dataset:` subkeys and each `params:` block
# is inferred from key adjacency in the garbled source — confirm against
# the upstream repository.

tokenizer_name: "NousResearch/DeepHermes-3-Llama-3-1B-Preview"
group_size: 8
use_wandb: true
max_num_workers: 256
max_eval_workers: 16
steps_per_eval: 100
batch_size: 1024
max_batches_offpolicy: 3
total_steps: 1000
rollout_server_url: "http://localhost:8000"

use_local_agents: true

dataset:
  dataset_name: "gsm8k"
  dataset_config: "main"
  split: "train"

  prompt_field: "question"
  answer_field: "answer"

system_prompt: "You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your solution or response to the problem."
shuffle_dataset: true
max_generations_per_prompt: 1
include_messages_in_scoring: false

# New configurable reward functions
reward_functions:
  - type: "r1"
    weight: 1.5
    params:
      format_weight: 0.5
      accuracy_weight: 1.0
  - type: "cosine_scaled"
    weight: 0.8
    params:
      scale_factor: 1.2
      min_reward: -1.0
      max_reward: 1.0
  - type: "accuracy"
    weight: 2.0
    params:
      split_on_think_tag: true
  - type: "format"
    weight: 0.7
    params:
      preferred_tags: ["think", "reasoning"]
      require_all_tags: false
  - type: "reasoning_steps"
    weight: 1.0
    params:
      min_steps: 3
  - type: "repetition_penalty"
    weight: 0.5
    params:
      threshold: 0.1

# Legacy format still supported for backward compatibility
# reward_funcs:
#   - "r1_reward"
#   - "cosine_scaled_reward"
#   - "accuracy_reward"
#   - "format_reward"
#   - "reasoning_steps_reward"
#   - "repetition_penalty_reward"

max_tokens: 16000
length_warmup_steps: 100
min_tokens: 2048

eval_dataset_name: "gsm8k"
eval_dataset_config: "main"
eval_split: "test"