# Mirror of https://github.com/NousResearch/atropos.git
# (synced 2026-04-19 12:57:58 +00:00).
# Upstream file at sync time: YAML, 52 lines, 1.6 KiB, no trailing newline.
# Dataset Environment Local Testing Configuration
#
# Small single-step, single-worker settings (group/batch size of 1, W&B off)
# pointed at a rollout server on localhost.

# Base environment parameters
tokenizer_name: "NousResearch/DeepHermes-3-Llama-3-8B-Preview"
group_size: 1
use_wandb: false
max_num_workers: 1
rollout_server_url: "http://localhost:8000"
total_steps: 1
batch_size: 1
# NOTE(review): steps_per_eval (5) is larger than total_steps (1), so
# presumably no evaluation pass is triggered in this run - confirm.
steps_per_eval: 5
max_token_length: 4096
wandb_name: "dataset_test_local"
ensure_scores_are_not_same: false

# Dataset specific configuration
# NOTE(review): every key from dataset_name through format_reward_weight is
# nested under `dataset:` based on the section layout - confirm against the
# environment's config schema.
dataset:
  # Dataset parameters
  dataset_name: "gsm8k"  # Example dataset
  dataset_config: "main"
  split: "train"
  prompt_field: "question"
  answer_field: "answer"

  # Generation parameters
  # Literal block scalar with strip chomping (|-): line breaks are kept
  # verbatim and the value ends without a trailing newline.
  system_prompt: |-
    You are an expert mathematician. You need to solve the given math problem step-by-step, showing your reasoning clearly. You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your final answer.

    Follow these steps:
    1. Understand the problem carefully
    2. Plan your approach
    3. Execute the calculations step-by-step
    4. Verify your solution

    You may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution.
  # Double-quoted so that \n is parsed as a real newline after the tag.
  prefill: "<think>\n"
  shuffle_dataset: true
  max_generations_per_prompt: 1

  # Generation length parameters
  max_tokens: 4096
  length_warmup_steps: 0
  min_tokens: 0

  # Completion parameters
  temperature: 0.7
  top_p: 0.9

  # Reward functions
  # The *_reward_weight keys below share their prefixes with the names
  # listed here (presumably one weight per function - confirm).
  reward_functions:
    - "accuracy"
    - "format"
  accuracy_reward_weight: 1.0
  format_reward_weight: 0.2

# Server configuration
# One entry per inference server; this file defines a single entry.
server_configs:
  - model_name: "gpt-4.1-nano"
    # Placeholder naming the OPENAI_API_KEY environment variable. YAML does
    # not expand it; presumably the config loader substitutes it (TODO
    # confirm). Quoted so the value is always read as a plain string.
    api_key: "${OPENAI_API_KEY}"
    # NOTE(review): unit is not stated here; presumably seconds - confirm.
    timeout: 600