# atropos/environments/dataset_environment/configs/dataset_local.yaml
# 2025-04-29 12:10:10 -07:00

# 52 lines
# No EOL
# 1.6 KiB
# YAML

# Local testing configuration for the dataset environment.
#
# Base environment parameters, grouped by concern. Every count below is set
# to a minimal value for a quick local run.

# Tokenizer and token-length cap
tokenizer_name: "NousResearch/DeepHermes-3-Llama-3-8B-Preview"
max_token_length: 4096

# Rollout server endpoint and worker/group/batch sizing
rollout_server_url: "http://localhost:8000"
max_num_workers: 1
group_size: 1
batch_size: 1

# Run length and evaluation cadence
# NOTE(review): steps_per_eval (5) is larger than total_steps (1), so a
# periodic evaluation presumably never triggers in this run - confirm intended.
total_steps: 1
steps_per_eval: 5

# wandb logging is switched off; the run name is still provided
use_wandb: false
wandb_name: "dataset_test_local"

# Scoring
ensure_scores_are_not_same: false
# Dataset specific configuration
# Every setting in this section is nested under `dataset:`. Without the
# indentation, `dataset` parses as null and these keys land at the top level.
# NOTE(review): nesting reconstructed from the section comments - confirm
# against the environment's config schema.
dataset:
  # Dataset parameters
  dataset_name: "gsm8k"  # Example dataset
  dataset_config: "main"
  split: "train"
  prompt_field: "question"
  answer_field: "answer"

  # Generation parameters
  # Literal block scalar with strip chomping (`|-`): parses to exactly the
  # same string as a single double-quoted line with `\n` escapes, with no
  # trailing newline.
  system_prompt: |-
    You are an expert mathematician. You need to solve the given math problem step-by-step, showing your reasoning clearly. You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your final answer.

    Follow these steps:
    1. Understand the problem carefully
    2. Plan your approach
    3. Execute the calculations step-by-step
    4. Verify your solution

    You may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution.
  # Double quotes are required here so that `\n` is read as a newline.
  prefill: "<think>\n"
  shuffle_dataset: true
  max_generations_per_prompt: 1

  # Generation length parameters
  max_tokens: 4096
  length_warmup_steps: 0
  min_tokens: 0

  # Completion parameters
  temperature: 0.7
  top_p: 0.9

  # Reward functions
  reward_functions:
    - "accuracy"
    - "format"
  # One weight per reward function listed above.
  accuracy_reward_weight: 1.0
  format_reward_weight: 0.2
# Server configuration
# A list of server entries; this file defines a single one. `api_key` and
# `timeout` are fields of that entry and must be indented under the list
# item, otherwise they parse as unrelated top-level keys.
server_configs:
  - model_name: "gpt-4.1-nano"
    # Placeholder rather than a literal secret; presumably expanded from the
    # environment by the config loader - confirm. Left as a plain (unquoted)
    # scalar on purpose: some loaders only resolve placeholders on plain
    # scalars.
    api_key: ${OPENAI_API_KEY}
    # NOTE(review): unit is not stated in this file (presumably seconds) -
    # confirm.
    timeout: 600