---
# Dataset Environment Local Testing Configuration
#
# NOTE(review): this file had been collapsed onto a single line starting with
# `#`, which YAML parsers read as one comment (i.e. an empty document). The
# line breaks and nesting below are reconstructed from the section comments;
# confirm the placement of keys against the schema the consumer expects.

# Base environment parameters
tokenizer_name: "NousResearch/DeepHermes-3-Llama-3-8B-Preview"
group_size: 1
use_wandb: false
max_num_workers: 1
rollout_server_url: "http://localhost:8000"
total_steps: 1
batch_size: 1
steps_per_eval: 5
max_token_length: 4096
wandb_name: "dataset_test_local"
ensure_scores_are_not_same: false

# Dataset specific configuration
dataset:
  # Dataset parameters
  dataset_name: "gsm8k"  # Example dataset
  dataset_config: "main"
  split: "train"
  prompt_field: "question"
  answer_field: "answer"

  # Generation parameters
  # NOTE(review): the prompt asks the model to enclose its thoughts "inside
  # tags" without naming any tag, and `prefill` is only a newline. Tag markup
  # may have been lost from both strings; they are kept exactly as found, so
  # confirm against the intended prompt text.
  system_prompt: "You are an expert mathematician. You need to solve the given math problem step-by-step, showing your reasoning clearly. You should enclose your thoughts and internal monologue inside tags, and then provide your final answer.\n\nFollow these steps:\n1. Understand the problem carefully\n2. Plan your approach\n3. Execute the calculations step-by-step\n4. Verify your solution\n\nYou may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution."
  prefill: "\n"
  shuffle_dataset: true
  max_generations_per_prompt: 1

  # Generation length parameters
  max_tokens: 4096
  length_warmup_steps: 0
  min_tokens: 0

  # Completion parameters
  temperature: 0.7
  top_p: 0.9

  # Reward functions
  # Two reward functions are listed, each with a matching `*_reward_weight`.
  reward_functions:
    - "accuracy"
    - "format"
  accuracy_reward_weight: 1.0
  format_reward_weight: 0.2

# Server configuration
server_configs:
  - model_name: "gpt-4.1-nano"
    # Placeholder rather than a literal secret; quoted so it always loads as a
    # string. How (and whether) it is expanded is up to the consumer, which is
    # not visible from this file.
    api_key: "${OPENAI_API_KEY}"
    timeout: 600