# atropos/environments/dataset_environment/configs/dataset_local.yaml

# Dataset Environment Local Testing Configuration

# Base environment parameters
# Every count below is set to 1, i.e. the smallest possible run.

# Tokenization
tokenizer_name: 'NousResearch/DeepHermes-3-Llama-3-8B-Preview'
max_token_length: 4096

# Rollout server and run sizing
rollout_server_url: 'http://localhost:8000'
max_num_workers: 1
group_size: 1
batch_size: 1
total_steps: 1
# NOTE(review): steps_per_eval (5) is larger than total_steps (1), so a
# periodic evaluation presumably never triggers in this run - confirm.
steps_per_eval: 5
ensure_scores_are_not_same: false

# Weights & Biases logging (switched off here)
use_wandb: false
# Presumably only used when use_wandb is true - verify against the consumer.
wandb_name: 'dataset_test_local'

# Dataset specific configuration
dataset:
  # Dataset parameters: dataset name, config and split, plus the names of
  # the fields used for the prompt and the answer.
  dataset_name: 'gsm8k'  # Example dataset
  dataset_config: 'main'
  split: 'train'
  prompt_field: 'question'
  answer_field: 'answer'

  # Generation parameters
  # The system prompt is a literal block scalar so its line breaks are
  # visible as written; `|-` strips the trailing newline, so the value is
  # the same string as a one-line form with embedded "\n" escapes.
  system_prompt: |-
    You are an expert mathematician. You need to solve the given math problem step-by-step, showing your reasoning clearly. You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your final answer.

    Follow these steps:
    1. Understand the problem carefully
    2. Plan your approach
    3. Execute the calculations step-by-step
    4. Verify your solution

    You may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution.
  # Double quotes are required here so that "\n" is parsed as a newline.
  prefill: "<think>\n"
  shuffle_dataset: true
  max_generations_per_prompt: 1

  # Generation length parameters
  # NOTE(review): max_tokens equals the top-level max_token_length (4096);
  # confirm the prompt still fits if that limit covers prompt + completion.
  max_tokens: 4096
  length_warmup_steps: 0
  min_tokens: 0

  # Completion (sampling) parameters
  temperature: 0.7
  top_p: 0.9

  # Reward functions, referenced by name, each with its own weight key.
  # How the weights are combined is up to the consuming environment.
  reward_functions:
    - 'accuracy'
    - 'format'
  accuracy_reward_weight: 1.0
  format_reward_weight: 0.2


# Server configuration
# One entry per inference server; this file defines a single entry.
server_configs:
  - model_name: 'gpt-4.1-nano'
    # Placeholder rather than a literal secret. It is quoted so the value is
    # always read as a string. NOTE(review): whether it is expanded from the
    # OPENAI_API_KEY environment variable depends on the loader - confirm.
    api_key: '${OPENAI_API_KEY}'
    # Presumably seconds - verify against the consumer.
    timeout: 600