atropos/environments/community/bleuberi/configs/default.yaml
Allan Niemerg 0f6c06bb56 Move BLEUBERI environment to community folder
- Moved environments/bleuberi to environments/community/bleuberi
- Updated .gitmodules to reflect new submodule path
- Fixed pre-commit formatting issues
- Cleaned up test output files
2025-09-08 14:38:43 -05:00

41 lines
1.3 KiB
YAML

---
# BLEUBERI environment configuration.
# Two top-level sections: `env` (dataset, reward, and run settings consumed by
# the environment loop) and `openai` (inference endpoint for the policy model).
env:
  wandb_name: bleuberi
  group_size: 4
  max_token_length: 2048
  max_num_workers_per_node: 8
  steps_per_eval: 100
  total_steps: 1000
  include_messages: true
  # Dataset configuration
  dataset_name: "allenai/tulu-3-sft-mixture"
  dataset_split: "train"
  selection_mode: "hard"
  num_examples: 5000
  cache_dir: null
  streaming: false
  shuffle: true
  # Reward configuration
  reward_funcs:
    - "bleu"
  ref_models:
    - "gold"  # Use ground truth as reference
  # System prompt configuration.
  # Folded block scalar (>-): folds to a single line with spaces at load time,
  # trailing newline stripped — the runtime string is unchanged.
  system_prompt: >-
    You are a deep thinking AI, you may use extremely long chains of thought
    to deeply consider the problem and deliberate with yourself via systematic
    reasoning processes to help come to a correct solution prior to answering.
    You should enclose your thoughts and internal monologue
    inside <think> </think> tags, and then provide your solution or response
    to the problem. After your thinking, make sure to clearly provide your
    final answer inside <answer></answer> tags.
  # Seeds and evaluation
  seed: 42
  eval_seed: 123
  num_eval_samples_per_task: 5
  eval_limit_ratio: 0.1
  reasoning: true
# Inference endpoint for the policy model (OpenAI-compatible API).
openai:
  base_url: "http://localhost:8000/v1"
  model: "Llama-3.1-8B-Instruct"
  # NOTE(review): do not commit real keys — inject via env var / secret store.
  api_key: "PLACEHOLDER"
  temperature: 0.7
  max_tokens: 1024
  top_p: 0.95