# File: atropos/environments/bleuberi/configs/default.yaml
# File-viewer metadata: 2025-09-08 11:21:27 -05:00, 24 lines, 529 B, YAML

# Environment-level settings, grouped under `env`.
# NOTE(review): the nesting here is reconstructed from a copy whose indentation
# was stripped -- confirm the key layout against the config loader's schema.
env:
  wandb_name: bleuberi
  group_size: 4
  max_token_length: 2048
  max_num_workers_per_node: 8
  steps_per_eval: 100
  total_steps: 1000
  include_messages: true

  # Dataset selection.
  # Presumably a Hugging Face Hub dataset id and split -- TODO confirm.
  dataset_name: "allenai/tulu-3-sft-mixture"
  dataset_split: "train"
  selection_mode: "hard"
  num_examples: 5000

  # Reward configuration; both keys take a list (one entry each here).
  reward_funcs:
    - "bleu"
  ref_models:
    - "gold"  # Use ground truth as reference
# Inference endpoint and sampling settings, grouped under `openai`.
# NOTE(review): the nesting here is reconstructed from a copy whose indentation
# was stripped -- confirm that the sampling keys belong under `openai`.
openai:
  # Points at a server on localhost port 8000; presumably an
  # OpenAI-compatible API -- TODO confirm.
  base_url: "http://localhost:8000/v1"
  model: "Llama-3.1-8B-Instruct"
  # Placeholder only: supply a real key outside version control if one is
  # required.
  api_key: "PLACEHOLDER"
  temperature: 0.7
  max_tokens: 1024
  top_p: 0.95