Add BLEUBERI environment for reference-based RL

This commit is contained in:
Allan Niemerg 2025-06-08 18:02:33 -05:00
parent 3f6015e622
commit 5bb5bd2c3d
7 changed files with 948 additions and 0 deletions

View file

@@ -0,0 +1,24 @@
# Environment settings for the BLEUBERI run.
# NOTE(review): nesting was restored on the assumption that every key from
# `wandb_name` through `ref_models` belongs to `env`; left flat, `env:` parses
# as null and these keys land at the top level. Confirm against the schema of
# the code that loads this file.
env:
  wandb_name: bleuberi
  group_size: 4
  max_token_length: 2048
  max_num_workers_per_node: 8
  steps_per_eval: 100
  total_steps: 1000
  include_messages: true
  # Dataset identifier and split (presumably a Hugging Face dataset; verify).
  dataset_name: "allenai/tulu-3-sft-mixture"
  dataset_split: "train"
  # NOTE(review): the meaning of "hard" and how `num_examples` interacts with
  # it are defined by the consuming environment, not by this file.
  selection_mode: "hard"
  num_examples: 5000
  # Lists, so more than one reward function / reference source can be given.
  reward_funcs:
    - "bleu"
  ref_models:
    - "gold"  # Use ground truth as reference

# Connection and sampling settings for the model endpoint.
# NOTE(review): nesting was restored on the assumption that all keys below
# belong to `openai`; left flat, `openai:` parses as null. In particular,
# confirm that `temperature`, `max_tokens` and `top_p` are read from this
# section rather than from the top level.
openai:
  base_url: "http://localhost:8000/v1"
  model: "Llama-3.1-8B-Instruct"
  # Placeholder value only; supply any real key outside version control.
  api_key: "PLACEHOLDER"
  temperature: 0.7
  max_tokens: 1024
  top_p: 0.95