---
# Run configuration with two sections: `env` and `openai`.
#
# NOTE(review): this file was previously collapsed onto a single line, which
# is not valid YAML (nested `key: value` pairs inside a plain scalar, and an
# inline comment that swallowed everything after it). The nesting below was
# reconstructed from key order; confirm against the consumer's schema that
# `openai` is a top-level sibling of `env` and that the sampling keys
# (`temperature`, `max_tokens`, `top_p`) belong under `openai`.

env:
  wandb_name: bleuberi
  group_size: 4
  max_token_length: 2048
  max_num_workers_per_node: 8
  steps_per_eval: 100
  total_steps: 1000
  include_messages: true

  # Dataset selection.
  dataset_name: "allenai/tulu-3-sft-mixture"
  dataset_split: "train"
  selection_mode: "hard"
  num_examples: 5000

  # Reward functions and the reference sources they are paired with.
  reward_funcs:
    - "bleu"
  ref_models:
    - "gold"  # Use ground truth as reference

openai:
  base_url: "http://localhost:8000/v1"
  model: "Llama-3.1-8B-Instruct"
  # Placeholder value only; do not commit a real key to version control.
  api_key: "PLACEHOLDER"
  temperature: 0.7
  max_tokens: 1024
  top_p: 0.95