# Mirror of https://github.com/NousResearch/atropos.git
# Synced 2026-04-19 12:57:58 +00:00
# Original file: 24 lines, 529 B, YAML
# Settings grouped under `env`.
# NOTE(review): nesting was restored from a copy whose indentation had been
# flattened — confirm against the consumer's schema.
env:
  wandb_name: bleuberi
  group_size: 4
  max_token_length: 2048
  max_num_workers_per_node: 8
  steps_per_eval: 100
  total_steps: 1000
  include_messages: true
  dataset_name: "allenai/tulu-3-sft-mixture"
  dataset_split: "train"
  selection_mode: "hard"
  num_examples: 5000
  reward_funcs:
    - "bleu"
  ref_models:
    - "gold"  # Use ground truth as reference

# Settings grouped under `openai`: endpoint, model name, and sampling values.
openai:
  base_url: "http://localhost:8000/v1"
  model: "Llama-3.1-8B-Instruct"
  # Placeholder value, not a real credential.
  # NOTE(review): presumably replaced or ignored at runtime — confirm.
  api_key: "PLACEHOLDER"
  temperature: 0.7
  max_tokens: 1024
  top_p: 0.95