mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-19 12:57:58 +00:00
change OPD style
This commit is contained in:
parent
33f5696171
commit
527433b5bc
10 changed files with 452 additions and 90 deletions
|
|
@@ -9,5 +9,8 @@ env:
|
|||
steps_per_eval: 20
|
||||
use_wandb: true
|
||||
wandb_name: "math-zero-lora-env"
|
||||
# Optional teacher-behavior steering for on-policy distillation.
|
||||
# teacher_system_prompt: "Use simple language and avoid jargon."
|
||||
# teacher_prefix_text: "Style: concise, non-jargony.\n\n"
|
||||
eval_limit_ratio: 0.1
|
||||
max_num_workers_per_node: 24
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue