Change OPD (on-policy distillation) style

This commit is contained in:
Jai Suphavadeeprasit 2026-02-19 17:08:27 -05:00
parent 33f5696171
commit 527433b5bc
10 changed files with 452 additions and 90 deletions

View file

@ -9,5 +9,8 @@ env:
steps_per_eval: 20
use_wandb: true
wandb_name: "math-zero-lora-env"
# Optional teacher-behavior steering for on-policy distillation (OPD).
# Both keys are left commented out here; uncomment a line to set that key.
# teacher_system_prompt: "Use simple language and avoid jargon."
# teacher_prefix_text: "Style: concise, non-jargony.\n\n"
eval_limit_ratio: 0.1
max_num_workers_per_node: 24