mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-26 17:13:09 +00:00
Change on-policy distillation (OPD) style
This commit is contained in:
parent
33f5696171
commit
527433b5bc
10 changed files with 452 additions and 90 deletions
|
|
@ -65,6 +65,31 @@ class TrainingConfig(BaseModel):
|
|||
"When False, falls back to REINFORCE-style updates (not recommended)."
|
||||
),
|
||||
)
|
||||
distillation_enabled: bool = Field(
|
||||
False,
|
||||
description=(
|
||||
"Enable on-policy distillation from teacher top-K distributions "
|
||||
"provided by Atropos in distill_token_ids/distill_logprobs."
|
||||
),
|
||||
)
|
||||
distillation_coef: float = Field(
|
||||
0.1,
|
||||
description=(
|
||||
"Scale factor for distillation loss. "
|
||||
"Total loss adds distillation_coef * distillation_loss."
|
||||
),
|
||||
)
|
||||
distillation_temperature: float = Field(
|
||||
1.0,
|
||||
description="Temperature used when matching teacher distributions.",
|
||||
)
|
||||
distillation_loss_type: Literal["kl", "cross_entropy"] = Field(
|
||||
"kl",
|
||||
description=(
|
||||
"Distillation objective: KL(teacher||student) or cross-entropy "
|
||||
"with teacher soft targets."
|
||||
),
|
||||
)
|
||||
|
||||
# === Device & Storage ===
|
||||
device: str = Field(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue