mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-28 17:29:30 +00:00
testing set up
This commit is contained in:
parent
f44eb810bf
commit
530fed2877
8 changed files with 599 additions and 2 deletions
|
|
@@ -69,6 +69,18 @@ class TrainingConfig(BaseModel):
|
|||
"Prevents large policy updates that could destabilize training."
|
||||
),
|
||||
)
|
||||
distill_enabled: bool = Field(
|
||||
False,
|
||||
description="Enable teacher distillation loss when distill tensors are present.",
|
||||
)
|
||||
distill_coef: float = Field(
|
||||
0.0,
|
||||
description="Weight for distillation loss in total loss.",
|
||||
)
|
||||
distill_temperature: float = Field(
|
||||
1.0,
|
||||
description="Temperature applied when converting teacher top-k logprobs.",
|
||||
)
|
||||
# === Device & Storage ===
|
||||
device: str = Field(
|
||||
"cuda" if torch.cuda.is_available() else "cpu", description="Device to train on"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue