mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-19 12:57:58 +00:00
cleanup 3
This commit is contained in:
parent
fe5b13a5da
commit
e2e8268f2a
4 changed files with 4 additions and 93 deletions
|
|
@@ -32,11 +32,10 @@ class TrainingConfig(BaseModel):
|
|||
gradient_accumulation_steps: int = Field(
|
||||
32, description="Number of gradient accumulation steps"
|
||||
)
|
||||
optimizer: Literal["adamw", "adamw_8bit", "adamw_cpu", "adafactor"] = Field(
|
||||
optimizer: Literal["adamw", "adamw_8bit", "adafactor"] = Field(
|
||||
"adamw_8bit",
|
||||
description="Optimizer to use: 'adamw' (full precision, ~32GB GPU), "
|
||||
"'adamw_8bit' (8-bit states, ~8GB GPU, requires bitsandbytes), "
|
||||
"'adamw_cpu' (CPU offload, ~0GB GPU, slower), "
|
||||
"'adafactor' (no momentum, ~8GB GPU)",
|
||||
)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue