mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-19 12:57:58 +00:00
memory enhancements
This commit is contained in:
parent
99eaab3192
commit
75c4f5c853
4 changed files with 43 additions and 7 deletions
|
|
@@ -32,6 +32,13 @@ class TrainingConfig(BaseModel):
|
|||
gradient_accumulation_steps: int = Field(
|
||||
32, description="Number of gradient accumulation steps"
|
||||
)
|
||||
optimizer: Literal["adamw", "adamw_8bit", "adamw_cpu", "adafactor"] = Field(
|
||||
"adamw_8bit",
|
||||
description="Optimizer to use: 'adamw' (full precision, ~32GB GPU), "
|
||||
"'adamw_8bit' (8-bit states, ~8GB GPU, requires bitsandbytes), "
|
||||
"'adamw_cpu' (CPU offload, ~0GB GPU, slower), "
|
||||
"'adafactor' (no momentum, ~8GB GPU)"
|
||||
)
|
||||
|
||||
# === Device & Storage ===
|
||||
device: str = Field(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue