memory enhancements

This commit is contained in:
Jai Suphavadeeprasit 2026-01-29 21:44:24 -05:00
parent 99eaab3192
commit 75c4f5c853
4 changed files with 43 additions and 7 deletions

View file

@ -32,6 +32,13 @@ class TrainingConfig(BaseModel):
# Integer setting for gradient accumulation; defaults to 32 when not supplied.
gradient_accumulation_steps: int = Field(
32, description="Number of gradient accumulation steps"
)
# Optimizer selection. The Literal annotation restricts the value to the four
# names listed; the default is "adamw_8bit", which the description notes
# requires bitsandbytes.
# NOTE(review): the GPU memory figures in the description are approximate
# ("~") and presumably assume a particular model size — confirm before
# relying on them.
optimizer: Literal["adamw", "adamw_8bit", "adamw_cpu", "adafactor"] = Field(
"adamw_8bit",
description="Optimizer to use: 'adamw' (full precision, ~32GB GPU), "
"'adamw_8bit' (8-bit states, ~8GB GPU, requires bitsandbytes), "
"'adamw_cpu' (CPU offload, ~0GB GPU, slower), "
"'adafactor' (no momentum, ~8GB GPU)"
)
# === Device & Storage ===
device: str = Field(