memory enhancements

2026-04-19 12:57:58 +00:00 · 2026-01-29 21:44:24 -05:00 · 2026-01-29 21:44:24 -05:00 · 75c4f5c853
commit 75c4f5c853
parent 99eaab3192
4 changed files with 43 additions and 7 deletions
--- a/example_trainer/config.py
+++ b/example_trainer/config.py
@@ -32,6 +32,13 @@ class TrainingConfig(BaseModel):
    gradient_accumulation_steps: int = Field(
        32, description="Number of gradient accumulation steps"
    )
+    optimizer: Literal["adamw", "adamw_8bit", "adamw_cpu", "adafactor"] = Field(
+        "adamw_8bit",
+        description="Optimizer to use: 'adamw' (full precision, ~32GB GPU), "
+                    "'adamw_8bit' (8-bit states, ~8GB GPU, requires bitsandbytes), "
+                    "'adamw_cpu' (CPU offload, ~0GB GPU, slower), "
+                    "'adafactor' (no momentum, ~8GB GPU)"
+    )
    
    # === Device & Storage ===
    device: str = Field(