feedback fixes: shared layers + hard-coded values + warmup steps

This commit is contained in:
Jai Suphavadeeprasit 2026-02-24 10:28:44 -05:00
parent e1f9b926bb
commit 624b3cdabe
9 changed files with 247 additions and 58 deletions

View file

@@ -102,7 +102,7 @@ def save_checkpoint(
torch.save(state_dict, os.path.join(checkpoint_path, "pytorch_model.bin"))
model.config.save_pretrained(checkpoint_path)
- # CRITICAL: Clean up the copied state_dict to free ~8GB GPU memory!
+ # CRITICAL: Clean up the copied state_dict to free significant GPU memory.
del state_dict
import gc