mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-28 17:29:30 +00:00
gradient flow fix
This commit is contained in:
parent
1c8bb34bc1
commit
33844c374b
1 changed file with 3 additions and 2 deletions
|
|
@@ -226,8 +226,9 @@ def _setup_gradient_checkpointing(
|
|||
# Disable KV cache - incompatible with gradient checkpointing
|
||||
model.config.use_cache = False
|
||||
|
||||
if config.weight_bridge_mode == "lora_only":
|
||||
# PEFT models need special handling
|
||||
if config.weight_bridge_mode in ("lora_only", "lora_restart"):
|
||||
# PEFT models need special handling - enable_input_require_grads is CRITICAL
|
||||
# Without this, the LoRA parameters won't receive gradients!
|
||||
if hasattr(model, "enable_input_require_grads"):
|
||||
model.enable_input_require_grads()
|
||||
model.gradient_checkpointing_enable(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue