Merge pull request #324 from DeVikingMark/fix/gradient-quantile-prefix

fix: use correct prefix for gradient quantiles with NaN/Inf
This commit is contained in:
dmahan93 2026-01-26 11:01:36 -08:00 committed by GitHub
commit b8ec055942
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -291,7 +291,7 @@ class LogTrainingStats(Callback):
and grad_data.size().numel() < 10000000
):
for q_idx, _ in enumerate(q):
-stats[f"param/{name}/quantile-{q[q_idx]}"] = -10
+stats[f"grad/{name}/quantile-{q[q_idx]}"] = -10
stats[f"grad/{name}/mean"] = grad_data.mean().item()
if len(grad_data.shape) > 1 or grad_data.shape[0] > 1: