diff --git a/atroposlib/api/server.py b/atroposlib/api/server.py index 774e2480..8a3b6c68 100644 --- a/atroposlib/api/server.py +++ b/atroposlib/api/server.py @@ -270,6 +270,7 @@ async def get_latest_example(): "scores": [], "advantages": [], "ref_logprobs": [], + "generation_params": [], "inference_logprobs": [], "messages": [], "images": [], diff --git a/example_trainer/grpo.py b/example_trainer/grpo.py index 3edd64b4..b8ab3534 100644 --- a/example_trainer/grpo.py +++ b/example_trainer/grpo.py @@ -390,7 +390,7 @@ def train(config: TrainingConfig): # User specified that tokens/labels are already prepared by get_data outputs = model(tokens) # Assuming model just needs tokens logits = outputs.logits # Assuming this is the structure - # temp scaled logits before corss entropy (clamp to prevent zero division or just ignore 0 temps?) + # temp scaled logits before cross entropy (clamp to prevent zero division or just ignore 0 temps?) t = temperatures.to(logits.device, logits.dtype) t = torch.where(t <= 0, torch.ones_like(t), t) logits = logits / t