diff --git a/environments/math_server_zero.py b/environments/math_server_zero.py index c6f28547..87acdd9e 100644 --- a/environments/math_server_zero.py +++ b/environments/math_server_zero.py @@ -472,6 +472,10 @@ class MathEnv(BaseEnv): if self.config.mask_too_long_completions: scores["overrides"][-1]["set_advantage_to_zero"] = True else: + # re-append if stripped by vLLM stop string handling + # (mirrors the eval path in rollout_and_score_eval) + if ("" in resp) and ("" not in resp): + resp = resp + "" task = loop.run_in_executor(self.mp_executor, score_answer, gold, resp) reward = await task if reward is None: