Merge pull request #408 from daspartho/verl-integration-fixes

fix: re-append stop string in math training path
2026-04-19 12:57:58 +00:00 · 2026-03-10 23:08:58 -05:00 · 2026-03-10 23:08:58 -05:00 · c421582b6f
commit c421582b6f
parent 1d78069b5d 632ab0161c
1 changed file with 4 additions and 0 deletions
--- a/environments/math_server_zero.py
+++ b/environments/math_server_zero.py
@@ -472,6 +472,10 @@ class MathEnv(BaseEnv):
                if self.config.mask_too_long_completions:
                    scores["overrides"][-1]["set_advantage_to_zero"] = True
            else:
                # re-append </answer> if stripped by vLLM stop string handling
                # (mirrors the eval path in rollout_and_score_eval)
                if ("<answer>" in resp) and ("</answer>" not in resp):
                    resp = resp + "</answer>"
                task = loop.run_in_executor(self.mp_executor, score_answer, gold, resp)
                reward = await task
                if reward is None: