Merge pull request #408 from daspartho/verl-integration-fixes

fix: re-append stop string in math training path
This commit is contained in:
dmahan93 2026-03-10 23:08:58 -05:00 committed by GitHub
commit c421582b6f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -472,6 +472,10 @@ class MathEnv(BaseEnv):
if self.config.mask_too_long_completions:
scores["overrides"][-1]["set_advantage_to_zero"] = True
else:
# re-append </answer> if stripped by vLLM stop string handling
# (mirrors the eval path in rollout_and_score_eval)
if ("<answer>" in resp) and ("</answer>" not in resp):
resp = resp + "</answer>"
task = loop.run_in_executor(self.mp_executor, score_answer, gold, resp)
reward = await task
if reward is None: