mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-19 12:57:58 +00:00
Merge pull request #408 from daspartho/verl-integration-fixes
fix: re-append stop string in math training path
This commit is contained in:
commit
c421582b6f
1 changed file with 4 additions and 0 deletions
|
|
@@ -472,6 +472,10 @@ class MathEnv(BaseEnv):
|
||||||
if self.config.mask_too_long_completions:
|
if self.config.mask_too_long_completions:
|
||||||
scores["overrides"][-1]["set_advantage_to_zero"] = True
|
scores["overrides"][-1]["set_advantage_to_zero"] = True
|
||||||
else:
|
else:
|
||||||
|
# re-append </answer> if stripped by vLLM stop string handling
|
||||||
|
# (mirrors the eval path in rollout_and_score_eval)
|
||||||
|
if ("<answer>" in resp) and ("</answer>" not in resp):
|
||||||
|
resp = resp + "</answer>"
|
||||||
task = loop.run_in_executor(self.mp_executor, score_answer, gold, resp)
|
task = loop.run_in_executor(self.mp_executor, score_answer, gold, resp)
|
||||||
reward = await task
|
reward = await task
|
||||||
if reward is None:
|
if reward is None:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue