Make eval max_token_length consistent with training config

Instead of hardcoding the value, this follows the pattern used by the other envs.
This commit is contained in:
Partho Das 2026-03-03 18:03:04 +05:30
parent bd98a82bbc
commit 5f52befd38

View file

@ -259,7 +259,7 @@ class MathEnv(BaseEnv):
completion = await managed.completion(
prompt=question,
n=1,
max_tokens=32765,
max_tokens=self.config.max_token_length,
temperature=0.0,
split="eval",
stop=stop_list,