This commit is contained in:
Jai Suphavadeeprasit 2026-03-13 16:57:46 -04:00
parent 82964b6e48
commit 697c594c72
3 changed files with 18 additions and 7 deletions

View file

@@ -1,4 +1,3 @@
-import logging
import random
import time
from typing import Dict, List, Optional, Tuple, TypedDict, Union
@@ -32,9 +31,6 @@ It is important that you provide your answer in the correct format.
If you do not, you will not receive credit for your answer.
So please end your answer with \\boxed{your answer here}"""
-logger = logging.getLogger(__name__)
class GSM8kRow(TypedDict):
question: str
answer: str
@@ -353,9 +349,8 @@ class GSM8kEnv(BaseEnv):
percentage_of_range = min(percentage_of_range, 1.0)
# Apply linear penalty scaling from 1.0 down to 0.0
scores["scores"].append(1.0 - percentage_of_range)
-# NOTE: identical-score filter disabled for testing.
-# if all([scores["scores"][0] == score for score in scores["scores"]]):
-# return None
+if all([scores["scores"][0] == score for score in scores["scores"]]):
+return None
return scores
else:
# If the gold solution is not parseable, we return None