diff --git a/reasoning_gym/algorithmic/group_anagrams.py b/reasoning_gym/algorithmic/group_anagrams.py index 477f9e0d..e3f65013 100644 --- a/reasoning_gym/algorithmic/group_anagrams.py +++ b/reasoning_gym/algorithmic/group_anagrams.py @@ -90,7 +90,7 @@ class GroupAnagramsDataset(ProceduralDataset): def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float: """Score a single Group Anagrams question""" - reward = 0 + reward = 0.0 if answer is not None: try: answer = json.loads(answer) @@ -98,11 +98,11 @@ class GroupAnagramsDataset(ProceduralDataset): answer_str = json.dumps(self._sort_nested_list(answer)) oracle_str = json.dumps(self._sort_nested_list(oracle)) if answer_str == oracle_str: - reward = 1 + reward = 1.0 else: reward = 0.01 except Exception: - reward = 0 + reward = 0.0 return reward def __getitem__(self, idx: int) -> dict: diff --git a/reasoning_gym/algorithmic/sentence_reordering.py b/reasoning_gym/algorithmic/sentence_reordering.py index 57f19d6e..f1303f09 100644 --- a/reasoning_gym/algorithmic/sentence_reordering.py +++ b/reasoning_gym/algorithmic/sentence_reordering.py @@ -93,7 +93,7 @@ class SentenceReorderingDataset(ProceduralDataset): } def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float: - reward = 0 + reward = 0.0 expected_answer = entry["answer"] if answer is not None: try: diff --git a/reasoning_gym/algorithmic/spell_backward.py b/reasoning_gym/algorithmic/spell_backward.py index d1837521..60af94b6 100644 --- a/reasoning_gym/algorithmic/spell_backward.py +++ b/reasoning_gym/algorithmic/spell_backward.py @@ -50,7 +50,7 @@ class SpellBackwardDataset(ProceduralDataset): } def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float: - reward = 0 + reward = 0.0 expected_answer = entry["answer"] if answer is not None: try: