ensure reward is float

2026-04-19 12:58:07 +00:00 · 2025-02-16 16:27:12 +01:00 · 2025-02-16 16:27:12 +01:00 · 4c47b7966f
commit 4c47b7966f
parent c858d1f236
3 changed files with 5 additions and 5 deletions
--- a/reasoning_gym/algorithmic/sentence_reordering.py
+++ b/reasoning_gym/algorithmic/sentence_reordering.py
@@ -93,7 +93,7 @@ class SentenceReorderingDataset(ProceduralDataset):
        }

    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
-        reward = 0
+        reward = 0.0
        expected_answer = entry["answer"]
        if answer is not None:
            try: