Constrain reward

2026-04-22 16:49:06 +00:00 · 2025-02-17 19:20:45 +00:00 · 2025-02-17 19:20:45 +00:00 · eb708e88b3
commit eb708e88b3
parent 1d0cad46f2
1 changed file with 2 additions and 2 deletions
--- a/reasoning_gym/code/bf.py
+++ b/reasoning_gym/code/bf.py
@@ -127,10 +127,10 @@ int main() {{
            if entry["answer"] in answer.splitlines():
                # We can be quite confident that the correct answer was given
                # It was likely just given alongside an explanation
-                return 0.9 * len(answer) / len(entry["answer"])
+                return max(0.9 * len(answer) / len(entry["answer"]), 0.1)
            if entry["answer"] in answer:
                # Since answers are English words, some risk of the response coincidentally containing the answer
-                return 0.5 * len(answer) / len(entry["answer"])
+                return max(0.5 * len(answer) / len(entry["answer"]), 0.1)
            return 0.01
        else:
            return 1.0  # Yay