Feat/intragen experiments (#414)

* added curriculum * readapted readme * corrected small errors * Delete eval/eval/r1/algorithmic/word_sorting.json * removed redundant argument * added spell * removed duplicated fit * changed config * added composite changes * added composite changes * updated yaml * added spell backward * updated readme * added qwen2.5 * added * Add files via upload * updated missing trainer func * updated curr * updated spell back * updated correctness score func * updated configs * added local evals * added updates * updated datasets * added fsdp to hf utility * added algorithmic qwen 3b yaml * updated readme * updated configs * added prepend token * updated with thinking token * updated test score board * resolved comments * added evaluation scripts * removed results from pr * added config * added partial reward scoring * added evaluation composites * added training configs * added games eval * added Rubik's cube * resolved merge conflicts * added games config * added latest eval configs * updated structure * Delete training/evaluations/eval_graphs_composite.yaml --------- Co-authored-by: joesharratt1229 <joesharrat1229@gmail.com>
2026-04-22 16:49:06 +00:00 · 2025-04-16 07:04:52 +01:00 · 2025-04-16 07:04:52 +01:00 · d0ef136d5b
commit d0ef136d5b
parent 224532f12a
21 changed files with 1331 additions and 48 deletions
--- a/reasoning_gym/cognition/rubiks_cube.py
+++ b/reasoning_gym/cognition/rubiks_cube.py
@ -121,29 +121,49 @@ class RubiksCubeDataset(ProceduralDataset):
            },
        }

+    def partial_score(self, cube: Cube) -> float:
+        """
+        Returns a fraction between 0 and 1, indicating how many stickers are
+        correctly positioned (i.e., match the solved color for that face).
+        """
+        total_stickers = 6 * (cube.size**2)
+        correct_stickers = 0
+
+        for face_index in range(6):
+            face = cube.faces[face_index]
+
+            solved_color = face[cube.size // 2][cube.size // 2].color
+            for row in range(cube.size):
+                for col in range(cube.size):
+                    sticker = face[row][col]
+                    if sticker.color == solved_color:
+                        correct_stickers += 1
+
+        return correct_stickers / total_stickers
+
    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
-        """Determine if the solution provided solves the cube"""
-        reward = 0.0  # default reward
+        """Determine if the solution provided solves the cube, with partial rewards."""
+        reward = 0.0  # default
        if answer is not None:
-            # Reconstruct the test cube
            eval_cube = Cube(entry["metadata"]["cube_size"])
            eval_cube.rotate(entry["metadata"]["scramble_moves"])
-
-            # Test the solution
            try:
                expanded_answer = self.expand_moves(answer)
                eval_cube.rotate(expanded_answer)
-                solved = eval_cube.is_done()

+                # 3) Check if fully solved
+                solved = eval_cube.is_done()
                if solved:
                    reward = 1.0
-                elif len(answer.strip()) > 0:  # encourage non-empty answers
-                    reward = 0.05  # Incorrect, but rotate could parse the answer
                else:
-                    reward = 0.01
-            except:
-                reward = 0.01  # At least you tried
+                    partial = self.partial_score(eval_cube)

+                    if len(answer.strip()) > 0:
+                        reward = max(0.05, partial)
+                    else:
+                        reward = max(0.01, partial)
+            except:
+                reward = 0.01
        return reward

    def remove_ansi(self, line):