mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-22 16:49:06 +00:00
Feat/intragen experiments (#414)
* added curriculum * readapted readme * corrected small errors * Delete eval/eval/r1/algorithmic/word_sorting.json * removed redundant argument * added spell * removed duplicated fit * changed config * added composite changes * added composite changes * updated yaml * added spell backward * updated read me * added qwen2.5 * added * Add files via upload * updated missing trainer func * updated curr * updated spell back * updated correctness score func * updated configs * added local evals * added updates * updated datasets * added fsdp to hf utility * added algorithmic qwen 3b yaml * updated read me * updated configs * added preappend token * updated with thinking token * updated test score board * resolved comments * added evaluation scripts * removed results from pr * added config * added partial reward scoring * added evaluation composites * added training configs * added games eval * added Rubik's cube * resolved merge conflicts * added games config * added latest eval configs * updated structure * Delete training/evaluations/eval_graphs_composite.yaml --------- Co-authored-by: joesharratt1229 <joesharrat1229@gmail.com>
This commit is contained in:
parent
224532f12a
commit
d0ef136d5b
21 changed files with 1331 additions and 48 deletions
|
|
@ -121,29 +121,49 @@ class RubiksCubeDataset(ProceduralDataset):
|
|||
},
|
||||
}
|
||||
|
||||
def partial_score(self, cube: Cube) -> float:
    """
    Compute the share of stickers (a value from 0 to 1) whose color equals
    the reference color of the face they sit on.

    The reference color of a face is the color of its sticker at index
    [size // 2][size // 2], which is the center sticker when the cube size
    is odd. All six faces of `cube` are counted.
    """
    side = cube.size
    mid = side // 2
    matching = 0

    for idx in range(6):
        grid = cube.faces[idx]
        # Color every sticker on this face is compared against
        reference = grid[mid][mid].color
        matching += sum(
            1
            for r in range(side)
            for c in range(side)
            if grid[r][c].color == reference
        )

    return matching / (6 * (side**2))
|
||||
|
||||
def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
    """Score a proposed move sequence for the scrambled cube described by `entry`.

    The cube is rebuilt from entry["metadata"]["cube_size"] and
    entry["metadata"]["scramble_moves"]; the answer is then expanded with
    `expand_moves` and applied to it.

    Returns:
        0.0 when `answer` is None,
        1.0 when the cube is solved after applying the answer,
        0.01 when expanding, applying or scoring the answer raises,
        otherwise the partial score of the resulting cube, floored at 0.05
        for a non-blank answer and at 0.01 for a blank one.
    """
    if answer is None:
        return 0.0

    # Reconstruct the test cube
    eval_cube = Cube(entry["metadata"]["cube_size"])
    eval_cube.rotate(entry["metadata"]["scramble_moves"])

    # Test the solution
    try:
        eval_cube.rotate(self.expand_moves(answer))

        # Check if fully solved
        if eval_cube.is_done():
            return 1.0

        # Not solved: grant partial credit, but never less than the floor
        # that rewards an answer which could at least be applied.
        partial = self.partial_score(eval_cube)
        floor = 0.05 if answer.strip() else 0.01
        return max(floor, partial)
    except Exception:
        # The answer could not be parsed/applied (or scoring failed): minimum
        # reward. Catch Exception rather than using a bare except so that
        # KeyboardInterrupt and SystemExit still propagate.
        return 0.01
|
||||
|
||||
def remove_ansi(self, line):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue