mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-24 17:05:03 +00:00
Game of Life partial scoring and rule-clarification (#258)
* partial scoring and rule clarification * better ql scoring * word seq reverse typos
This commit is contained in:
parent
340d6a7ab9
commit
e3b7365f50
4 changed files with 60 additions and 9 deletions
|
|
@ -32,7 +32,7 @@ class GameOfLifeDataset(ProceduralDataset):
|
|||
|
||||
def __init__(self, config: GameOfLifeConfig):
|
||||
self._prompt_templates = [
|
||||
"What will this Game of Life board look like after {simulation_steps} steps of simulation? Reply as array of arrays representing rows in the grid from top to bottom in JSON format. (An empty 3x3 grid would look like this: [[0,0,0],[0,0,0],[0,0,0]])\n\n{board}."
|
||||
"What will this Game of Life board look like after {simulation_steps} steps of simulation? Assume a Moore neighborhood and wrapping topology. Reply as array of arrays representing rows in the grid from top to bottom in JSON format. (An empty 3x3 grid would look like this: [[0,0,0],[0,0,0],[0,0,0]])\n\n{board}."
|
||||
]
|
||||
|
||||
super().__init__(config=config, seed=config.seed, size=config.size)
|
||||
|
|
@ -105,13 +105,42 @@ class GameOfLifeDataset(ProceduralDataset):
|
|||
try:
|
||||
ans_arr = json.loads(answer)
|
||||
correct_arr = json.loads(entry["answer"])
|
||||
|
||||
if correct_arr != ans_arr:
|
||||
return 0.01
|
||||
else:
|
||||
return 1.0 # Yay
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
return 0.01
|
||||
|
||||
total_cells = 0
|
||||
correct_cells = 0
|
||||
|
||||
# Determine if the array is 2D (i.e. a list of lists)
|
||||
is_2d = correct_arr and isinstance(correct_arr[0], list)
|
||||
|
||||
if is_2d:
|
||||
# Iterate over rows and columns of the expected grid.
|
||||
for i, expected_row in enumerate(correct_arr):
|
||||
for j, expected_value in enumerate(expected_row):
|
||||
total_cells += 1
|
||||
try:
|
||||
if ans_arr[i][j] == expected_value:
|
||||
correct_cells += 1
|
||||
except (IndexError, TypeError):
|
||||
# Either the row or the cell is missing, treat as incorrect.
|
||||
pass
|
||||
else:
|
||||
# 1D array case.
|
||||
for i, expected_value in enumerate(correct_arr):
|
||||
total_cells += 1
|
||||
try:
|
||||
if ans_arr[i] == expected_value:
|
||||
correct_cells += 1
|
||||
except IndexError:
|
||||
pass
|
||||
|
||||
# If for some reason there are no cells, return 0.0.
|
||||
if total_cells == 0:
|
||||
return 0.0
|
||||
|
||||
# Each cell contributes equally.
|
||||
return correct_cells / total_cells
|
||||
|
||||
|
||||
register_dataset("game_of_life", GameOfLifeDataset, GameOfLifeConfig)
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ from typing import Optional
|
|||
from ..data import read_data_file
|
||||
from ..factory import ProceduralDataset, register_dataset
|
||||
|
||||
QUESTION_TEMPLATE = """Solve the following problem. Provide you answer as a comma-separated list of word with a space the comma. Reverse this list of words: {question}"""
|
||||
# Prompt shown to the solver for the word-sequence-reversal task; `{question}`
# is replaced with the comma-separated word list to be reversed.
QUESTION_TEMPLATE = """Solve the following problem. Provide your answer as a comma-separated list of words with a space after the comma. Reverse this list of words: {question}"""
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue