Game of Life partial scoring and rule-clarification (#258)

* partial scoring and rule clarification
* better ql scoring
* word seq reverse typos
This commit is contained in:
Rich Jones 2025-03-03 22:22:39 +01:00 committed by GitHub
parent 340d6a7ab9
commit e3b7365f50
4 changed files with 60 additions and 9 deletions

View file

@ -32,7 +32,7 @@ class GameOfLifeDataset(ProceduralDataset):
def __init__(self, config: GameOfLifeConfig):
self._prompt_templates = [
"What will this Game of Life board look like after {simulation_steps} steps of simulation? Reply as array of arrays representing rows in the grid from top to bottom in JSON format. (An empty 3x3 grid would look like this: [[0,0,0],[0,0,0],[0,0,0]])\n\n{board}."
"What will this Game of Life board look like after {simulation_steps} steps of simulation? Assume a Moore neighborhood and wrapping topology. Reply as array of arrays representing rows in the grid from top to bottom in JSON format. (An empty 3x3 grid would look like this: [[0,0,0],[0,0,0],[0,0,0]])\n\n{board}."
]
super().__init__(config=config, seed=config.seed, size=config.size)
@ -105,13 +105,42 @@ class GameOfLifeDataset(ProceduralDataset):
try:
ans_arr = json.loads(answer)
correct_arr = json.loads(entry["answer"])
if correct_arr != ans_arr:
return 0.01
else:
return 1.0 # Yay
except Exception as e:
except Exception:
return 0.01
total_cells = 0
correct_cells = 0
# Determine if the array is 2D (i.e. a list of lists)
is_2d = correct_arr and isinstance(correct_arr[0], list)
if is_2d:
# Iterate over rows and columns of the expected grid.
for i, expected_row in enumerate(correct_arr):
for j, expected_value in enumerate(expected_row):
total_cells += 1
try:
if ans_arr[i][j] == expected_value:
correct_cells += 1
except (IndexError, TypeError):
# Either the row or the cell is missing, treat as incorrect.
pass
else:
# 1D array case.
for i, expected_value in enumerate(correct_arr):
total_cells += 1
try:
if ans_arr[i] == expected_value:
correct_cells += 1
except IndexError:
pass
# If for some reason there are no cells, return 0.0.
if total_cells == 0:
return 0.0
# Each cell contributes equally.
return correct_cells / total_cells
register_dataset("game_of_life", GameOfLifeDataset, GameOfLifeConfig)

View file

@ -8,7 +8,7 @@ from typing import Optional
from ..data import read_data_file
from ..factory import ProceduralDataset, register_dataset
QUESTION_TEMPLATE = """Solve the following problem. Provide you answer as a comma-separated list of word with a space the comma. Reverse this list of words: {question}"""
# Prompt shown to the solver; `{question}` is filled in with the comma-separated
# word list that must be reversed.
QUESTION_TEMPLATE = """Solve the following problem. Provide your answer as a comma-separated list of words with a space after the comma. Reverse this list of words: {question}"""
@dataclass