include ranges rather than sampled values in difficulty metadata dicts (#387)

* update difficulty metadata for logic datasets

* update difficulty metadata for graph datasets

* update difficulty metadata for geometry datasets

* update difficulty metadata for games datasets

* update difficulty metadata for cognition datasets

* update difficulty metadata for arithmetic datasets

* update difficulty metadata for arc datasets

* update difficulty metadata for algorithmic datasets

* update difficulty metadata for algebra datasets

* use tuples

* update tests

* update tests
This commit is contained in:
Oliver Stanley 2025-03-20 09:27:03 +00:00 committed by GitHub
parent b69c35818a
commit 7475a20700
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
80 changed files with 304 additions and 126 deletions

View file

@@ -141,7 +141,9 @@ class ColorCubeRotationDataset(ProceduralDataset):
"rotations": [r.value for r in rotations],
"target_side": target_side.value,
"num_rotations": num_rotations,
"difficulty": {"rotations": num_rotations},
"difficulty": {
"rotations": (self.config.min_rotations, self.config.max_rotations),
},
},
}

View file

@@ -188,7 +188,9 @@ class FigletFontDataset(ProceduralDataset):
"metadata": {
"font": chosen_font,
"space_letters": self.config.space_letters,
"difficulty": {"word_len": len(word)},
"difficulty": {
"word_len": (self.config.min_word_len, self.config.max_word_len),
},
},
}

View file

@@ -140,9 +140,11 @@ class ModuloGridDataset(ProceduralDataset):
"target": target,
"operation": operation,
"difficulty": {
"holes": self.config.max_holes,
"size_x": self.config.size_x,
"size_y": self.config.size_y,
"holes": self.config.max_holes,
"divisor": self.config.max_divisor,
"target": self.config.max_target,
},
},
}

View file

@@ -103,7 +103,13 @@ class NeedleHaystackDataset(ProceduralDataset):
return {
"question": full_text,
"answer": stack["needle"][0],
"metadata": {"question": question, "difficulty": {"num_statements": num_statements}},
"metadata": {
"question": question,
"num_statements": num_statements,
"difficulty": {
"num_statements": (self.config.min_num_statements, self.config.max_num_statements),
},
},
}
def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:

View file

@@ -195,7 +195,14 @@ class NumberSequenceDataset(ProceduralDataset):
return {
"question": ", ".join(map(str, visible_terms)) + ", ?",
"answer": str(sequence[-1]),
"metadata": {"rule": rule.to_string(), "complexity": complexity, "sequence": sequence},
"metadata": {
"rule": rule.to_string(),
"complexity": complexity,
"sequence": sequence,
"difficulty": {
"max_complexity": self.config.max_complexity,
},
},
}

View file

@@ -117,7 +117,14 @@ class RectangleCountDataset(ProceduralDataset):
return {
"question": QUESTION_TEMPLATE.format(puzzle=puzzle),
"answer": str(answer),
"metadata": {"puzzle": puzzle, "solution": answer, "difficulty": {"max_rectangles": target}},
"metadata": {
"puzzle": puzzle,
"solution": answer,
"num_rectangles": target,
"difficulty": {
"max_rectangles": self.config.max_rectangles,
},
},
}
def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:

View file

@@ -110,8 +110,8 @@ class RubiksCubeDataset(ProceduralDataset):
"scramble_moves": " ".join([str(move) for move in scramble_moves]),
"example_correct_answer": actions_string,
"difficulty": {
"scramble_steps": num_steps,
"cube_size": self.config.cube_size,
"scramble_steps": (self.config.min_scramble_steps, self.config.max_scramble_steps),
},
},
}