reasoning-gym/tests/test_dataset_common.py
Andreas Köpf 5d7fbac0ad
Minor question template & score_answer improvements (#261)
* math prompt improvements
* ignore brackets in complex_arithmetic results
* improve additional instruction in prompt of polynomial_equations
* more strict tests for score_answer in polynomial_equations
* simplify special reward handling
* fix test_intermediate_integration
* fix sokoban dataset
* add common dataset score_answer consistency test
2025-03-04 21:55:09 +01:00

17 lines
708 B
Python

import reasoning_gym
from reasoning_gym.factory import DATASETS
def test_score_answer_consistency():
    """Verify each dataset scores its own reference answer as 1.0.

    For every name registered in ``DATASETS`` (except ``"composite"``), a
    10-entry dataset is created with seed 1234. Each entry's ``"answer"``
    must be either ``None`` or a ``str``; whenever it is not ``None``, passing
    it back to the dataset's ``score_answer`` must yield exactly ``1.0``.
    """
    for dataset_name in DATASETS.keys():
        # NOTE(review): "composite" is skipped here; presumably it cannot be
        # built from just size/seed -- confirm against the factory.
        if dataset_name == "composite":
            continue

        dataset = reasoning_gym.create_dataset(dataset_name, size=10, seed=1234)

        for entry in dataset:
            reference = entry["answer"]

            # Entries without a reference answer have nothing to score.
            if reference is None:
                continue

            assert isinstance(reference, str), f"{dataset_name} answer must be str, is {type(entry['answer'])}"

            score = dataset.score_answer(answer=reference, entry=entry)
            assert score == 1.0, f"inconsistent score_answer {dataset_name}"