mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
* math prompt improvements * ignore brackets in complex_arithmetic results * improve additional instruction in prompt of polynomial_equations * more strict tests for score_answer in polynomial_equations * simplify special reward handling * fix test_intermediate_integration * fix sokoban dataset * add common dataset score_answer consistency test
17 lines
708 B
Python
17 lines
708 B
Python
import reasoning_gym
|
|
from reasoning_gym.factory import DATASETS
|
|
|
|
|
|
def test_score_answer_consistency():
    """Check that each dataset's reference answer gets a perfect score.

    For every name registered in ``DATASETS`` (except ``"composite"``), a
    small dataset is created with a fixed seed.  Each entry's ``"answer"``
    must be either ``None`` or a ``str``; when it is a string, passing it
    back to ``score_answer`` for the same entry must return exactly ``1.0``.
    """
    for dataset_name in DATASETS:
        # "composite" is skipped by name.
        # NOTE(review): presumably it cannot be created without extra
        # configuration -- confirm against the factory.
        if dataset_name == "composite":
            continue

        dataset = reasoning_gym.create_dataset(dataset_name, size=10, seed=1234)

        for entry in dataset:
            answer = entry["answer"]

            # Entries without a reference answer have nothing to score.
            if answer is None:
                continue

            assert isinstance(answer, str), f"{dataset_name} answer must be str, is {type(answer)}"

            score = dataset.score_answer(answer=answer, entry=entry)
            assert score == 1.0, f"inconsistent score_answer {dataset_name}"
|