Minor question template & score_answer improvements (#261)

* math prompt improvements
* ignore brackets in complex_arithmetic results
* improve additional instruction in prompt of polynomial_equations
* more strict tests for score_answer in polynomial_equations
* simplify special reward handling
* fix test_intermediate_integration
* fix sokoban dataset
* add common dataset score_answer consistency test
This commit is contained in:
Andreas Köpf 2025-03-04 21:55:09 +01:00 committed by GitHub
parent 061282e373
commit 5d7fbac0ad
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
106 changed files with 403 additions and 507 deletions

View file

@@ -0,0 +1,17 @@
import reasoning_gym
from reasoning_gym.factory import DATASETS
def test_score_answer_consistency():
    """Check that each dataset scores its own reference answers as 1.0.

    For every name registered in ``DATASETS`` except ``"composite"``, a
    10-item dataset is created with a fixed seed. Each entry's ``"answer"``
    must be ``None`` or a ``str``; every non-``None`` answer is passed back
    to the dataset's ``score_answer`` and must score exactly ``1.0``.
    """
    for dataset_name in DATASETS.keys():
        # "composite" is excluded by name.
        # NOTE(review): presumably it cannot be created without extra
        # configuration - confirm against the factory.
        if dataset_name != "composite":
            ds = reasoning_gym.create_dataset(dataset_name, size=10, seed=1234)
            for entry in ds:
                expected = entry["answer"]

                # Entries without a reference answer have nothing to verify.
                if expected is None:
                    continue

                assert isinstance(
                    expected, str
                ), f"{dataset_name} answer must be str, is {type(entry['answer'])}"

                # The reference answer must receive the full score.
                score = ds.score_answer(answer=expected, entry=entry)
                assert score == 1.0, f"inconsistent score_answer {dataset_name}"