mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
Minor question template & score_answer improvements (#261)
* math prompt improvements
* ignore brackets in complex_arithmetic results
* improve additional instruction in prompt of polynomial_equations
* more strict tests for score_answer in polynomial_equations
* simplify special reward handling
* fix test_intermediate_integration
* fix sokoban dataset
* add common dataset score_answer consistency test
This commit is contained in:
parent
061282e373
commit
5d7fbac0ad
106 changed files with 403 additions and 507 deletions
17
tests/test_dataset_common.py
Normal file
17
tests/test_dataset_common.py
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
import reasoning_gym
|
||||
from reasoning_gym.factory import DATASETS
|
||||
|
||||
|
||||
def test_score_answer_consistency():
    """Check each dataset's own answers against its ``score_answer``.

    For every name in ``DATASETS`` except ``"composite"``, a 10-entry
    dataset is created with a fixed seed. Each entry's ``"answer"`` must be
    ``None`` or a ``str``; a non-``None`` answer must score exactly ``1.0``
    when passed back to the dataset's ``score_answer``.
    """
    for name in DATASETS:
        # NOTE(review): "composite" is skipped; presumably it cannot be
        # created from a name/size/seed alone -- confirm against the factory.
        if name == "composite":
            continue

        ds = reasoning_gym.create_dataset(name, size=10, seed=1234)
        for item in ds:
            expected = item["answer"]
            if expected is None:
                # Entries without a reference answer have nothing to score.
                continue

            assert isinstance(expected, str), f"{name} answer must be str, is {type(expected)}"

            score = ds.score_answer(answer=expected, entry=item)
            assert score == 1.0, f"inconsistent score_answer {name}"
|
||||
Loading…
Add table
Add a link
Reference in a new issue