Minor question template & score_answer improvements (#261)

* math prompt improvements
* ignore brackets in complex_arithmetic results
* improve additional instruction in prompt of polynomial_equations
* more strict tests for score_answer in polynomial_equations
* simplify special reward handling
* fix test_intermediate_integration
* fix sokoban dataset
* add common dataset score_answer consistency test
This commit is contained in:
Andreas Köpf 2025-03-04 21:55:09 +01:00 committed by GitHub
parent 061282e373
commit 5d7fbac0ad
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
106 changed files with 403 additions and 507 deletions

View file

@@ -339,9 +339,7 @@ class SelfReferenceDataset(ProceduralDataset):
# Solve puzzle
solutions = solve_puzzle_dynamic(puzzle)
for idx, sol in enumerate(solutions, start=1):
sol_str = ["True" if s else "False" for s in sol]
answer = len(solutions)
answer = str(len(solutions))
return {
"question": puzz_s,
@@ -362,12 +360,10 @@ class SelfReferenceDataset(ProceduralDataset):
float: The computed score between 0.0 and 1.0.
"""
if answer == None:
return 0.0
if str(answer) != str(entry["answer"]):
return 0.1
else:
return 1.0 # Yay
if isinstance(answer, str):
if answer == str(entry["answer"]):
return 1.0 # Yay
return 0.0
register_dataset("self_reference", SelfReferenceDataset, SelfReferenceConfig)