Minor question template & score_answer improvements (#261)

* math prompt improvements
* ignore brackets in complex_arithmetic results
* improve additional instruction in prompt of polynomial_equations
* more strict tests for score_answer in polynomial_equations
* simplify special reward handling
* fix test_intermediate_integration
* fix sokoban dataset
* add common dataset score_answer consistency test
This commit is contained in:
Andreas Köpf 2025-03-04 21:55:09 +01:00 committed by GitHub
parent bf24999bb0
commit b2904ccab9
106 changed files with 403 additions and 507 deletions

View file

@@ -116,7 +116,7 @@ class LetterJumbleDataset(ProceduralDataset):
# Each word in the expected answer is worth an equal fraction of 1.0
total_words = len(expected_words)
score_per_word = 1.0 / total_words if total_words else 0
score_per_word = 1.0 / total_words if total_words > 0 else 0
# Calculate scores word by word
scores = []
@@ -142,18 +142,16 @@ class LetterJumbleDataset(ProceduralDataset):
float: The computed score between 0.0 and 1.0.
"""
if not answer:
if not isinstance(answer, str):
return 0.0
oracle_answer = entry["answer"].strip().lower()
if answer:
answer = answer.strip().lower()
if answer == oracle_answer:
return 1.0 # Perfect score!
else:
partial_score = self.partial(oracle_answer, answer)
return partial_score
return 0.01
answer = answer.strip().lower()
if answer == oracle_answer:
return 1.0 # Perfect score!
else:
partial_score = self.partial(oracle_answer, answer)
return partial_score
register_dataset("letter_jumble", LetterJumbleDataset, LetterJumbleConfig)