mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-26 17:13:17 +00:00
normalize answer and partial reward
This commit is contained in:
parent
1f9d9d27ab
commit
ef2a412c8b
2 changed files with 52 additions and 1 deletions
|
|
@ -85,6 +85,37 @@ def test_prime_factorization_known_values():
|
|||
assert item["answer"] == "2 × 2 × 3"
|
||||
|
||||
|
||||
def test_prime_factorization_score_answer():
    """Check the score awarded to exact, reformatted, partial, wrong and missing answers."""
    # min_value == max_value forces the number to factorize to be 12.
    dataset = PrimeFactorizationDataset(
        PrimeFactorizationConfig(min_value=12, max_value=12, size=1, seed=42)
    )
    entry = dataset[0]

    # (submitted answer, expected score) pairs, checked in order.
    expectations = (
        ("2 × 2 × 3", 1.0),  # perfectly ordered answer
        ("2×2×3", 1.0),  # no white space (still correct)
        ("2 × 3 × 2", 1.0),  # shuffled factors (still correct)
        ("2 × 6", 0.5),  # partially correct: 6 is not fully factorized
        ("2 × 5", 0.01),  # incorrect answer
        (None, 0.0),  # answer is None
    )
    for submitted, expected in expectations:
        assert dataset.score_answer(submitted, entry) == expected
|
||||
|
||||
|
||||
def is_prime(n: int) -> bool:
|
||||
"""Helper function to check if a number is prime"""
|
||||
if n < 2:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue