mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
fix CCC scoring
This commit is contained in:
parent
574edb5c5b
commit
229086131a
2 changed files with 12 additions and 7 deletions
|
|
@ -191,17 +191,12 @@ class ColorCubeRotationDataset(ProceduralDataset):
|
||||||
|
|
||||||
def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
|
def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
|
||||||
reward = 0.0
|
reward = 0.0
|
||||||
metadata = entry["metadata"]
|
|
||||||
if answer is not None:
|
if answer is not None:
|
||||||
try:
|
try:
|
||||||
answer_formatted = answer.lower()
|
answer_formatted = answer.strip().lower()
|
||||||
solved = answer_formatted == metadata["answer"]
|
solved = answer_formatted == entry["answer"].strip().lower()
|
||||||
if solved:
|
if solved:
|
||||||
reward = 1.0
|
reward = 1.0
|
||||||
elif metadata["answer"] in answer_formatted:
|
|
||||||
reward = 0.25
|
|
||||||
elif len(answer.strip()) > 0:
|
|
||||||
reward = 0.05
|
|
||||||
else:
|
else:
|
||||||
reward = 0.01
|
reward = 0.01
|
||||||
except:
|
except:
|
||||||
|
|
|
||||||
|
|
@ -49,6 +49,16 @@ def test_deterministic_generation():
|
||||||
assert dataset1[i]["question"] == dataset2[i]["question"]
|
assert dataset1[i]["question"] == dataset2[i]["question"]
|
||||||
assert dataset1[i]["answer"] == dataset2[i]["answer"]
|
assert dataset1[i]["answer"] == dataset2[i]["answer"]
|
||||||
|
|
||||||
|
for item in dataset1:
|
||||||
|
assert isinstance(item, dict)
|
||||||
|
assert "question" in item
|
||||||
|
assert "answer" in item
|
||||||
|
assert "metadata" in item
|
||||||
|
|
||||||
|
# Test the scoring
|
||||||
|
assert dataset1.score_answer(answer=item["answer"], entry=item) == 1.0
|
||||||
|
assert dataset1.score_answer(answer=None, entry=item) == 0.0
|
||||||
|
|
||||||
|
|
||||||
def test_cube_rotations():
|
def test_cube_rotations():
|
||||||
# Test individual rotation operations
|
# Test individual rotation operations
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue