fix CCC scoring

This commit is contained in:
Rich Jones 2025-02-26 12:54:40 +01:00
parent 574edb5c5b
commit 229086131a
2 changed files with 12 additions and 7 deletions

View file

@@ -191,17 +191,12 @@ class ColorCubeRotationDataset(ProceduralDataset):
def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
reward = 0.0
metadata = entry["metadata"]
if answer is not None:
try:
answer_formatted = answer.lower()
solved = answer_formatted == metadata["answer"]
answer_formatted = answer.strip().lower()
solved = answer_formatted == entry["answer"].strip().lower()
if solved:
reward = 1.0
elif metadata["answer"] in answer_formatted:
reward = 0.25
elif len(answer.strip()) > 0:
reward = 0.05
else:
reward = 0.01
except:

View file

@@ -49,6 +49,16 @@ def test_deterministic_generation():
assert dataset1[i]["question"] == dataset2[i]["question"]
assert dataset1[i]["answer"] == dataset2[i]["answer"]
for item in dataset1:
assert isinstance(item, dict)
assert "question" in item
assert "answer" in item
assert "metadata" in item
# Test the scoring
assert dataset1.score_answer(answer=item["answer"], entry=item) == 1.0
assert dataset1.score_answer(answer=None, entry=item) == 0.0
def test_cube_rotations():
# Test individual rotation operations