def score_answer(self, answer: Optional[str], entry: Dict[str, object]) -> float:
    """Score a candidate answer against the expected answer of a dataset entry.

    The comparison is an exact, case-sensitive string match performed after
    stripping leading/trailing whitespace from the candidate. The expected
    value is read from ``entry["answer"]`` and converted with ``str()`` so
    that non-string values (e.g. a bool) compare against their text form.

    Args:
        answer: The user's answer, or ``None`` when no answer was given.
        entry: The dataset entry; must contain an ``"answer"`` key.

    Returns:
        ``0.0`` if ``answer`` is ``None``, ``1.0`` if the stripped answer
        equals ``str(entry["answer"])``, and ``0.01`` otherwise.
    """
    # Identity check: `== None` can be fooled by objects overriding __eq__.
    if answer is None:
        return 0.0

    expected = str(entry["answer"])
    if answer.strip() != expected:
        # An answer was supplied but is wrong: small non-zero score so it is
        # distinguishable from "no answer at all".
        return 0.01
    return 1.0