From a9549057e9438412421792d28652c9ffaf6e0c4e Mon Sep 17 00:00:00 2001
From: "Andreas Koepf (aider)"
Date: Sun, 2 Feb 2025 23:31:20 +0100
Subject: [PATCH] test: Add scoring tests for Arc1D dataset answer evaluation

---
 tests/test_arc_1d.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/tests/test_arc_1d.py b/tests/test_arc_1d.py
index 10443e76..197c4d9d 100644
--- a/tests/test_arc_1d.py
+++ b/tests/test_arc_1d.py
@@ -86,3 +86,22 @@ def test_arc_1d_iteration():
     first_items = list(dataset)
     second_items = list(dataset)
     assert first_items == second_items, "Multiple iterations should yield same items"
+
+
+def test_arc_1d_scoring():
+    """Check the score ``score_answer`` assigns to each kind of response."""
+    arc_dataset = Arc1DDataset(Arc1DConfig(size=1, seed=42))
+    entry = arc_dataset[0]
+    score = arc_dataset.score_answer
+
+    # The exact answer is expected to score 1.0
+    assert score(entry["answer"], entry) == 1.0
+
+    # A longer response that merely contains the answer is expected to score 0.5
+    assert score(f"The answer is: {entry['answer']}", entry) == 0.5
+
+    # An unrelated string is expected to score 0.01
+    assert score("wrong answer", entry) == 0.01
+
+    # A missing (None) answer is expected to score 0.0
+    assert score(None, entry) == 0.0