diff --git a/README.md b/README.md index ebb6c1fa..eee342ae 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,8 @@ We are building a python library of procedural dataset generators and algorithmi The goal is to generate virtually infinite data with adjustable complexity. +Algorithmic verification allows to train on tasks like Rubik's cube or [Countdown](https://en.wikipedia.org/wiki/Countdown_(game_show)#Numbers_Round) which have many correct solutions. + ## Set up for development 1. Clone the project ``` @@ -38,6 +40,8 @@ data = reasoning_gym.create_dataset('leg_counting', size=10, seed=42) for i, x in enumerate(data): print(f'{i}: q="{x['question']}", a="{x['answer']}"') print('metadata:', x['metadata']) + # use the dataset's `score_answer` method for algorithmic verification + assert data.score_answer(answer=x['answer'], entry=x) == 1.0 ``` Output: diff --git a/reasoning_gym/graphs/quantum_lock.py b/reasoning_gym/graphs/quantum_lock.py index 10e83fdd..402b6f0c 100644 --- a/reasoning_gym/graphs/quantum_lock.py +++ b/reasoning_gym/graphs/quantum_lock.py @@ -1,7 +1,7 @@ -from dataclasses import dataclass import re -from random import Random from collections import deque +from dataclasses import dataclass +from random import Random from typing import Any, Optional from ..factory import ProceduralDataset, register_dataset diff --git a/tests/test_quantum_lock.py b/tests/test_quantum_lock.py index a004afc9..69162750 100644 --- a/tests/test_quantum_lock.py +++ b/tests/test_quantum_lock.py @@ -1,4 +1,5 @@ import pytest + from reasoning_gym.graphs.quantum_lock import QuantumLockConfig, QuantumLockDataset @@ -53,20 +54,20 @@ def test_quantumlock_button_states(): for item in dataset: buttons = item["metadata"]["buttons"] - + # Check button properties for btn in buttons: assert "name" in btn assert "type" in btn assert "value" in btn assert "active_state" in btn - + # Verify button name format assert btn["name"] in ["A", "B", "C"] - + # Verify operation type assert 
btn["type"] in ["add", "subtract", "multiply"] - + # Verify state constraints assert btn["active_state"] in ["red", "green", "any"] @@ -79,19 +80,16 @@ def test_quantumlock_solution_validation(): for item in dataset: solution = item["metadata"]["solution_path"] target = item["metadata"]["target_value"] - + # Test solution simulation - final_value = dataset.simulate_sequence( - item["metadata"], - solution - ) + final_value = dataset.simulate_sequence(item["metadata"], solution) assert final_value == target # Test invalid button sequences - assert dataset.simulate_sequence( - item["metadata"], - ["X", "Y", "Z"] # Invalid buttons - ) == item["metadata"]["initial_value"] + assert ( + dataset.simulate_sequence(item["metadata"], ["X", "Y", "Z"]) # Invalid buttons + == item["metadata"]["initial_value"] + ) def test_quantumlock_scoring(): @@ -101,17 +99,17 @@ def test_quantumlock_scoring(): for item in dataset: solution = item["metadata"]["solution_path"] - + # Test correct solution assert dataset.score_answer(solution, item) == 1.0 - + # Test empty/None answers assert dataset.score_answer(None, item) == 0.0 assert dataset.score_answer("", item) == 0.1 - + # Test invalid buttons assert dataset.score_answer("XYZ", item) == 0.1 - + # Test case insensitivity if solution: lower_solution = "".join(solution).lower()