Merge branch 'main' into env/pool_matrix

2026-04-23 16:55:05 +00:00 · 2025-02-12 14:07:25 +01:00 · 2025-02-12 14:07:25 +01:00 · 1669bba91b
commit 1669bba91b
parent 41556dd054 b741cab040
24 changed files with 1562 additions and 22 deletions
--- a/reasoning_gym/algorithmic/__init__.py
+++ b/reasoning_gym/algorithmic/__init__.py
@ -6,9 +6,11 @@ Algorithmic tasks for training reasoning capabilities:
 - Pattern matching
 """

+from .ab import ABConfig, ABDataset
 from .base_conversion import BaseConversionConfig, BaseConversionDataset
 from .binary_matrix import BinaryMatrixConfig, BinaryMatrixDataset
 from .caesar_cipher import CaesarCipherConfig, CaesarCipherDataset
+from .count_primes import CountPrimesConfig, CountPrimesDataset
 from .group_anagrams import GroupAnagramsConfig, GroupAnagramsDataset
 from .isomorphic_strings import IsomorphicStringsConfig, IsomorphicStringsDataset
 from .letter_counting import LetterCountingConfig, LetterCountingDataset
@ -69,4 +71,8 @@ __all__ = [
    "BinaryMatrixDataset",
    "PoolMatrixConfig",
    "PoolMatrixDataset",
+    "ABConfig",
+    "ABDataset",
+    "CountPrimesConfig",
+    "CountPrimesDataset",
 ]
--- a/reasoning_gym/algorithmic/ab.py
+++ b/reasoning_gym/algorithmic/ab.py
@ -0,0 +1,154 @@
+from dataclasses import dataclass
+from random import Random
+from typing import Any, Dict, Optional
+
+from ..factory import ProceduralDataset, register_dataset
+
+
+def generate_program(length, rng):
+    """Return a random A::B program: a list of `length` tokens, each picked with `rng.choice`."""
+    tokens = ["A#", "B#", "#A", "#B"]
+    program = []
+    for _ in range(length):
+        # One rng.choice call per token, in order, so a seeded rng reproduces the program.
+        program.append(rng.choice(tokens))
+    return program
+
+
+def compute_steps(program, max_steps=100):
+    """Rewrite an A::B program step by step and report whether it failed to halt.
+
+    Each step rewrites the leftmost adjacent token pair that matches a rule.
+
+    Returns:
+        A `(steps, non_halting)` tuple. `steps` lists every program state,
+        starting with a copy of `program`. `non_halting` is False when no rule
+        applies any more, and True when a state repeats or `max_steps`
+        rewrites have been performed.
+    """
+    # Replacement tokens for every adjacent pair whose '#' signs face each other.
+    rewrites = {
+        ("A#", "#A"): [],
+        ("A#", "#B"): ["#B", "A#"],
+        ("B#", "#A"): ["#A", "B#"],
+        ("B#", "#B"): [],
+    }
+    history = [program.copy()]
+    visited = {tuple(program)}
+
+    for _ in range(max_steps):
+        state = history[-1]
+        successor = None
+
+        for pos, pair in enumerate(zip(state, state[1:])):
+            replacement = rewrites.get(pair)
+            if replacement is not None:
+                successor = state[:pos] + replacement + state[pos + 2 :]
+                break
+
+        if successor is None:
+            # No rule matched anywhere: the program has halted.
+            return history, False
+
+        key = tuple(successor)
+        if key in visited:
+            # A repeated state means the rewriting would cycle forever.
+            return history, True
+
+        history.append(successor)
+        visited.add(key)
+
+    # Step budget exhausted; treated as non-halting.
+    return history, True
+
+
+@dataclass
+class ABConfig:
+    """Settings controlling how A::B tasks are generated."""
+
+    seed: Optional[int] = None  # passed to the dataset constructor as its seed
+    size: int = 500  # passed to the dataset constructor as its size
+    length: int = 10  # number of tokens in each generated program
+
+    def validate(self) -> None:
+        """Assert that `length` and `size` are both strictly positive."""
+        assert 0 < self.length, "length must be greater than 0"
+        assert 0 < self.size, "size must be greater than 0"
+
+
+class ABDataset(ProceduralDataset):
+    """Generates A::B tasks, as described by @VictorTaelin [here](https://x.com/VictorTaelin/status/1776096481704804789)"""
+
+    def __init__(self, config: ABConfig):
+        super().__init__(config=config, seed=config.seed, size=config.size)
+
+    def __getitem__(self, idx: int) -> dict:
+        """Generate a single A::B task.
+
+        Args:
+            idx: Item index; it is added to the dataset seed to seed this item's RNG.
+
+        Returns:
+            dict with keys:
+                - question: str, the task description containing the A::B program
+                - answer: str, the space-joined final state of the program
+                - metadata: dict, currently always empty
+        """
+        rng = Random(self.seed + idx)
+
+        # Resample until compute_steps reports a halting program, so the
+        # final state used as the answer is well defined.
+        while True:
+            initial_program = generate_program(self.config.length, rng)
+            steps, non_halting = compute_steps(initial_program)
+            if not non_halting:
+                break
+
+        # Via:
+        #   https://x.com/VictorTaelin/status/1776248021858111542
+        #   https://gist.github.com/VictorTaelin/e514844f4df9e5f182b28e5a07e44b17
+        prompt = f"""A::B is a system with 4 tokens: `A#`, `#A`, `B#` and `#B`.
+
+An A::B program is a sequence of tokens. Example:
+
+    B# A# #B #A B#
+
+To *compute* a program, we must rewrite neighbor tokens, using the rules:
+
+    A# #A ... becomes ... nothing
+    A# #B ... becomes ... #B A#
+    B# #A ... becomes ... #A B#
+    B# #B ... becomes ... nothing
+
+In other words, whenever two neighbor tokens have their '#' facing each-other,
+they must be rewritten according to the corresponding rule. For example, the
+first example shown here is computed as:
+
+    B# A# #B #A B# =
+    B# #B A# #A B# =
+    A# #A B# =
+    B#
+
+The steps were:
+1. We replaced `A# #B` by `#B A#`.
+2. We replaced `B# #B` by nothing.
+3. We replaced `A# #A` by nothing.
+The final result was just `B#`.
+
+Now, consider the following program:
+
+{' '.join(initial_program)}
+
+Return the final state of the program.
+"""
+
+        return {
+            "question": prompt,
+            "answer": " ".join(steps[-1]),
+            "metadata": {},
+        }
+
+    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
+        """Score an answer against the entry's expected final program state.
+
+        Args:
+            answer (Optional[str]): The user's answer.
+            entry (Dict[str, Any]): The original dataset entry containing the correct answer.
+
+        Returns:
+            float: 1.0 when `answer` equals `entry["answer"]` exactly, 0.01 for
+            any other non-None answer, and 0.0 when `answer` is None.
+        """
+        # Identity check: None is a singleton and `==` can be overridden.
+        if answer is None:
+            return 0.0
+        if answer != entry["answer"]:
+            return 0.01
+        return 1.0
+
+
+# Register the dataset
+register_dataset("ab", ABDataset, ABConfig)
--- a/reasoning_gym/algorithmic/count_primes.py
+++ b/reasoning_gym/algorithmic/count_primes.py
@ -0,0 +1,63 @@
+"""Count prime numbers in a given interval.
+
+Solution obtained with Sieve of Eratosthenes:
+https://en.wikipedia.org/wiki/Sieve_of_Eratosthenes
+"""
+
+import math
+from dataclasses import dataclass
+from random import Random
+from typing import Optional
+
+from ..factory import ProceduralDataset, register_dataset
+
+QUESTION_TEMPLATE = """Count how many prime numbers there are between {start} and {end} (inclusive) ?"""
+
+
+@dataclass
+class CountPrimesConfig:
+    """Settings for generating Count Primes questions."""
+
+    max_n: int = 10_000  # Largest value an interval endpoint can take
+
+    size: int = 500  # Virtual dataset size
+    seed: Optional[int] = None
+
+    def validate(self):
+        """Assert that `max_n` is at least 1."""
+        assert self.max_n >= 1, "max_n must be at least 1"
+
+
+class CountPrimesDataset(ProceduralDataset):
+    """Generates Count Primes exercises with configurable difficulty"""
+
+    def __init__(self, config: CountPrimesConfig):
+        super().__init__(config=config, seed=config.seed, size=config.size)
+        # Sieve once up front; every item only slices this table.
+        self.primes = self._get_primes(config.max_n + 1)
+
+    def _get_primes(self, n: int) -> list[bool]:
+        """Build a Sieve of Eratosthenes table for the integers below `n`.
+
+        Args:
+            n: Exclusive upper bound of the table.
+
+        Returns:
+            A list of `n` booleans whose entry `k` is True when `k` is prime,
+            or an empty list when `n <= 1`.
+        """
+        if n <= 1:
+            return []
+        primes = [True] * n
+        primes[0] = primes[1] = False
+        # math.isqrt is exact for ints, unlike int(math.sqrt(n)) which goes through a float.
+        for i in range(2, math.isqrt(n) + 1):
+            if primes[i]:
+                # Multiples below i * i were already crossed off by smaller primes.
+                primes[i * i : n : i] = [False] * len(range(i * i, n, i))
+        return primes
+
+    def __getitem__(self, idx: int) -> dict:
+        """Generate a single Count Primes question.
+
+        Args:
+            idx: Item index; it is added to the dataset seed to seed this item's RNG.
+
+        Returns:
+            dict with keys:
+                - question: str, the formatted question for the drawn interval
+                - answer: str, the number of primes in [start, end]
+                - metadata: dict with `start`, `end`, `primes` (the boolean
+                  sieve slice covering start..end) and `solution` (the count)
+        """
+        rng = Random(self.seed + idx)
+        start = rng.randint(1, self.config.max_n)
+        end = rng.randint(start, self.config.max_n)
+        # Slice of per-integer primality flags; summing the booleans counts the primes.
+        primes = self.primes[start : end + 1]
+        answer = sum(primes)
+        return {
+            "question": QUESTION_TEMPLATE.format(start=start, end=end),
+            "answer": str(answer),
+            "metadata": {"start": start, "end": end, "primes": primes, "solution": answer},
+        }
+
+
+register_dataset("count_primes", CountPrimesDataset, CountPrimesConfig)
--- a/reasoning_gym/algorithmic/rotate_matrix.py
+++ b/reasoning_gym/algorithmic/rotate_matrix.py
@ -60,22 +60,16 @@ class RotateMatrixDataset(ProceduralDataset):
        matrix = [numbers[i * n : (i + 1) * n] for i in range(n)]
        return matrix

+    def _rot90(self, matrix: list[list[int]]) -> list[list[int]]:
+        """Return a new matrix equal to `matrix` turned a quarter turn clockwise."""
+        # Reading the rows bottom-to-top and then transposing gives the clockwise rotation.
+        flipped = reversed(matrix)
+        return list(map(list, zip(*flipped)))
+
    def _get_rotated(self, matrix: list[list[int]], num_rotations: int) -> list[list[int]]:
        """Rotate the matrix K times by 90 degrees clockwise"""
        num_rotations %= 4
-        n = len(matrix)
        output = deepcopy(matrix)
-
        for _ in range(num_rotations):
-            for l in range(n // 2):
-                for i in range(l, n - 1 - l):
-                    (output[l][i], output[i][n - 1 - l], output[n - 1 - l][n - 1 - i], output[n - 1 - i][l]) = (
-                        output[n - 1 - i][l],
-                        output[l][i],
-                        output[i][n - 1 - l],
-                        output[n - 1 - l][n - 1 - i],
-                    )
-
+            output = self._rot90(output)
        return output

    def _matrix_to_str(self, matrix: list[list[int]]) -> str: