Merge remote-tracking branch 'upstream/main' into cryptarithm

2026-04-28 17:29:39 +00:00 · 2025-02-18 21:34:42 +08:00 · 2025-02-18 21:34:42 +08:00 · 0e49bd8180
commit 0e49bd8180
parent 3975f78343 f0572d550a
19 changed files with 608 additions and 128 deletions
--- a/reasoning_gym/algorithmic/base_conversion.py
+++ b/reasoning_gym/algorithmic/base_conversion.py
@ -6,6 +6,26 @@ from typing import Optional, Tuple

 from ..factory import ProceduralDataset, register_dataset

+QUESTION_TEMPLATE = """Your task is to convert a number between two different bases.
+
+If the target base is > 10, use lowercase letters a-z for digits above 9.
+
+Example:
+- Input: Convert the base-9 number 440 to base-5
+- Output: 2420
+- Explanation
+    - First, we convert the base-9 number 440 to base-10: 4 * 9**2 + 4 * 9**1 + 0 * 9**0 = 324 + 36 + 0 = 360
+    - Next, we convert the base-10 number 360 to base-5:
+        - 360 // 5 = 72 remainder 0
+        - 72 // 5 = 14 remainder 2
+        - 14 // 5 = 2 remainder 4
+        - 2 // 5 = 0 remainder 2
+    - Reading the remainders in reverse order gives us the base-5 number 2 4 2 0
+    - Hence, the final answer is 2420
+
+Now, convert the {source_name} number {source_repr} to {target_name}
+"""
+

@dataclass
 class BaseConversionConfig:
@ -90,11 +110,10 @@ class BaseConversionDataset(ProceduralDataset):
        source_name = self._format_base_name(source_base)
        target_name = self._format_base_name(target_base)

-        # Add hint for bases > 10 about using lowercase letters
-        hint = " (use lowercase letters a-z for digits above 9)" if target_base > 10 else ""
-
        return {
-            "question": f"Convert the {source_name} number {source_repr} to {target_name}{hint}",
+            "question": QUESTION_TEMPLATE.format(
+                source_name=source_name, source_repr=source_repr, target_name=target_name
+            ),
            "answer": target_repr,
            "metadata": {
                "decimal_value": value,
--- a/reasoning_gym/algorithmic/game_of_life.py
+++ b/reasoning_gym/algorithmic/game_of_life.py
@ -32,7 +32,7 @@ class GameOfLifeDataset(ProceduralDataset):

    def __init__(self, config: GameOfLifeConfig):
        self._prompt_templates = [
-            "What will this Game of Life board look like after {simulation_steps} steps of simulation? Reply as array of array representing rows in the grid from top to bottom in JSON format. (An empty 3x3 grid would look like this: [[0,0,0],[0,0,0],[0,0,0]])\n\n{board}."
+            "What will this Game of Life board look like after {simulation_steps} steps of simulation? Reply as array of array representing rows in the grid from top to bottom in JSON format. Let your answer (array of arrays) be on a single line. (An empty 3x3 grid would look like this: [[0,0,0],[0,0,0],[0,0,0]])\n\n{board}."
        ]

        super().__init__(config=config, seed=config.seed, size=config.size)
--- a/reasoning_gym/algorithmic/graph_color.py
+++ b/reasoning_gym/algorithmic/graph_color.py
@ -200,7 +200,7 @@ Vertices: {puzzle["vertices"]}
 Edges: {edges}
 Possible colors: {puzzle["color_options"]}

-Return your solution as a JSON map of verteces to colors. (For example: {{0: 1, 1: 2, 2: 3}})
+Return your solution as a JSON map of vertices to colors. (For example: {{0: 1, 1: 2, 2: 3}})
 """

        return {
--- a/reasoning_gym/algorithmic/letter_jumble.py
+++ b/reasoning_gym/algorithmic/letter_jumble.py
@ -9,6 +9,30 @@ from reasoning_gym.data import read_data_file

 from ..factory import ProceduralDataset, register_dataset

+QUESTION_TEMPLATE = """Your task is to unscramble words in a sentence.
+
+For each word in a sentence, the letters may have been randomly shuffled. Your task is to unscramble the words.
+
+The order of the words in the sentence is preserved. Moreover, the style of the sentence is preserved (i.e. punctuation, capitalization, new lines, etc.).
+
+Example:
+- Input: Unscramble these words: raendgmeins yWh nya hilcd anc od hatt
+- Output: meanderings Why any child can do that
+- Explanation
+    - We unscramble each of the words independently.
+    - raendgmeins -> meanderings
+    - yWh -> Why
+    - nya -> any
+    - hilcd -> child
+    - anc -> can
+    - od -> do
+    - hatt -> that
+    - The final answer is: meanderings Why any child can do that
+    - Notice that the order of the words is preserved, no new words / symbols (e.g. new lines) are added.
+
+Now, unscramble these words: {words}
+"""
+

@dataclass
 class LetterJumbleConfig:
@ -89,7 +113,7 @@ class LetterJumbleDataset(ProceduralDataset):
        scrambled_words = [self._scramble_word(word, corruption_level, rng) for word in selected_words]

        return {
-            "question": f"Unscramble these words: {' '.join(scrambled_words)}",
+            "question": QUESTION_TEMPLATE.format(words=" ".join(scrambled_words)),
            "answer": " ".join(selected_words),
            "metadata": {
                "num_words": num_words,
@ -112,14 +136,16 @@ class LetterJumbleDataset(ProceduralDataset):
            float: The computed score between 0.0 and 1.0.
        """

-        if answer == None:
-            return 0.0
-
-        s_answer = answer.strip().lower()
-        if not s_answer == entry["answer"].strip().lower():
-            return 0.01
-        else:
-            return 1.0
+        oracle_answer = entry["answer"].strip()
+        if answer:
+            answer = answer.strip()
+            if answer == oracle_answer:
+                return 1.0
+            elif answer.lower() == oracle_answer.lower():
+                return 0.5
+            else:
+                return 0.01
+        return 0.0


 register_dataset("letter_jumble", LetterJumbleDataset, LetterJumbleConfig)
--- a/reasoning_gym/algorithmic/word_ladder.py
+++ b/reasoning_gym/algorithmic/word_ladder.py
@ -9,8 +9,8 @@ from ..data import get_data_file_path
 from ..factory import ProceduralDataset, register_dataset

 QUESTION_TEMPLATE = """Transform the word ladder '{start}' to '{end}' by changing one letter at a time.
-                       Provide your answer as a comma-separated sequence of uppercase letters without spaces.
-                       Each step must be a valid English word."""
+Provide your answer as a comma-separated sequence of uppercase letters without spaces.
+Each step must be a valid English word."""


@dataclass