lint

2026-04-28 17:29:39 +00:00 · 2025-01-30 23:14:32 +01:00 · 2025-01-30 23:14:32 +01:00 · 5ae329becd
commit 5ae329becd
parent 048a165314
6 changed files with 148 additions and 124 deletions
--- a/GALLERY.md
+++ b/GALLERY.md
@@ -33,6 +33,7 @@ This gallery shows examples from all available datasets using their default conf
 - [spell_backward](#spell_backward)
 - [sudoku](#sudoku)
 - [syllogism](#syllogism)
 - [word_ladder](#word_ladder)
 - [word_sequence_reversal](#word_sequence_reversal)
 - [word_sorting](#word_sorting)
@@ -710,19 +711,19 @@ size = 500
 Example tasks:
 ```
 Example 1:
-Question: How many times does the letter "w" appear in the text: "bed and enters his mechanical dresser Two minutes later the machine deposited him all dressed"?
+Question: How many times does the letter "o" appear in the text: "bed and enters his mechanical dresser Two minutes later the machine deposited him all dressed"?
-Answer: 1
+Answer: 2
-Metadata: {'span_length': 15, 'target_letter': 'w', 'span': ['bed', 'and', 'enters', 'his', 'mechanical', 'dresser', 'Two', 'minutes', 'later', 'the', 'machine', 'deposited', 'him', 'all', 'dressed']}
+Metadata: {'span_length': 15, 'target_letter': 'o', 'span': ['bed', 'and', 'enters', 'his', 'mechanical', 'dresser', 'Two', 'minutes', 'later', 'the', 'machine', 'deposited', 'him', 'all', 'dressed']}
 Example 2:
-Question: How many times does the letter "p" appear in the text: "it into a watering place"?
+Question: How many times does the letter "c" appear in the text: "it into a watering place"?
 Answer: 1
-Metadata: {'span_length': 5, 'target_letter': 'p', 'span': ['it', 'into', 'a', 'watering', 'place']}
+Metadata: {'span_length': 5, 'target_letter': 'c', 'span': ['it', 'into', 'a', 'watering', 'place']}
 Example 3:
-Question: How many times does the letter "t" appear in the text: "readable form accessible by the widest array of equipment including outdated"?
+Question: How many times does the letter "o" appear in the text: "readable form accessible by the widest array of equipment including outdated"?
-Answer: 5
+Answer: 3
-Metadata: {'span_length': 11, 'target_letter': 't', 'span': ['readable', 'form', 'accessible', 'by', 'the', 'widest', 'array', 'of', 'equipment', 'including', 'outdated']}
+Metadata: {'span_length': 11, 'target_letter': 'o', 'span': ['readable', 'form', 'accessible', 'by', 'the', 'widest', 'array', 'of', 'equipment', 'including', 'outdated']}
 ```
@@ -1443,6 +1444,38 @@ Metadata: {'premise1': 'All butterflies are tigers', 'premise2': 'No tigers are
 ```
 ### word_ladder
 Generates word ladder transformation tasks
 Default configuration:
 ```python
 min_word_length = 3
 max_word_length = 5
 min_chain_length = -1
 max_chain_length = -1
 seed = 42
 size = 500
 ```
 Example tasks:
 ```
 Example 1:
 Question: Transform the word 'CEILS' into 'ANIGH' by changing one letter at a time. Each step must create a valid English word (including plurals) and keep the same word length. Show the sequence of words needed.
 Answer: CEILS,TEILS,TEINS,THINS,THIGS,THIGH,AHIGH,ANIGH
 Metadata: {'start_word': 'CEILS', 'end_word': 'ANIGH', 'word_length': 5, 'chain_length': 8}
 Example 2:
 Question: Transform the word 'KAW' into 'EFS' by changing one letter at a time. Each step must create a valid English word (including plurals) and keep the same word length. Show the sequence of words needed.
 Answer: KAW,KAS,EAS,EFS
 Metadata: {'start_word': 'KAW', 'end_word': 'EFS', 'word_length': 3, 'chain_length': 4}
 Example 3:
 Question: Transform the word 'SAUT' into 'SKER' by changing one letter at a time. Each step must create a valid English word (including plurals) and keep the same word length. Show the sequence of words needed.
 Answer: SAUT,SHUT,SHET,SKET,SKER
 Metadata: {'start_word': 'SAUT', 'end_word': 'SKER', 'word_length': 4, 'chain_length': 5}
 ```
 ### word_sequence_reversal
 Generates word sequence reversal tasks from text spans
@@ -1491,21 +1524,21 @@ Example tasks:
 ```
 Example 1:
 Question: Sort these words in ascending order (using ASCII/Unicode ordering) and return them as a comma-separated list:
-due, ever, many, generations
+Wolcott, keep, reaching, times
-Answer: due, ever, generations, many
+Answer: Wolcott, keep, reaching, times
-Metadata: {'original_words': ['due', 'ever', 'many', 'generations'], 'transformed_words': ['due', 'ever', 'many', 'generations'], 'direction': 'ascending', 'transformation': <TextTransformation.ORIGINAL: 'original'>, 'sorted_words': ['due', 'ever', 'generations', 'many']}
+Metadata: {'original_words': ['Wolcott', 'keep', 'reaching', 'times'], 'transformed_words': ['Wolcott', 'keep', 'reaching', 'times'], 'direction': 'ascending', 'transformation': <TextTransformation.ORIGINAL: 'original'>, 'sorted_words': ['Wolcott', 'keep', 'reaching', 'times']}
 Example 2:
 Question: Sort these words in descending order (using ASCII/Unicode ordering) and return them as a comma-separated list:
-change, 250, young
+took, critical, condense
-Answer: young, change, 250
+Answer: took, critical, condense
-Metadata: {'original_words': ['change', '250', 'young'], 'transformed_words': ['change', '250', 'young'], 'direction': 'descending', 'transformation': <TextTransformation.ORIGINAL: 'original'>, 'sorted_words': ['young', 'change', '250']}
+Metadata: {'original_words': ['took', 'critical', 'condense'], 'transformed_words': ['took', 'critical', 'condense'], 'direction': 'descending', 'transformation': <TextTransformation.ORIGINAL: 'original'>, 'sorted_words': ['took', 'critical', 'condense']}
 Example 3:
 Question: Sort these words in ascending order (using ASCII/Unicode ordering) and return them as a comma-separated list:
-industry, elementary, traverse, stepped, meals, rub, resultant, etheric, irritation
+apartment, yellow, Just, pleasure, collapse, different, purchasers, taking, opening
-Answer: elementary, etheric, industry, irritation, meals, resultant, rub, stepped, traverse
+Answer: Just, apartment, collapse, different, opening, pleasure, purchasers, taking, yellow
-Metadata: {'original_words': ['industry', 'elementary', 'traverse', 'stepped', 'meals', 'rub', 'resultant', 'etheric', 'irritation'], 'transformed_words': ['industry', 'elementary', 'traverse', 'stepped', 'meals', 'rub', 'resultant', 'etheric', 'irritation'], 'direction': 'ascending', 'transformation': <TextTransformation.ORIGINAL: 'original'>, 'sorted_words': ['elementary', 'etheric', 'industry', 'irritation', 'meals', 'resultant', 'rub', 'stepped', 'traverse']}
+Metadata: {'original_words': ['apartment', 'yellow', 'Just', 'pleasure', 'collapse', 'different', 'purchasers', 'taking', 'opening'], 'transformed_words': ['apartment', 'yellow', 'Just', 'pleasure', 'collapse', 'different', 'purchasers', 'taking', 'opening'], 'direction': 'ascending', 'transformation': <TextTransformation.ORIGINAL: 'original'>, 'sorted_words': ['Just', 'apartment', 'collapse', 'different', 'opening', 'pleasure', 'purchasers', 'taking', 'yellow']}
 ```
--- a/examples/generate_word_ladder_examples.py
+++ b/examples/generate_word_ladder_examples.py
@@ -1,19 +1,21 @@
 # generates dataset of word ladder examples, and then generates simulated chain of thought reasoning for each example
 import reasoning_gym
 from openai import OpenAI
 import os
 from openai import OpenAI
 import reasoning_gym
 # Configuration for the dataset
 config = {
-    'dataset_name': 'word_ladder',
+    "dataset_name": "word_ladder",
-    'dataset_config': {
+    "dataset_config": {
-        'min_word_length': 5,
+        "min_word_length": 5,
-        'max_word_length': 5,
+        "max_word_length": 5,
-        'min_chain_length':3, # set to -1 for shortest possible path, increase to generate more examples
+        "min_chain_length": 3,  # set to -1 for shortest possible path, increase to generate more examples
-        'max_chain_length':5,
+        "max_chain_length": 5,
-        'size': 1,  # Generate a small dataset for demonstration
+        "size": 1,  # Generate a small dataset for demonstration
-    }
+    },
 }
 system_prompt = """Word Ladder puzzles involve transforming a start word into an end word.
@@ -177,8 +179,8 @@ Yes, that's four steps, each changing one letter, all words are valid. So that s
 """
 client = OpenAI(
-  base_url="https://openrouter.ai/api/v1", # openrouter used in this example
+    base_url="https://openrouter.ai/api/v1",  # openrouter used in this example
-  api_key=os.environ["OPENROUTER_API_KEY"], # set your key in environment variable
+    api_key=os.environ["OPENROUTER_API_KEY"],  # set your key in environment variable
 )
@@ -188,33 +190,26 @@ def generate_cot(question: str, answer: str) -> str:
    Provide the verbose chain of thought reasoning to transform the start word into the end word exactly in the style and length required."""
    completion = client.chat.completions.create(
-        model="microsoft/phi-4", # choose model
+        model="microsoft/phi-4",  # choose model
-        messages=[
+        messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}],
            {
                "role": "system",
                "content": system_prompt
            },
            {
                "role": "user",
                "content": prompt
            }
        ],
        temperature=0.6,
-        max_tokens=10000
+        max_tokens=10000,
    )
    return completion.choices[0].message.content
 # Create the word ladder dataset
-dataset = reasoning_gym.create_dataset(config['dataset_name'], **config['dataset_config'])
+dataset = reasoning_gym.create_dataset(config["dataset_name"], **config["dataset_config"])
 print(f"Generated {len(dataset)} examples, moving on to generate CoT reasoning...")
 # Generate and print examples with CoT
 for item in dataset:
    # Generate CoT reasoning demo
-    item['reasoning'] = generate_cot(item['question'],item['answer'])
+    item["reasoning"] = generate_cot(item["question"], item["answer"])
    print("\n--- Example ---")
-    print("Question:", item['question'])
+    print("Question:", item["question"])
-    print("Answer:", item['answer'])
+    print("Answer:", item["answer"])
    print("\nChain of Thought:")
-    print(item['reasoning'])
+    print(item["reasoning"])
-    print("\nMetadata:", item['metadata']) 
+    print("\nMetadata:", item["metadata"])
--- a/reasoning_gym/algorithmic/__init__.py
+++ b/reasoning_gym/algorithmic/__init__.py
@@ -14,9 +14,9 @@ from .number_filtering import NumberFilteringConfig, NumberFilteringDataset
 from .number_sorting import NumberSortingConfig, NumberSortingDataset
 from .sentence_reordering import SentenceReorderingConfig, SentenceReorderingDataset
 from .spell_backward import SpellBackwardConfig, SpellBackwardDataset
 from .word_ladder import WordLadderConfig, WordLadderDataset
 from .word_sequence_reversal import WordSequenceReversalConfig, WordSequenceReversalDataset
 from .word_sorting import TextTransformation, WordSortingConfig, WordSortingDataset
 from .word_ladder import WordLadderConfig, WordLadderDataset
 __all__ = [
    "SpellBackwardConfig",
--- a/reasoning_gym/algorithmic/word_ladder.py
+++ b/reasoning_gym/algorithmic/word_ladder.py
@@ -1,23 +1,25 @@
 """Word ladder task generator"""
 from collections import deque
 from dataclasses import dataclass
 from random import Random
-from typing import List, Optional, Set, Dict, Tuple
+from typing import Dict, List, Optional, Set, Tuple
-from collections import deque
+
 from reasoning_gym.data import read_data_file
 from ..factory import ProceduralDataset, register_dataset
@dataclass
 class WordLadderConfig:
    """Configuration for word ladder task generation"""
-    min_word_length: int = 3       # Minimum word length
+    min_word_length: int = 3  # Minimum word length
-    max_word_length: int = 5       # Maximum word length
+    max_word_length: int = 5  # Maximum word length
-    min_chain_length: int = -1     # Set to -1 for shortest path or a minimum of 3
+    min_chain_length: int = -1  # Set to -1 for shortest path or a minimum of 3
-    max_chain_length: int = -1     # Set to -1 for shortest path or a max 
+    max_chain_length: int = -1  # Set to -1 for shortest path or a max
    seed: Optional[int] = None
-    size: int = 500                # Virtual dataset size
+    size: int = 500  # Virtual dataset size
    def validate(self) -> None:
        """Validate configuration parameters"""
@@ -28,13 +30,16 @@ class WordLadderConfig:
        # Modified validation logic
        if self.min_chain_length == -1:
            if self.max_chain_length != -1:
-                assert self.max_chain_length >= 3, "When min_chain_length=-1 (shortest path), max_chain_length must be -1 or >=3"
+                assert (
                    self.max_chain_length >= 3
                ), "When min_chain_length=-1 (shortest path), max_chain_length must be -1 or >=3"
        elif self.max_chain_length == -1:
            raise AssertionError("max_chain_length cannot be -1 unless min_chain_length is also -1")
        else:
            assert self.min_chain_length >= 3, "min_chain_length must be 3 or -1"
            assert self.max_chain_length >= self.min_chain_length, "max_chain_length must be >= min_chain_length"
 class WordLadderDataset(ProceduralDataset):
    """Generates word ladder transformation tasks"""
@@ -48,6 +53,7 @@ class WordLadderDataset(ProceduralDataset):
        """Load words from CSV file organized by length"""
        import csv
        from io import StringIO
        word_sets = {}
        try:
@@ -61,8 +67,8 @@ class WordLadderDataset(ProceduralDataset):
            for row in reader:
                # Process each word length column
                for length in range(3, 6):
-                    col_name = f'{length}_letter'
+                    col_name = f"{length}_letter"
-                    word = row.get(col_name, '')
+                    word = row.get(col_name, "")
                    if not word:  # Skip empty entries
                        continue
@@ -159,11 +165,11 @@ class WordLadderDataset(ProceduralDataset):
        for i in range(len(word_chars)):
            original = word_chars[i]
-            for c in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ':
+            for c in "ABCDEFGHIJKLMNOPQRSTUVWXYZ":
                if c == original:
                    continue
                word_chars[i] = c
-                new_word = ''.join(word_chars)
+                new_word = "".join(word_chars)
                if new_word in word_set:
                    neighbors.add(new_word)
            word_chars[i] = original
@@ -179,8 +185,8 @@ class WordLadderDataset(ProceduralDataset):
            start, end = rng.sample(sorted(word_set), 2)
            path = self._find_path(start, end, word_set)
            if path and (
-                (self.config.min_chain_length == -1 and self.config.max_chain_length == -1) or
+                (self.config.min_chain_length == -1 and self.config.max_chain_length == -1)
-                (self.config.min_chain_length <= len(path) <= self.config.max_chain_length)
+                or (self.config.min_chain_length <= len(path) <= self.config.max_chain_length)
            ):
                return start, end, path
@@ -195,12 +201,7 @@ class WordLadderDataset(ProceduralDataset):
        return {
            "question": f"Transform the word '{start}' into '{end}' by changing one letter at a time. Each step must create a valid English word (including plurals) and keep the same word length. Show the sequence of words needed.",
            "answer": ",".join(path),
-            "metadata": {
+            "metadata": {"start_word": start, "end_word": end, "word_length": length, "chain_length": len(path)},
                "start_word": start,
                "end_word": end,
                "word_length": length,
                "chain_length": len(path)
            }
        }
--- a/tests/test_word_ladder.py
+++ b/tests/test_word_ladder.py
@@ -44,12 +44,7 @@ def test_word_ladder_dataset_deterministic():
 def test_word_ladder_dataset_items():
    """Test basic properties of generated items"""
    config = WordLadderConfig(
-        min_word_length=3,
+        min_word_length=3, max_word_length=5, min_chain_length=3, max_chain_length=5, size=10, seed=42
        max_word_length=5,
        min_chain_length=3,
        max_chain_length=5,
        size=10,
        seed=42
    )
    dataset = WordLadderDataset(config)