fix unit tests, lower python dependency to 3.9

2026-04-24 17:05:03 +00:00 · 2025-01-26 16:55:17 +01:00 · 2025-01-26 16:55:17 +01:00 · ad9f0d265c
commit ad9f0d265c
parent ee67374aae
11 changed files with 66 additions and 56 deletions
--- a/reasoning_gym/algorithmic/__init__.py
+++ b/reasoning_gym/algorithmic/__init__.py
@@ -15,7 +15,7 @@ from .number_sorting import NumberSortingConfig, NumberSortingDataset
 from .sentence_reordering import SentenceReorderingConfig, SentenceReorderingDataset
 from .spell_backward import SpellBackwardConfig, SpellBackwardDataset
 from .word_sequence_reversal import WordSequenceReversalConfig, WordSequenceReversalDataset
-from .word_sorting import WordSortingConfig, WordSortingDataset, TextTransformation
+from .word_sorting import TextTransformation, WordSortingConfig, WordSortingDataset

 __all__ = [
    "SpellBackwardConfig",
--- a/reasoning_gym/algorithmic/sentence_reordering.py
+++ b/reasoning_gym/algorithmic/sentence_reordering.py
@@ -8,9 +8,11 @@ from typing import List, Optional
 from ..data import read_data_file
 from ..factory import ProceduralDataset, register_dataset

+
 @dataclass
 class SentenceReorderingConfig:
    """Configuration for sentence reordering task generation"""
+
    min_words_in_sentence: int = 3
    max_words_in_sentence: int = 20
    seed: Optional[int] = None
@@ -19,7 +21,12 @@ class SentenceReorderingConfig:
    def validate(self) -> None:
        """Validate configuration parameters"""
        assert self.min_words_in_sentence > 0, "min_words_in_sentence must be positive"
-        assert self.max_words_in_sentence >= self.min_words_in_sentence, "max_words_in_sentence must be >= min_words_in_sentence"
+        assert (
+            self.max_words_in_sentence >= self.min_words_in_sentence
+        ), "max_words_in_sentence must be >= min_words_in_sentence"
+        assert (
+            self.max_words_in_sentence >= self.min_words_in_sentence
+        ), "max_words_in_sentence must be >= min_words_in_sentence"


 class SentenceReorderingDataset(ProceduralDataset):
@@ -35,7 +42,9 @@ class SentenceReorderingDataset(ProceduralDataset):
        self.sentences = [
            sentence.strip()
            for sentence in re.findall(r"[^.!?]+[.!?]", text)  # Changed pattern to include the ending punctuation
-            if self.config.min_words_in_sentence <= len(re.findall(r"\b\w+\b", sentence)) <= self.config.max_words_in_sentence
+            if self.config.min_words_in_sentence
+            <= len(re.findall(r"\b\w+\b", sentence))
+            <= self.config.max_words_in_sentence
        ]

    def _generate_sentence_dataset(self, sentence: str, seed: int, idx: int, shuffle=True):
@@ -66,22 +75,22 @@ class SentenceReorderingDataset(ProceduralDataset):
        sentence_dataset = self._generate_sentence_dataset(rng.choice(self.sentences), self.seed, idx)

        # Ensure only 'input' and 'goal' keys are present
-        if set(sentence_dataset.keys()) != {'input', 'goal'}:
+        if set(sentence_dataset.keys()) != {"input", "goal"}:
            raise KeyError("The dictionary must contain only 'input' and 'goal' keys")
-        
+
        # Solve the task by sorting words to match the goal sentence
-        input_words = sentence_dataset['input'].split()
+        input_words = sentence_dataset["input"].split()
        question = " ".join(input_words)
-        goal_words = sentence_dataset['goal'].split()
+        goal_words = sentence_dataset["goal"].split()
        solved_sentence = " ".join(sorted(input_words, key=lambda word: goal_words.index(word)))
        # Check for length of alphanumeric characters in the solved sentence
        word_count = len(re.findall(r"\b\w+\b", solved_sentence))

-
        return {
            "question": f"Restore the correct order of words in the following sentence: {question}",
            "answer": solved_sentence,
            "metadata": {"word_count": word_count},
        }
-        
+
+
 register_dataset("sentence_reordering", SentenceReorderingDataset, SentenceReorderingConfig)
--- a/reasoning_gym/algorithmic/word_sorting.py
+++ b/reasoning_gym/algorithmic/word_sorting.py
@@ -12,8 +12,9 @@ from ..factory import ProceduralDataset, register_dataset

 class TextTransformation(str, Enum):
    """Text transformation options"""
+
    LOWERCASE = "lowercase"
-    UPPERCASE = "uppercase" 
+    UPPERCASE = "uppercase"
    ORIGINAL = "original"
    RANDOMCASE = "randomcase"

@@ -21,6 +22,7 @@ class TextTransformation(str, Enum):
 @dataclass
 class WordSortingConfig:
    """Configuration for word sorting task generation"""
+
    min_words: int = 3  # Minimum words to sort
    max_words: int = 10  # Maximum words to sort
    min_word_length: int = 3  # Minimum word length
@@ -43,14 +45,17 @@ class WordSortingDataset(ProceduralDataset):

    def __init__(self, config: WordSortingConfig):
        super().__init__(config=config, seed=config.seed, size=config.size)
-        
+
        # Load and preprocess text
        text = read_data_file("in_the_year_2889.txt")
        # Extract unique words within length constraints
-        self.words = list(set(
-            word for word in re.findall(r'\b\w+\b', text)
-            if self.config.min_word_length <= len(word) <= self.config.max_word_length
-        ))
+        self.words = list(
+            set(
+                word
+                for word in re.findall(r"\b\w+\b", text)
+                if self.config.min_word_length <= len(word) <= self.config.max_word_length
+            )
+        )

    def _transform_word(self, word: str, rng: Random) -> str:
        """Apply configured transformation to word"""
@@ -59,19 +64,18 @@ class WordSortingDataset(ProceduralDataset):
        elif self.config.transformation == TextTransformation.UPPERCASE:
            return word.upper()
        elif self.config.transformation == TextTransformation.RANDOMCASE:
-            return ''.join(c.upper() if rng.choice([True, False]) else c.lower() 
-                         for c in word)
+            return "".join(c.upper() if rng.choice([True, False]) else c.lower() for c in word)
        return word  # ORIGINAL case

    def _generate_words(self, rng: Random) -> Tuple[List[str], List[str]]:
        """Generate list of words and their transformed versions"""
        count = rng.randint(self.config.min_words, self.config.max_words)
-        
+
        # Select random words
        selected_words = rng.sample(self.words, count)
        # Apply transformation
        transformed_words = [self._transform_word(word, rng) for word in selected_words]
-        
+
        return selected_words, transformed_words

    def __getitem__(self, idx: int) -> dict:
@@ -97,7 +101,7 @@ class WordSortingDataset(ProceduralDataset):
                "transformed_words": transformed_words,
                "direction": direction,
                "transformation": self.config.transformation,
-                "sorted_words": answer
+                "sorted_words": answer,
            },
        }