diff --git a/reasoning_gym/algorithmic/sentence_reordering.py b/reasoning_gym/algorithmic/sentence_reordering.py index 3ef8b07c..b2216c1e 100644 --- a/reasoning_gym/algorithmic/sentence_reordering.py +++ b/reasoning_gym/algorithmic/sentence_reordering.py @@ -33,8 +33,8 @@ class SentenceReorderingDataset(ProceduralDataset): # Extract sentences make sure they are greater than or equal to the number of words in a sentence # Ensure that only the length of alphanumeric characters in the sentence is considered self.sentences = [ - sentence - for sentence in re.findall(r"[^.!?]+", text) + sentence.strip() + for sentence in re.findall(r"[^.!?]+[.!?]", text) # Changed pattern to include the ending punctuation if self.config.min_words_in_sentence <= len(re.findall(r"\b\w+\b", sentence)) <= self.config.max_words_in_sentence ]