From b03b02f3a514de47374fbcab79e7b9ce40404c6e Mon Sep 17 00:00:00 2001 From: "Andreas Koepf (aider)" Date: Sun, 26 Jan 2025 15:56:03 +0100 Subject: [PATCH] refactor: Update sentence extraction regex to preserve ending punctuation --- reasoning_gym/algorithmic/sentence_reordering.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/reasoning_gym/algorithmic/sentence_reordering.py b/reasoning_gym/algorithmic/sentence_reordering.py index 3ef8b07c..b2216c1e 100644 --- a/reasoning_gym/algorithmic/sentence_reordering.py +++ b/reasoning_gym/algorithmic/sentence_reordering.py @@ -33,8 +33,8 @@ class SentenceReorderingDataset(ProceduralDataset): # Extract sentences make sure they are greater than or equal to the number of words in a sentence # Ensure that only the length of alphanumeric characters in the sentence is considered self.sentences = [ - sentence - for sentence in re.findall(r"[^.!?]+", text) + sentence.strip() + for sentence in re.findall(r"[^.!?]+[.!?]", text) # Changed pattern to include the ending punctuation if self.config.min_words_in_sentence <= len(re.findall(r"\b\w+\b", sentence)) <= self.config.max_words_in_sentence ]