mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-23 16:55:05 +00:00
fix: Ensure letter counting dataset extracts only alphanumeric words
This commit is contained in:
parent
3488d4339c
commit
102d4dae00
1 changed file with 2 additions and 1 deletion
|
|
@@ -30,7 +30,8 @@ class LetterCountingDataset:
|
|||
|
||||
# Load and preprocess text
|
||||
text = read_data_file("in_the_year_2889.txt")
|
||||
self.words = [word for word in re.findall(r'\b\w+\b', text)]
|
||||
# Extract words and clean them to contain only alphanumeric characters
|
||||
self.words = [word for word in re.findall(r'\b\w+\b', text) if word.isalnum()]
|
||||
|
||||
def __len__(self) -> int:
|
||||
return self.config.size
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue