This commit is contained in:
Zafir Stojanovski 2025-02-12 17:26:23 +01:00 committed by abdulhakeem
parent aaf1df285e
commit c64a32155a
3 changed files with 12 additions and 12 deletions

View file

@@ -25,10 +25,10 @@ from .rotate_matrix import RotateMatrixConfig, RotateMatrixDataset
from .sentence_reordering import SentenceReorderingConfig, SentenceReorderingDataset from .sentence_reordering import SentenceReorderingConfig, SentenceReorderingDataset
from .spell_backward import SpellBackwardConfig, SpellBackwardDataset from .spell_backward import SpellBackwardConfig, SpellBackwardDataset
from .spiral_matrix import SpiralMatrixConfig, SpiralMatrixDataset from .spiral_matrix import SpiralMatrixConfig, SpiralMatrixDataset
from .string_insertion import StringInsertionConfig, StringInsertionDataset
from .word_ladder import WordLadderConfig, WordLadderDataset from .word_ladder import WordLadderConfig, WordLadderDataset
from .word_sequence_reversal import WordSequenceReversalConfig, WordSequenceReversalDataset from .word_sequence_reversal import WordSequenceReversalConfig, WordSequenceReversalDataset
from .word_sorting import TextTransformation, WordSortingConfig, WordSortingDataset from .word_sorting import TextTransformation, WordSortingConfig, WordSortingDataset
from .string_insertion import StringInsertionConfig, StringInsertionDataset
__all__ = [ __all__ = [
"SpellBackwardConfig", "SpellBackwardConfig",

View file

@@ -9,7 +9,6 @@ from typing import Optional
from ..factory import ProceduralDataset, register_dataset from ..factory import ProceduralDataset, register_dataset
QUESTION_TEMPLATE = """Given a string consisting of characters A, B, C, D, and E, your job is to insert a character according to the following pattern: QUESTION_TEMPLATE = """Given a string consisting of characters A, B, C, D, and E, your job is to insert a character according to the following pattern:
1. If there is a substring ABCD in the string, insert the character A after the substring. 1. If there is a substring ABCD in the string, insert the character A after the substring.
2. If there is a substring BCDE in the string, insert the character B after the substring. 2. If there is a substring BCDE in the string, insert the character B after the substring.
@@ -47,12 +46,13 @@ class StringInsertionConfig:
assert 5 <= self.min_string_length, "Minimum string length should be at least 5" assert 5 <= self.min_string_length, "Minimum string length should be at least 5"
assert self.min_string_length <= self.max_string_length, "Minimum string length should be less than maximum" assert self.min_string_length <= self.max_string_length, "Minimum string length should be less than maximum"
class StringInsertionDataset(ProceduralDataset): class StringInsertionDataset(ProceduralDataset):
"""Generates String Insertion exercises with configurable difficulty""" """Generates String Insertion exercises with configurable difficulty"""
def __init__(self, config: StringInsertionConfig): def __init__(self, config: StringInsertionConfig):
super().__init__(config=config, seed=config.seed, size=config.size) super().__init__(config=config, seed=config.seed, size=config.size)
self.vocabulary = ['A', 'B', 'C', 'D', 'E'] self.vocabulary = ["A", "B", "C", "D", "E"]
self.insertion_rules = [ self.insertion_rules = [
("ABCD", "A"), ("ABCD", "A"),
("BCDE", "B"), ("BCDE", "B"),
@@ -68,7 +68,7 @@ class StringInsertionDataset(ProceduralDataset):
while i < len(string): while i < len(string):
inserted = False inserted = False
for pattern, char in self.insertion_rules: for pattern, char in self.insertion_rules:
substring = string[i:i+len(pattern)] substring = string[i : i + len(pattern)]
if substring == pattern: if substring == pattern:
output.append(substring + char) output.append(substring + char)
i += len(pattern) i += len(pattern)