string insertion curriculum (#305)

This commit is contained in:
Zafir Stojanovski 2025-03-09 18:11:29 +01:00 committed by GitHub
parent 91aa3f3ae2
commit e1e05884ee
3 changed files with 56 additions and 4 deletions

View file

@ -37,7 +37,7 @@ from .rotten_oranges import RottenOrangesConfig, RottenOrangesCurriculum, Rotten
from .sentence_reordering import SentenceReorderingConfig, SentenceReorderingDataset
from .spell_backward import SpellBackwardConfig, SpellBackwardDataset
from .spiral_matrix import SpiralMatrixConfig, SpiralMatrixCurriculum, SpiralMatrixDataset
from .string_insertion import StringInsertionConfig, StringInsertionDataset
from .string_insertion import StringInsertionConfig, StringInsertionCurriculum, StringInsertionDataset
from .string_manipulation import StringManipulationConfig, StringManipulationDataset
from .string_splitting import StringSplittingConfig, StringSplittingDataset
from .string_synthesis import StringSynthesisConfig, StringSynthesisDataset
@ -117,6 +117,7 @@ __all__ = [
"GraphColorCurriculum",
"StringInsertionConfig",
"StringInsertionDataset",
"StringInsertionCurriculum",
"StringManipulationConfig",
"StringManipulationDataset",
"StringSplittingConfig",

View file

@ -7,6 +7,7 @@ from dataclasses import dataclass
from random import Random
from typing import Any, Optional
from ..coaching import AttributeType, BaseCurriculum, RangeAttributeDefinition
from ..factory import ProceduralDataset, register_dataset
QUESTION_TEMPLATE = """Given a string consisting of characters A, B, C, D, and E, your job is to insert a character according to the following pattern:
@ -100,8 +101,33 @@ class StringInsertionDataset(ProceduralDataset):
return {
"question": QUESTION_TEMPLATE.format(string=string),
"answer": str(answer),
"metadata": {"string": string, "solution": answer},
"metadata": {
"string": string,
"solution": answer,
"difficulty": {
"string_length": string_length,
},
},
}
register_dataset("string_insertion", StringInsertionDataset, StringInsertionConfig)
class StringInsertionCurriculum(BaseCurriculum):
    """Curriculum for the string insertion dataset.

    Declares a single range attribute, ``string_length``, whose lower and
    upper field names are ``min_string_length`` and ``max_string_length``.
    """

    def __init__(self):
        super().__init__(StringInsertionCurriculum.__name__, StringInsertionConfig)

        # Build the attribute definition first, then hand it to the base class.
        string_length_attribute = RangeAttributeDefinition(
            name="string_length",
            levels=[10, 50, 100, 1000],
            default_level=1,
            description="Length of the string",
            attr_type=AttributeType.APPEND,
            min_value=5,
            lower_field_name="min_string_length",
            upper_field_name="max_string_length",
        )
        self._define_attributes(string_length_attribute)
# Register the dataset under the name "string_insertion", passing its dataset, config and curriculum classes.
register_dataset("string_insertion", StringInsertionDataset, StringInsertionConfig, StringInsertionCurriculum)