add reasoning_gym.create_dataset({name}, ...) global factory function

This commit is contained in:
Andreas Koepf 2025-01-25 00:58:34 +01:00
parent 0d2d8ba6a0
commit 519e411fa5
35 changed files with 133 additions and 598 deletions

View file

@ -6,31 +6,21 @@ Algorithmic tasks for training reasoning capabilities:
- Pattern matching
"""
from reasoning_gym.arithmetic.basic_arithmetic import basic_arithmetic_dataset
from reasoning_gym.arithmetic.chain_sum import chain_sum_dataset
from .base_conversion import BaseConversionConfig, BaseConversionDataset, base_conversion_dataset
from .letter_counting import LetterCountingConfig, LetterCountingDataset, letter_counting_dataset
from .number_filtering import NumberFilteringConfig, NumberFilteringDataset, number_filtering_dataset
from .number_sorting import NumberSortingConfig, NumberSortingDataset, number_sorting_dataset
from .word_reversal import WordReversalConfig, WordReversalDataset, word_reversal_dataset
from .base_conversion import BaseConversionConfig, BaseConversionDataset
from .letter_counting import LetterCountingConfig, LetterCountingDataset
from .number_filtering import NumberFilteringConfig, NumberFilteringDataset
from .number_sorting import NumberSortingConfig, NumberSortingDataset
from .word_reversal import WordReversalConfig, WordReversalDataset
# Names this package exposes as its public API; this list is what
# `from <package> import *` will bind.
__all__ = [
"basic_arithmetic_dataset",
"BaseConversionConfig",
"BaseConversionDataset",
"base_conversion_dataset",
"chain_sum_dataset",
"LetterCountingConfig",
"LetterCountingDataset",
"letter_counting_dataset",
"NumberFilteringConfig",
"NumberFilteringDataset",
"number_filtering_dataset",
"NumberSortingConfig",
"NumberSortingDataset",
"number_sorting_dataset",
"WordReversalConfig",
"WordReversalDataset",
"word_reversal_dataset",
]

View file

@ -4,7 +4,7 @@ from dataclasses import dataclass
from random import Random
from typing import Optional, Tuple
from ..dataset import ProceduralDataset
from ..factory import ProceduralDataset, register_dataset
@dataclass
@ -88,21 +88,4 @@ class BaseConversionDataset(ProceduralDataset):
}
def base_conversion_dataset(
    min_base: int = 2,
    max_base: int = 16,
    min_value: int = 0,
    max_value: int = 1000,
    seed: Optional[int] = None,
    size: int = 500,
) -> BaseConversionDataset:
    """Build a BaseConversionDataset from individual configuration values.

    Each argument is forwarded unchanged, by keyword, to the
    ``BaseConversionConfig`` field of the same name, and the resulting
    config object is handed to ``BaseConversionDataset``.

    Returns:
        The dataset constructed from that config.
    """
    return BaseConversionDataset(
        BaseConversionConfig(
            min_base=min_base,
            max_base=max_base,
            min_value=min_value,
            max_value=max_value,
            seed=seed,
            size=size,
        )
    )
# Pass the name "base_conversion" together with the dataset and config classes
# to register_dataset (imported from ..factory). NOTE(review): presumably this
# is what makes the dataset creatable by name via the factory -- confirm there.
register_dataset("base_conversion", BaseConversionDataset, BaseConversionConfig)

View file

@ -7,7 +7,7 @@ from typing import List, Optional
from reasoning_gym.data import read_data_file
from ..dataset import ProceduralDataset
from ..factory import ProceduralDataset, register_dataset
@dataclass
@ -63,17 +63,4 @@ class LetterCountingDataset(ProceduralDataset):
}
def letter_counting_dataset(
    min_words: int = 5,
    max_words: int = 15,
    seed: Optional[int] = None,
    size: int = 500,
) -> LetterCountingDataset:
    """Build a LetterCountingDataset from individual configuration values.

    Each argument is forwarded unchanged, by keyword, to the
    ``LetterCountingConfig`` field of the same name, and the resulting
    config object is handed to ``LetterCountingDataset``.

    Returns:
        The dataset constructed from that config.
    """
    return LetterCountingDataset(
        LetterCountingConfig(
            min_words=min_words,
            max_words=max_words,
            seed=seed,
            size=size,
        )
    )
# Pass the name "letter_counting" together with the dataset and config classes
# to register_dataset (imported from ..factory). NOTE(review): presumably this
# is what makes the dataset creatable by name via the factory -- confirm there.
register_dataset("letter_counting", LetterCountingDataset, LetterCountingConfig)

View file

@ -4,7 +4,7 @@ from dataclasses import dataclass
from random import Random
from typing import List, Optional, Tuple
from ..dataset import ProceduralDataset
from ..factory import ProceduralDataset, register_dataset
@dataclass
@ -98,25 +98,4 @@ class NumberFilteringDataset(ProceduralDataset):
}
def number_filtering_dataset(
    min_numbers: int = 3,
    max_numbers: int = 10,
    min_decimals: int = 0,
    max_decimals: int = 4,
    min_value: float = -100.0,
    max_value: float = 100.0,
    seed: Optional[int] = None,
    size: int = 500,
) -> NumberFilteringDataset:
    """Build a NumberFilteringDataset from individual configuration values.

    Each argument is forwarded unchanged, by keyword, to the
    ``NumberFilteringConfig`` field of the same name, and the resulting
    config object is handed to ``NumberFilteringDataset``.

    Returns:
        The dataset constructed from that config.
    """
    return NumberFilteringDataset(
        NumberFilteringConfig(
            min_numbers=min_numbers,
            max_numbers=max_numbers,
            min_decimals=min_decimals,
            max_decimals=max_decimals,
            min_value=min_value,
            max_value=max_value,
            seed=seed,
            size=size,
        )
    )
# Pass the name "number_filtering" together with the dataset and config classes
# to register_dataset (imported from ..factory). NOTE(review): presumably this
# is what makes the dataset creatable by name via the factory -- confirm there.
register_dataset("number_filtering", NumberFilteringDataset, NumberFilteringConfig)

View file

@ -4,7 +4,7 @@ from dataclasses import dataclass
from random import Random
from typing import List, Optional, Tuple
from ..dataset import ProceduralDataset
from ..factory import ProceduralDataset, register_dataset
@dataclass
@ -86,25 +86,4 @@ class NumberSortingDataset(ProceduralDataset):
}
def number_sorting_dataset(
    min_numbers: int = 3,
    max_numbers: int = 10,
    min_decimals: int = 0,
    max_decimals: int = 2,
    min_value: float = -100.0,
    max_value: float = 100.0,
    seed: Optional[int] = None,
    size: int = 500,
) -> NumberSortingDataset:
    """Build a NumberSortingDataset from individual configuration values.

    Each argument is forwarded unchanged, by keyword, to the
    ``NumberSortingConfig`` field of the same name, and the resulting
    config object is handed to ``NumberSortingDataset``.

    Returns:
        The dataset constructed from that config.
    """
    return NumberSortingDataset(
        NumberSortingConfig(
            min_numbers=min_numbers,
            max_numbers=max_numbers,
            min_decimals=min_decimals,
            max_decimals=max_decimals,
            min_value=min_value,
            max_value=max_value,
            seed=seed,
            size=size,
        )
    )
# Pass the name "number_sorting" together with the dataset and config classes
# to register_dataset (imported from ..factory). NOTE(review): presumably this
# is what makes the dataset creatable by name via the factory -- confirm there.
register_dataset("number_sorting", NumberSortingDataset, NumberSortingConfig)

View file

@ -6,7 +6,7 @@ from random import Random
from typing import List, Optional
from ..data import read_data_file
from ..dataset import ProceduralDataset
from ..factory import ProceduralDataset, register_dataset
@dataclass
@ -55,17 +55,4 @@ class WordReversalDataset(ProceduralDataset):
}
def word_reversal_dataset(
    min_words: int = 3,
    max_words: int = 8,
    seed: Optional[int] = None,
    size: int = 500,
) -> WordReversalDataset:
    """Build a WordReversalDataset from individual configuration values.

    Each argument is forwarded unchanged, by keyword, to the
    ``WordReversalConfig`` field of the same name, and the resulting
    config object is handed to ``WordReversalDataset``.

    Returns:
        The dataset constructed from that config.
    """
    return WordReversalDataset(
        WordReversalConfig(
            min_words=min_words,
            max_words=max_words,
            seed=seed,
            size=size,
        )
    )
# Pass the name "word_reversal" together with the dataset and config classes
# to register_dataset (imported from ..factory). NOTE(review): presumably this
# is what makes the dataset creatable by name via the factory -- confirm there.
register_dataset("word_reversal", WordReversalDataset, WordReversalConfig)