feat: Add CompositeDataset for weighted multi-dataset sampling
This commit is contained in:
Andreas Koepf (aider) 2025-02-04 19:06:13 +01:00
parent 0561844779
commit f07b6b7f61
3 changed files with 220 additions and 0 deletions

View file

@ -2,6 +2,7 @@ from dataclasses import is_dataclass
from typing import Dict, Type, TypeVar
from .dataset import ProceduralDataset
from .composite import CompositeDataset, CompositeConfig
# Type variables for generic type hints
# ConfigT is declared without a bound; register_dataset uses it to type its config_cls argument.
ConfigT = TypeVar("ConfigT")
@ -10,6 +11,9 @@ DatasetT = TypeVar("DatasetT", bound=ProceduralDataset)
# Global registry of datasets: maps a dataset name to a (dataset class, second class) pair.
# The second tuple element is presumably the dataset's config class -- verify against register_dataset.
DATASETS: Dict[str, tuple[Type[ProceduralDataset], Type]] = {}
# Register composite dataset
# NOTE(review): this module-level call appears ABOVE the `def register_dataset` that follows it.
# Python binds a function name only when its `def` statement executes, so calling it here
# would raise NameError on import unless the name is already bound elsewhere -- confirm,
# and move this registration below the function definition.
register_dataset("composite", CompositeDataset, CompositeConfig)
def register_dataset(name: str, dataset_cls: Type[DatasetT], config_cls: Type[ConfigT]) -> None:
"""