add simple dataset gallery generation script

This commit is contained in:
Andreas Koepf 2025-01-30 22:30:26 +01:00
parent 71ccd41adb
commit 5a88cf2529
6 changed files with 1352 additions and 105 deletions

1309
GALLERY.md Normal file

File diff suppressed because it is too large Load diff

63
python
View file

@ -1,63 +0,0 @@
"""Generate a markdown gallery of all available datasets with examples"""
import inspect
import textwrap
from itertools import islice
from pathlib import Path

from reasoning_gym.factory import DATASETS, create_dataset
def generate_gallery() -> str:
    """Generate markdown content for the gallery.

    The document starts with a "# Dataset Gallery" title, followed by a
    linked index of every name registered in ``DATASETS`` (sorted), followed
    by one "###" section per dataset.  Each section shows the dataset class
    docstring (when it has one) and up to three examples inside a fenced
    block.

    Returns:
        The complete markdown document as a single string.
    """
    names = sorted(DATASETS)

    # Start with header
    content = ["# Dataset Gallery\n"]

    # Add index
    content.append("## Available Datasets\n")
    for name in names:
        # GitHub-style heading slugs lowercase the heading text but keep
        # underscores, so the link target is the name itself; rewriting
        # "_" to "-" would point at an anchor that does not exist.
        anchor = name.lower()
        content.append(f"- [{name}](#{anchor})\n")
    content.append("\n")

    # Add examples for each dataset
    content.append("## Examples\n")
    for name in names:
        dataset = create_dataset(name)

        # Add dataset header
        content.append(f"### {name}\n")

        # Get dataset class docstring if available.  cleandoc() removes the
        # indentation of continuation lines even though the first line is
        # flush-left, which a strip() followed by textwrap.dedent() cannot do.
        raw_doc = dataset.__class__.__doc__
        if raw_doc:
            content.append(f"{inspect.cleandoc(raw_doc)}\n")

        content.append("```\n")

        # Show 3 examples
        for number, item in enumerate(islice(dataset, 3), start=1):
            content.append(f"Example {number}:\n")
            content.append(f"Question: {item['question']}\n")
            content.append(f"Answer: {item['answer']}\n")
            # Metadata is optional: skip the line instead of raising KeyError.
            metadata = item.get("metadata")
            if metadata:
                content.append(f"Metadata: {metadata}\n")
            content.append("\n")

        content.append("```\n\n")

    return "".join(content)
def main() -> None:
    """Generate gallery markdown file.

    Writes the output of ``generate_gallery()`` to ``GALLERY.md`` two
    directory levels above this script and prints the destination path.
    """
    gallery_path = Path(__file__).parent.parent / "GALLERY.md"
    gallery_content = generate_gallery()

    # Write with an explicit encoding: the platform default is not UTF-8
    # everywhere, and example text may contain non-ASCII characters.
    gallery_path.write_text(gallery_content, encoding="utf-8")

    print(f"Generated gallery at {gallery_path}")


if __name__ == "__main__":
    main()

View file

@ -4,7 +4,7 @@ from abc import ABC, abstractmethod
from collections.abc import Iterable, Sized from collections.abc import Iterable, Sized
from copy import deepcopy from copy import deepcopy
from random import Random from random import Random
from typing import Any, Dict, Iterator, Optional, TypeVar, Type from typing import Any, Dict, Iterator, Optional, Type, TypeVar
class ProceduralDataset(ABC, Sized, Iterable[Dict[str, Any]]): class ProceduralDataset(ABC, Sized, Iterable[Dict[str, Any]]):
@ -66,7 +66,7 @@ class ProceduralDataset(ABC, Sized, Iterable[Dict[str, Any]]):
return reward return reward
T = TypeVar('T', bound='ProceduralDataset') T = TypeVar("T", bound="ProceduralDataset")
class ReseedingDataset(Iterable[Dict[str, Any]]): class ReseedingDataset(Iterable[Dict[str, Any]]):

View file

@ -1,3 +1,9 @@
from .family_relationships import FamilyRelationshipsConfig, FamilyRelationshipsDataset from .family_relationships import FamilyRelationshipsConfig, FamilyRelationshipsDataset
from .quantum_lock import QuantumLockConfig, QuantumLockDataset
__all__ = ["FamilyRelationshipsDataset", "FamilyRelationshipsConfig"] __all__ = [
"FamilyRelationshipsDataset",
"FamilyRelationshipsConfig",
"QuantumLockConfig",
"QuantumLockDataset",
]

6
scripts/generate_gallery.py Normal file → Executable file
View file

@ -2,9 +2,11 @@
"""Generate a markdown gallery of all available datasets with examples""" """Generate a markdown gallery of all available datasets with examples"""
import os import os
from pathlib import Path
import textwrap import textwrap
from pathlib import Path
import reasoning_gym.cognition.figlet_fonts
import reasoning_gym.cognition.rubiks_cube
from reasoning_gym.factory import DATASETS, create_dataset from reasoning_gym.factory import DATASETS, create_dataset
@ -54,7 +56,7 @@ def generate_gallery() -> str:
content.append(f"Example {i+1}:\n") content.append(f"Example {i+1}:\n")
content.append(f"Question: {item['question']}\n") content.append(f"Question: {item['question']}\n")
content.append(f"Answer: {item['answer']}\n") content.append(f"Answer: {item['answer']}\n")
if item.get('metadata'): if item.get("metadata"):
content.append(f"Metadata: {item['metadata']}\n") content.append(f"Metadata: {item['metadata']}\n")
content.append("\n") content.append("\n")
content.append("```\n\n") content.append("```\n\n")

View file

@ -1,7 +1,7 @@
import pytest import pytest
from reasoning_gym.dataset import ReseedingDataset
from reasoning_gym.arithmetic.basic_arithmetic import BasicArithmeticDataset, BasicArithmeticDatasetConfig from reasoning_gym.arithmetic.basic_arithmetic import BasicArithmeticDataset, BasicArithmeticDatasetConfig
from reasoning_gym.dataset import ReseedingDataset
def test_reseeding_dataset_iteration(): def test_reseeding_dataset_iteration():
@ -9,14 +9,7 @@ def test_reseeding_dataset_iteration():
# Create base dataset # Create base dataset
config = BasicArithmeticDatasetConfig( config = BasicArithmeticDatasetConfig(
min_terms=2, min_terms=2, max_terms=3, min_digits=1, max_digits=2, operators=["+"], allow_parentheses=False, seed=42, size=10
max_terms=3,
min_digits=1,
max_digits=2,
operators=["+"],
allow_parentheses=False,
seed=42,
size=10
) )
base_dataset = BasicArithmeticDataset(config) base_dataset = BasicArithmeticDataset(config)