mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-29 17:35:16 +00:00
add simple dataset gallery generation script
This commit is contained in:
parent
71ccd41adb
commit
5a88cf2529
6 changed files with 1352 additions and 105 deletions
1309
GALLERY.md
Normal file
1309
GALLERY.md
Normal file
File diff suppressed because it is too large
Load diff
63
python
63
python
|
|
@ -1,63 +0,0 @@
|
||||||
"""Generate a markdown gallery of all available datasets with examples"""
|
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
import textwrap
|
|
||||||
|
|
||||||
from reasoning_gym.factory import DATASETS, create_dataset
|
|
||||||
|
|
||||||
|
|
||||||
def generate_gallery() -> str:
    """Generate markdown content for the gallery.

    The document consists of a title, an index linking to every key of
    ``DATASETS`` (sorted by name), and an "Examples" section with one
    sub-section per dataset. Each sub-section holds the dataset class
    docstring (when it has one) followed by up to three example items
    rendered inside a fenced code block.

    Returns:
        The complete markdown document as a single string.
    """
    # Local imports keep this function self-contained; both are stdlib.
    import inspect
    from itertools import islice

    names = sorted(DATASETS)

    # Start with header
    content = ["# Dataset Gallery\n"]

    # Add index
    content.append("## Available Datasets\n")
    for name in names:
        # Heading anchors are the lower-cased heading text with spaces turned
        # into hyphens; underscores are kept, so they must not be rewritten or
        # the link will not resolve to the "### {name}" heading below.
        anchor = name.lower().replace(" ", "-")
        content.append(f"- [{name}](#{anchor})\n")
    content.append("\n")

    # Add examples for each dataset
    content.append("## Examples\n")
    for name in names:
        dataset = create_dataset(name)

        # Add dataset header
        content.append(f"### {name}\n")

        # Get dataset class docstring if available. cleandoc() strips the
        # first line and removes the common indentation of the remaining
        # lines, which dedent() cannot do once the first line is unindented.
        raw_doc = dataset.__class__.__doc__
        if raw_doc:
            content.append(f"{inspect.cleandoc(raw_doc)}\n")

        content.append("```\n")
        # Show at most 3 examples; islice stops without generating a 4th item.
        for number, item in enumerate(islice(dataset, 3), start=1):
            content.append(f"Example {number}:\n")
            content.append(f"Question: {item['question']}\n")
            content.append(f"Answer: {item['answer']}\n")
            # Metadata is optional: skip the line when it is missing or empty.
            if item.get("metadata"):
                content.append(f"Metadata: {item['metadata']}\n")
            content.append("\n")
        content.append("```\n\n")

    return "".join(content)
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Generate the gallery markdown file.

    Renders the gallery via ``generate_gallery`` and writes it to
    ``GALLERY.md`` in the parent of the directory containing this script,
    then prints the path that was written.
    """
    gallery_path = Path(__file__).parent.parent / "GALLERY.md"
    gallery_content = generate_gallery()

    # Write as UTF-8 explicitly instead of relying on the platform default
    # encoding, which may be unable to encode non-ASCII example text.
    gallery_path.write_text(gallery_content, encoding="utf-8")

    print(f"Generated gallery at {gallery_path}")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
@ -4,7 +4,7 @@ from abc import ABC, abstractmethod
|
||||||
from collections.abc import Iterable, Sized
|
from collections.abc import Iterable, Sized
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from random import Random
|
from random import Random
|
||||||
from typing import Any, Dict, Iterator, Optional, TypeVar, Type
|
from typing import Any, Dict, Iterator, Optional, Type, TypeVar
|
||||||
|
|
||||||
|
|
||||||
class ProceduralDataset(ABC, Sized, Iterable[Dict[str, Any]]):
|
class ProceduralDataset(ABC, Sized, Iterable[Dict[str, Any]]):
|
||||||
|
|
@ -66,7 +66,7 @@ class ProceduralDataset(ABC, Sized, Iterable[Dict[str, Any]]):
|
||||||
return reward
|
return reward
|
||||||
|
|
||||||
|
|
||||||
T = TypeVar('T', bound='ProceduralDataset')
|
T = TypeVar("T", bound="ProceduralDataset")
|
||||||
|
|
||||||
|
|
||||||
class ReseedingDataset(Iterable[Dict[str, Any]]):
|
class ReseedingDataset(Iterable[Dict[str, Any]]):
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,9 @@
|
||||||
from .family_relationships import FamilyRelationshipsConfig, FamilyRelationshipsDataset
|
from .family_relationships import FamilyRelationshipsConfig, FamilyRelationshipsDataset
|
||||||
|
from .quantum_lock import QuantumLockConfig, QuantumLockDataset
|
||||||
|
|
||||||
__all__ = ["FamilyRelationshipsDataset", "FamilyRelationshipsConfig"]
|
__all__ = [
|
||||||
|
"FamilyRelationshipsDataset",
|
||||||
|
"FamilyRelationshipsConfig",
|
||||||
|
"QuantumLockConfig",
|
||||||
|
"QuantumLockDataset",
|
||||||
|
]
|
||||||
|
|
|
||||||
6
scripts/generate_gallery.py
Normal file → Executable file
6
scripts/generate_gallery.py
Normal file → Executable file
|
|
@ -2,9 +2,11 @@
|
||||||
"""Generate a markdown gallery of all available datasets with examples"""
|
"""Generate a markdown gallery of all available datasets with examples"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
|
||||||
import textwrap
|
import textwrap
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import reasoning_gym.cognition.figlet_fonts
|
||||||
|
import reasoning_gym.cognition.rubiks_cube
|
||||||
from reasoning_gym.factory import DATASETS, create_dataset
|
from reasoning_gym.factory import DATASETS, create_dataset
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -54,7 +56,7 @@ def generate_gallery() -> str:
|
||||||
content.append(f"Example {i+1}:\n")
|
content.append(f"Example {i+1}:\n")
|
||||||
content.append(f"Question: {item['question']}\n")
|
content.append(f"Question: {item['question']}\n")
|
||||||
content.append(f"Answer: {item['answer']}\n")
|
content.append(f"Answer: {item['answer']}\n")
|
||||||
if item.get('metadata'):
|
if item.get("metadata"):
|
||||||
content.append(f"Metadata: {item['metadata']}\n")
|
content.append(f"Metadata: {item['metadata']}\n")
|
||||||
content.append("\n")
|
content.append("\n")
|
||||||
content.append("```\n\n")
|
content.append("```\n\n")
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from reasoning_gym.dataset import ReseedingDataset
|
|
||||||
from reasoning_gym.arithmetic.basic_arithmetic import BasicArithmeticDataset, BasicArithmeticDatasetConfig
|
from reasoning_gym.arithmetic.basic_arithmetic import BasicArithmeticDataset, BasicArithmeticDatasetConfig
|
||||||
|
from reasoning_gym.dataset import ReseedingDataset
|
||||||
|
|
||||||
|
|
||||||
def test_reseeding_dataset_iteration():
|
def test_reseeding_dataset_iteration():
|
||||||
|
|
@ -9,14 +9,7 @@ def test_reseeding_dataset_iteration():
|
||||||
|
|
||||||
# Create base dataset
|
# Create base dataset
|
||||||
config = BasicArithmeticDatasetConfig(
|
config = BasicArithmeticDatasetConfig(
|
||||||
min_terms=2,
|
min_terms=2, max_terms=3, min_digits=1, max_digits=2, operators=["+"], allow_parentheses=False, seed=42, size=10
|
||||||
max_terms=3,
|
|
||||||
min_digits=1,
|
|
||||||
max_digits=2,
|
|
||||||
operators=["+"],
|
|
||||||
allow_parentheses=False,
|
|
||||||
seed=42,
|
|
||||||
size=10
|
|
||||||
)
|
)
|
||||||
base_dataset = BasicArithmeticDataset(config)
|
base_dataset = BasicArithmeticDataset(config)
|
||||||
|
|
||||||
|
|
@ -38,7 +31,7 @@ def test_reseeding_dataset_iteration():
|
||||||
|
|
||||||
# Verify chunks are different
|
# Verify chunks are different
|
||||||
chunk1 = first_items[:chunk_size]
|
chunk1 = first_items[:chunk_size]
|
||||||
chunk2 = first_items[chunk_size:2*chunk_size]
|
chunk2 = first_items[chunk_size : 2 * chunk_size]
|
||||||
assert chunk1 != chunk2, "Different chunks should generate different items"
|
assert chunk1 != chunk2, "Different chunks should generate different items"
|
||||||
|
|
||||||
# Test score_answer forwarding
|
# Test score_answer forwarding
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue