diff --git a/reasoning_gym/games/__init__.py b/reasoning_gym/games/__init__.py
index a17d0ea6..2b01898e 100644
--- a/reasoning_gym/games/__init__.py
+++ b/reasoning_gym/games/__init__.py
@@ -7,7 +7,7 @@ Game tasks for training reasoning capabilities:
 """
 
 from .countdown import CountdownConfig, CountdownDataset
-from .emoji_mystery import EmojiMysteryConfig, EmojiMysteryDataset
+from .emoji_mystery import EmojiMysteryConfig, EmojiMysteryCurriculum, EmojiMysteryDataset
 from .futoshiki import FutoshikiConfig, FutoshikiDataset
 from .knight_swap import KnightSwapConfig, KnightSwapDataset
 from .mahjong import MahjongPuzzleConfig, MahjongPuzzleCurriculum, MahjongPuzzleDataset
@@ -25,6 +25,7 @@ __all__ = [
     "CountdownConfig",
     "CountdownDataset",
     "EmojiMysteryConfig",
+    "EmojiMysteryCurriculum",
     "EmojiMysteryDataset",
     "FutoshikiConfig",
     "FutoshikiDataset",
diff --git a/reasoning_gym/games/emoji_mystery.py b/reasoning_gym/games/emoji_mystery.py
index 27f48d87..be34b140 100644
--- a/reasoning_gym/games/emoji_mystery.py
+++ b/reasoning_gym/games/emoji_mystery.py
@@ -3,6 +3,7 @@ from dataclasses import dataclass
 from random import Random
 from typing import Any, Optional
 
+from ..coaching import AttributeType, BaseCurriculum, RangeAttributeDefinition
 from ..data import read_data_file
 from ..factory import ProceduralDataset, register_dataset
 
@@ -188,7 +189,14 @@ class EmojiMysteryDataset(ProceduralDataset):
         secret_sentence = rng.choice(self.sentences).strip().replace("\n", " ")
         encoded_sentence = self.encode(secret_sentence, secret_emoji)
         question = QUESTION_TEMPLATE.format(sentence=encoded_sentence, hint_function=hint_function)
-        return {"question": question, "answer": secret_sentence, "metadata": {"emoji": secret_emoji}}
+        return {
+            "question": question,
+            "answer": secret_sentence,
+            "metadata": {
+                "emoji": secret_emoji,
+                "difficulty": {"num_words_in_sentence": len(re.findall(r"\b\w+\b", secret_sentence))},
+            },
+        }
 
     def variance_selector_to_byte(self, variation_selector: str) -> Optional[int]:
         variation_selector_codepoint = ord(variation_selector)
@@ -233,4 +241,22 @@ class EmojiMysteryDataset(ProceduralDataset):
         return reward
 
 
-register_dataset("emoji_mystery", EmojiMysteryDataset, EmojiMysteryConfig)
+class EmojiMysteryCurriculum(BaseCurriculum):
+    def __init__(self):
+        super().__init__(EmojiMysteryCurriculum.__name__, EmojiMysteryConfig)
+
+        self._define_attributes(
+            RangeAttributeDefinition(
+                name="num_words_in_sentence",
+                levels=[3, 10, 20, 35],
+                default_level=0,
+                description="Number of words in the sentence",
+                attr_type=AttributeType.STATIC,
+                min_value=3,
+                lower_field_name="min_words_in_sentence",
+                upper_field_name="max_words_in_sentence",
+            ),
+        )
+
+
+register_dataset("emoji_mystery", EmojiMysteryDataset, EmojiMysteryConfig, EmojiMysteryCurriculum)
diff --git a/tests/test_emoji_mystery.py b/tests/test_emoji_mystery.py
index c7618240..4d754f45 100644
--- a/tests/test_emoji_mystery.py
+++ b/tests/test_emoji_mystery.py
@@ -2,7 +2,7 @@ from random import Random
 
 import pytest
 
-from reasoning_gym.games.emoji_mystery import EmojiMysteryConfig, EmojiMysteryDataset
+from reasoning_gym.games.emoji_mystery import EmojiMysteryConfig, EmojiMysteryCurriculum, EmojiMysteryDataset
 
 
 def test_emoji_mystery_config_validation():
@@ -101,3 +101,35 @@ def test_emoji_mystery_scoring():
 
     # Test None answer
     assert dataset.score_answer(None, entry) == 0.0
+
+
+def test_emoji_mystery_curriculum():
+    """Test the emoji mystery curriculum functionality"""
+    curriculum = EmojiMysteryCurriculum()
+
+    base_value = {"size": 150, "seed": 1}
+
+    # Test base configuration
+    base_cfg: EmojiMysteryConfig = curriculum.generate_configuration(base_value)
+    assert base_cfg.seed == 1
+    assert base_cfg.size == 150
+    assert base_cfg.min_words_in_sentence == 3
+    assert base_cfg.max_words_in_sentence == 3
+
+    # Test incrementing attribute level
+    curriculum.increment_attr_level("num_words_in_sentence")
+    increased_cfg = curriculum.generate_configuration(base_value)
+    assert increased_cfg.min_words_in_sentence == 10
+    assert increased_cfg.max_words_in_sentence == 10
+
+    # Test incrementing attribute level again
+    curriculum.increment_attr_level("num_words_in_sentence")
+    double_increased_cfg = curriculum.generate_configuration(base_value)
+    assert double_increased_cfg.min_words_in_sentence == 20
+    assert double_increased_cfg.max_words_in_sentence == 20
+
+    # Test decrementing attribute level
+    curriculum.decrement_attr_level("num_words_in_sentence")
+    decreased_cfg = curriculum.generate_configuration(base_value)
+    assert decreased_cfg.min_words_in_sentence == 10
+    assert decreased_cfg.max_words_in_sentence == 10