reasoning-gym/reasoning_gym/algorithmic/caesar_cipher.py
Zafir Stojanovski dced3bfc45
fix(curriculum): Make boundaries in curriculum more sensible (#407)
* init

* fix tests

* unify codeio

* filtered for libraries not present in reasoning-gym

* fix more bounds

* puzzle24

* knight swap curriculum

* fix number sorting

* fix attributes

* add validation of config in creation of dataset

* dry run for instantiating and validating the datasets

* remove unused imports

* fix curriculum tests to reference newly updated attribute names
2025-04-04 20:24:14 +02:00

122 lines
4.4 KiB
Python

"""Caesar cipher task generator"""
from dataclasses import dataclass
from random import Random
from typing import Optional
from ..coaching import BaseCurriculum, RangeAttributeDefinition
from ..data import read_data_file
from ..factory import ProceduralDataset, register_dataset
# Name passed to register_dataset() for this task; also emitted as
# "source_dataset" in every generated item's metadata.
DATASET_NAME = "caesar_cipher"
@dataclass
class CaesarCipherConfig:
    """Configuration for Caesar cipher task generation"""

    delimiter: str = "."  # Delimiter for splitting text into sentences
    min_words: int = 3  # Minimum words per sentence
    max_words: int = 20  # Maximum words per sentence
    min_rotation: int = 1  # Minimum Caesar rotation
    max_rotation: int = 25  # Maximum Caesar rotation
    seed: Optional[int] = None  # Seed forwarded to the dataset base class
    size: int = 500  # Virtual dataset size

    def validate(self) -> None:
        """Validate configuration parameters.

        Raises:
            AssertionError: if the delimiter is empty, the word bounds are not
                a positive, ordered interval, or the rotation bounds fall
                outside [1, 25].
        """
        # str.split("") raises "ValueError: empty separator"; reject it here so
        # the failure points at the configuration instead of at corpus loading.
        assert self.delimiter, "delimiter must be a non-empty string"
        assert self.min_words > 0, "min_words must be positive"
        assert self.max_words >= self.min_words, "max_words must be >= min_words"
        # Rotations of 0 or 26 would leave the text unchanged, so both are excluded.
        assert 0 < self.min_rotation <= self.max_rotation < 26, "rotation must be in range [1,25]"
class CaesarCipherDataset(ProceduralDataset):
    """Generates Caesar cipher decryption tasks.

    Sentences are extracted once from a bundled text file at construction
    time. Each item then picks one sentence and one rotation using a random
    generator seeded from ``self.seed + idx``.
    """

    def __init__(self, config: CaesarCipherConfig):
        """Load the source text and collect the sentences usable as plaintext.

        Args:
            config: Generation settings; ``delimiter`` splits the text into
                sentences and ``min_words``/``max_words`` bound the number of
                alphabetic words a sentence must contain to be kept.
        """
        super().__init__(config=config, seed=config.seed, size=config.size)

        # Load and preprocess text
        text = read_data_file("in_the_year_2889.txt")

        # Split into sentences and drop fragments that are empty after stripping
        sentences = [s.strip() for s in text.split(config.delimiter) if s.strip()]

        # Keep only purely alphabetic words (tokens with punctuation or digits
        # are dropped), upper-cased, and retain sentences within the word bounds.
        self.valid_sentences = []
        for sentence in sentences:
            words = [w.upper() for w in sentence.split() if w.isalpha()]
            if self.config.min_words <= len(words) <= self.config.max_words:
                self.valid_sentences.append(" ".join(words))

    def _caesar_encrypt(self, text: str, rotation: int) -> str:
        """Apply a Caesar shift of ``rotation`` positions to ``text``.

        Only ASCII letters are rotated, each within its own case. Every other
        character (spaces, digits, punctuation, non-ASCII letters) is copied
        through unchanged, because a 26-letter rotation is undefined for it.

        Args:
            text: Plaintext to encrypt.
            rotation: Number of alphabet positions to shift by; reduced modulo 26.

        Returns:
            The encrypted text, same length as ``text``.
        """
        result = []
        for char in text:
            # str.isalpha() is also true for lowercase and non-ASCII letters,
            # which must not be measured against ord("A"); pick the base per case.
            if "A" <= char <= "Z":
                base = ord("A")
            elif "a" <= char <= "z":
                base = ord("a")
            else:
                result.append(char)
                continue
            # Convert to 0-25 range, rotate, convert back to a character
            rotated = (ord(char) - base + rotation) % 26
            result.append(chr(base + rotated))
        return "".join(result)

    def __getitem__(self, idx: int) -> dict:
        """Generate a single Caesar cipher task.

        Args:
            idx: Item index; combined with ``self.seed`` to seed the item's
                random generator.

        Returns:
            A dict with the ``question`` prompt, the plaintext ``answer``, and
            ``metadata`` describing the rotation, both texts, the word count
            and the configured difficulty bounds.
        """
        rng = Random(self.seed + idx)

        # Select random sentence and rotation (sentence first: the draw order
        # determines which values a given seed produces)
        sentence = rng.choice(self.valid_sentences)
        num_words = len(sentence.split())
        rotation = rng.randint(self.config.min_rotation, self.config.max_rotation)

        # Generate cipher text
        cipher_text = self._caesar_encrypt(sentence, rotation)

        return {
            "question": f"Decrypt this Caesar cipher text: {cipher_text}. Provide only the decrypted text as your final answer.",
            "answer": sentence,
            "metadata": {
                "source_dataset": DATASET_NAME,
                "source_index": idx,
                "rotation": rotation,
                "cipher_text": cipher_text,
                "clear_text": sentence,
                "num_words": num_words,
                "difficulty": {
                    "words": (self.config.min_words, self.config.max_words),
                    "rotation": (self.config.min_rotation, self.config.max_rotation),
                },
            },
        }
class CaesarCipherCurriculum(BaseCurriculum):
    """Curriculum for Caesar cipher task generation.

    Defines two range attributes: ``rotation`` (mapped to the config fields
    ``min_rotation``/``max_rotation``) and ``words`` (mapped to
    ``min_words``/``max_words``).
    """

    def __init__(self):
        super().__init__(CaesarCipherCurriculum.__name__, CaesarCipherConfig)

        self._define_attributes(
            RangeAttributeDefinition(
                name="rotation",
                # CaesarCipherConfig.validate() only accepts rotations in
                # [1, 25], so no level may exceed 25. A level of 50 would
                # produce a configuration that fails validation.
                levels=[5, 10, 15, 25],
                description="Max rotation for cipher",
                lower_field_name="min_rotation",
                upper_field_name="max_rotation",
                ensure_interval=True,
            ),
            RangeAttributeDefinition(
                name="words",
                levels=[5, 15, 25, 50],
                description="Max number of words",
                lower_field_name="min_words",
                upper_field_name="max_words",
                ensure_interval=True,
            ),
        )
# Register the dataset class together with its config class and curriculum
# under DATASET_NAME.
register_dataset(DATASET_NAME, CaesarCipherDataset, CaesarCipherConfig, CaesarCipherCurriculum)