mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
Refactor CaesarCipher
This commit is contained in:
parent
23aa6ca7e7
commit
5279ccf7e1
8 changed files with 513 additions and 159 deletions
|
|
@ -1,84 +1,60 @@
|
|||
"""Caesar cipher task generator"""
|
||||
"""Caesar cipher exercise that generates encryption/decryption tasks."""
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from random import Random
|
||||
from string import ascii_uppercase
|
||||
from typing import List, Optional
|
||||
from typing import Dict, Any
|
||||
|
||||
from reasoning_gym.data import read_data_file
|
||||
class CaesarCipherExercise:
|
||||
"""Exercise generator for Caesar cipher encryption/decryption tasks."""
|
||||
|
||||
from ..factory import ProceduralDataset, register_dataset
|
||||
def __init__(self):
|
||||
self.curriculum = None
|
||||
|
||||
def generate(self, curriculum: Any) -> Dict[str, Any]:
|
||||
"""
|
||||
Generate a Caesar cipher problem using the curriculum.
|
||||
|
||||
@dataclass
|
||||
class CaesarCipherConfig:
|
||||
"""Configuration for Caesar cipher task generation"""
|
||||
Returns:
|
||||
Dict containing:
|
||||
- question: str (e.g. "Decrypt this Caesar cipher text: KHOOR")
|
||||
- answer: str (the decrypted text)
|
||||
- metadata: dict with details (rotation, cipher_text, clear_text)
|
||||
"""
|
||||
self.curriculum = curriculum
|
||||
template = curriculum.get_template(curriculum.rng)
|
||||
return template.eval(self, curriculum.rng)
|
||||
|
||||
delimiter: str = "." # Delimiter for splitting text into sentences
|
||||
min_words: int = 3 # Minimum words per sentence
|
||||
max_words: int = 20 # Maximum words per sentence
|
||||
min_rotation: int = 1 # Minimum Caesar rotation
|
||||
max_rotation: int = 25 # Maximum Caesar rotation
|
||||
seed: Optional[int] = None
|
||||
size: int = 500 # Virtual dataset size
|
||||
|
||||
def validate(self) -> None:
    """Validate configuration parameters.

    Checks that ``min_words`` is positive, that ``max_words`` is not smaller
    than ``min_words``, and that the rotation bounds satisfy
    ``0 < min_rotation <= max_rotation < 26``.

    Raises:
        AssertionError: If any parameter is out of range. The exception is
            raised explicitly instead of through an ``assert`` statement so
            the checks are not stripped when Python runs with ``-O``.
    """
    if not self.min_words > 0:
        raise AssertionError("min_words must be positive")
    if not self.max_words >= self.min_words:
        raise AssertionError("max_words must be >= min_words")
    if not 0 < self.min_rotation <= self.max_rotation < 26:
        raise AssertionError("rotation must be in range [1,25]")
|
||||
|
||||
|
||||
class CaesarCipherDataset(ProceduralDataset):
|
||||
"""Generates Caesar cipher encryption/decryption tasks"""
|
||||
|
||||
def __init__(self, config: CaesarCipherConfig):
    """Set up the dataset and collect the sentences usable as plaintext.

    The text returned by ``read_data_file("in_the_year_2889.txt")`` is split
    on ``config.delimiter``. Within each non-empty piece, only purely
    alphabetic words are kept (upper-cased); a piece is stored in
    ``self.valid_sentences`` when its kept-word count lies within
    ``[min_words, max_words]``.
    """
    super().__init__(config=config, seed=config.seed, size=config.size)

    corpus = read_data_file("in_the_year_2889.txt")

    # Trim every piece once and drop the ones that are empty after trimming.
    trimmed = (piece.strip() for piece in corpus.split(config.delimiter))
    candidates = [piece for piece in trimmed if piece]

    self.valid_sentences = []
    for candidate in candidates:
        # Tokens containing digits or punctuation are discarded entirely.
        kept = [token.upper() for token in candidate.split() if token.isalpha()]
        word_count = len(kept)
        if self.config.min_words <= word_count <= self.config.max_words:
            self.valid_sentences.append(" ".join(kept))
|
||||
|
||||
def _caesar_encrypt(self, text: str, rotation: int) -> str:
|
||||
"""Apply Caesar cipher encryption with given rotation"""
|
||||
result = []
|
||||
for char in text:
|
||||
if char.isalpha():
|
||||
# Convert to 0-25 range, rotate, convert back to ASCII
|
||||
base = ord("A")
|
||||
rotated = (ord(char) - base + rotation) % 26
|
||||
result.append(chr(base + rotated))
|
||||
else:
|
||||
result.append(char)
|
||||
return "".join(result)
|
||||
|
||||
def __getitem__(self, idx: int) -> dict:
|
||||
"""Generate a single Caesar cipher task"""
|
||||
rng = Random(self.seed + idx)
|
||||
|
||||
# Select random sentence and rotation
|
||||
sentence = rng.choice(self.valid_sentences)
|
||||
rotation = rng.randint(self.config.min_rotation, self.config.max_rotation)
|
||||
|
||||
# Generate cipher text
|
||||
cipher_text = self._caesar_encrypt(sentence, rotation)
|
||||
def _parse_expression(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Parse the template metadata into structured data.
|
||||
|
||||
The metadata structure is expected to be:
|
||||
{
|
||||
"cipher_text": {
|
||||
"encrypted_text": str, # The encrypted text
|
||||
"clear_text": str, # The original text
|
||||
"rotation": int # The rotation value
|
||||
}
|
||||
}
|
||||
Returns:
|
||||
Dictionary containing parsed data for evaluation
|
||||
"""
|
||||
return {
|
||||
"question": f"Decrypt this Caesar cipher text: {cipher_text}",
|
||||
"answer": sentence,
|
||||
"metadata": {"rotation": rotation, "cipher_text": cipher_text, "clear_text": sentence},
|
||||
"cipher_text": metadata["cipher_text"]["encrypted_text"],
|
||||
"clear_text": metadata["cipher_text"]["clear_text"],
|
||||
"rotation": metadata["cipher_text"]["rotation"]
|
||||
}
|
||||
|
||||
def _evaluate_expression(self, parsed: Dict[str, Any]) -> str:
|
||||
"""
|
||||
Return the clear text for decryption problems.
|
||||
|
||||
register_dataset("caesar_cipher", CaesarCipherDataset, CaesarCipherConfig)
|
||||
Args:
|
||||
parsed: Dictionary containing:
|
||||
- cipher_text: str (the encrypted text)
|
||||
- clear_text: str (the original text)
|
||||
- rotation: int (the rotation value)
|
||||
Returns:
|
||||
String with the decrypted text (clear_text)
|
||||
"""
|
||||
# For the current curriculum, we only handle decryption
|
||||
# and the clear_text is already provided in the metadata
|
||||
return parsed["clear_text"]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue