mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-28 17:29:39 +00:00
Merge branch 'main' into codeio-sampler
This commit is contained in:
commit
3a5dc2080f
28 changed files with 932 additions and 164 deletions
|
|
@ -5,7 +5,7 @@ Reasoning Gym - A library of procedural dataset generators for training reasonin
|
|||
from . import algebra, algorithmic, arc, arithmetic, code, cognition, data, games, geometry, graphs, induction, logic
|
||||
from .factory import create_dataset, register_dataset
|
||||
|
||||
__version__ = "0.1.9"
|
||||
__version__ = "0.1.11"
|
||||
__all__ = [
|
||||
"arc",
|
||||
"algebra",
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import random
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, Optional
|
||||
from typing import Any, Optional
|
||||
|
||||
import sympy
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import random
|
||||
from dataclasses import dataclass
|
||||
from fractions import Fraction
|
||||
from typing import Any, Dict, Optional
|
||||
from typing import Any, Optional
|
||||
|
||||
import sympy
|
||||
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ No leading letter can be zero (unless allow_leading_zero=True).
|
|||
|
||||
from dataclasses import dataclass
|
||||
from random import Random
|
||||
from typing import Any, Dict, Optional
|
||||
from typing import Any, Optional
|
||||
|
||||
from ..factory import ProceduralDataset, register_dataset
|
||||
|
||||
|
|
|
|||
|
|
@ -4,12 +4,12 @@ from collections import deque
|
|||
from dataclasses import dataclass
|
||||
from functools import reduce
|
||||
from random import Random
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from typing import Any, Optional
|
||||
|
||||
from ..factory import ProceduralDataset, register_dataset
|
||||
|
||||
|
||||
def min_moves_n(jug_capacities: List[int], target: int) -> Optional[int]:
|
||||
def min_moves_n(jug_capacities: list[int], target: int) -> Optional[int]:
|
||||
"""
|
||||
Compute the minimum number of moves required to have exactly `target` gallons
|
||||
in any one jug for a puzzle with multiple jugs.
|
||||
|
|
@ -73,7 +73,7 @@ def min_moves_n(jug_capacities: List[int], target: int) -> Optional[int]:
|
|||
return None
|
||||
|
||||
|
||||
def generate_puzzle(rng: Random, num_jugs: int = 3, difficulty: int = 6, max_attempts: int = 10000) -> Dict[str, any]:
|
||||
def generate_puzzle(rng: Random, num_jugs: int = 3, difficulty: int = 6, max_attempts: int = 10000) -> dict[str, Any]:
|
||||
"""
|
||||
Generate a multi-jug water puzzle.
|
||||
|
||||
|
|
@ -181,7 +181,7 @@ def verify_solution(puzzle, moves):
|
|||
return (any(w == target for w in state), states)
|
||||
|
||||
|
||||
def generate_jug_solution(jug_capacities: Tuple[int, int, int], target: int) -> List[str]:
|
||||
def generate_jug_solution(jug_capacities: tuple[int, int, int], target: int) -> list[str]:
|
||||
"""Solves the jug puzzle and returns a sequence of formatted steps."""
|
||||
capacities = list(jug_capacities)
|
||||
initial_state = (0, 0, 0)
|
||||
|
|
@ -283,14 +283,14 @@ Reply as a JSON-parsable list of moves which result in any of the jugs being fil
|
|||
"metadata": {"puzzle": puzzle},
|
||||
}
|
||||
|
||||
def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
|
||||
def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
|
||||
"""Determine if the solution provided solves the Jugs task.
|
||||
|
||||
The function awards 1.0 for a correct answer.
|
||||
|
||||
Args:
|
||||
answer (Optional[str]): The user's answer.
|
||||
entry (Dict[str, any]): The original dataset entry containing the correct answer.
|
||||
entry (dict[str, Any]): The original dataset entry containing the correct answer.
|
||||
|
||||
Returns:
|
||||
float: The computed score between 0.0 and 1.0.
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import random
|
||||
import string
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, Optional
|
||||
from typing import Any, Optional
|
||||
|
||||
from ..factory import ProceduralDataset, register_dataset
|
||||
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ https://leetcode.com/problems/rotting-oranges/description/
|
|||
from collections import deque
|
||||
from dataclasses import dataclass
|
||||
from random import Random
|
||||
from typing import Dict, Optional
|
||||
from typing import Optional
|
||||
|
||||
from ..factory import ProceduralDataset, register_dataset
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
import re
|
||||
from dataclasses import dataclass
|
||||
from random import Random
|
||||
from typing import Any, Dict, Optional
|
||||
from typing import Any, Optional
|
||||
|
||||
from ..data import read_data_file
|
||||
from ..factory import ProceduralDataset, register_dataset
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
import re
|
||||
from dataclasses import dataclass
|
||||
from random import Random
|
||||
from typing import Any, Dict, Optional
|
||||
from typing import Any, Optional
|
||||
|
||||
from ..data import read_data_file
|
||||
from ..factory import ProceduralDataset, register_dataset
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ class Arc1DConfig:
|
|||
|
||||
def validate(self) -> None:
|
||||
"""Validate configuration parameters"""
|
||||
assert self.min_size > 0, "min_size must be positive"
|
||||
assert self.min_size >= 8, "min_size must be >= 8"
|
||||
assert self.max_size >= self.min_size, "max_size must be >= min_size"
|
||||
assert self.num_train > 0, "num_train must be positive"
|
||||
assert self.size > 0, "size must be positive"
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ def task_move_n_pix(rng: Random, size: int, move_pix: int, solid: bool) -> Optio
|
|||
def task_move_n_pix_wrapped(rng: Random, size: int, move_pix: int, solid: bool) -> Optional[dict[str, list[int]]]:
|
||||
"""Generate a task where a block is moved to the right by move_pix pixels with wrapping."""
|
||||
block_size = rng.randint(1, size)
|
||||
block_pos = rng.randint(0, size)
|
||||
block_pos = rng.randint(0, size - 1)
|
||||
|
||||
if solid:
|
||||
color = rng.randint(1, 9)
|
||||
|
|
@ -95,8 +95,8 @@ def task_block_touch_dot(rng: Random, size: int) -> Optional[dict[str, list[int]
|
|||
dot_color = 1
|
||||
block_color = rng.randint(2, 9)
|
||||
|
||||
block_size = rng.randint(1, size)
|
||||
dot_pos = rng.randint(0, size)
|
||||
block_size = rng.randint(1, size - 1)
|
||||
dot_pos = rng.randint(0, size - 1)
|
||||
|
||||
can_place_left = dot_pos >= block_size
|
||||
can_place_right = dot_pos + block_size < size
|
||||
|
|
@ -105,7 +105,7 @@ def task_block_touch_dot(rng: Random, size: int) -> Optional[dict[str, list[int]
|
|||
return None
|
||||
|
||||
if can_place_left and can_place_right:
|
||||
side = rng.choice(["left", "right"])
|
||||
side = rng.choice(("left", "right"))
|
||||
elif can_place_left:
|
||||
side = "left"
|
||||
else:
|
||||
|
|
@ -134,8 +134,8 @@ def task_block_touch_dot_n_pix(rng: Random, size: int, move_pix: int) -> Optiona
|
|||
dot_color = 2
|
||||
block_color = rng.randint(3, 9)
|
||||
|
||||
block_size = rng.randint(1, size)
|
||||
dot_pos = rng.randint(0, size)
|
||||
block_size = rng.randint(1, size - 1)
|
||||
dot_pos = rng.randint(0, size - 1)
|
||||
|
||||
can_place_left = dot_pos >= block_size
|
||||
can_place_right = dot_pos + block_size < size
|
||||
|
|
@ -144,7 +144,7 @@ def task_block_touch_dot_n_pix(rng: Random, size: int, move_pix: int) -> Optiona
|
|||
return None
|
||||
|
||||
if can_place_left and can_place_right:
|
||||
side = rng.choice(["left", "right"])
|
||||
side = rng.choice(("left", "right"))
|
||||
elif can_place_left:
|
||||
side = "left"
|
||||
else:
|
||||
|
|
@ -177,8 +177,8 @@ def task_block_scale_to_dot(rng: Random, size: int) -> Optional[dict[str, list[i
|
|||
dot_color = 2
|
||||
block_color = rng.randint(3, 9)
|
||||
|
||||
block_size = rng.randint(1, size)
|
||||
dot_pos = rng.randint(0, size)
|
||||
block_size = rng.randint(1, size - 1)
|
||||
dot_pos = rng.randint(0, size - 1)
|
||||
|
||||
can_place_left = dot_pos >= block_size
|
||||
can_place_right = dot_pos + block_size < size
|
||||
|
|
@ -187,7 +187,7 @@ def task_block_scale_to_dot(rng: Random, size: int) -> Optional[dict[str, list[i
|
|||
return None
|
||||
|
||||
if can_place_left and can_place_right:
|
||||
side = rng.choice(["left", "right"])
|
||||
side = rng.choice(("left", "right"))
|
||||
elif can_place_left:
|
||||
side = "left"
|
||||
else:
|
||||
|
|
@ -238,13 +238,9 @@ def task_two_points_and_fill(rng: Random, size: int) -> Optional[dict[str, list[
|
|||
def task_reflect_block_with_border_pixel(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
|
||||
"""Generate a task where a block with a border pixel is reflected."""
|
||||
block_size = rng.randint(2, size)
|
||||
if block_size > size:
|
||||
return None
|
||||
|
||||
c1 = rng.randint(1, 9)
|
||||
c2 = rng.randint(1, 9)
|
||||
if c1 == c2:
|
||||
return None
|
||||
c2 = rng.choice(tuple(c for c in range(1, 9) if c != c1))
|
||||
|
||||
side = "left" if rng.random() < 0.5 else "right"
|
||||
pos = rng.randint(0, size - block_size)
|
||||
|
|
@ -265,22 +261,17 @@ def task_reflect_block_with_border_pixel(rng: Random, size: int) -> Optional[dic
|
|||
def task_reflect_block_with_border_pixel_random(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
|
||||
"""Generate a task where a random-colored block with a border pixel is reflected."""
|
||||
block_size = rng.randint(2, size)
|
||||
if block_size > size:
|
||||
return None
|
||||
|
||||
side = "left" if rng.random() < 0.5 else "right"
|
||||
pos = rng.randint(0, size - block_size)
|
||||
|
||||
block = [rng.randint(1, 9) for _ in range(block_size)]
|
||||
border_color = rng.randint(1, 9)
|
||||
other_colors = tuple(c for c in range(1, 9) if c != border_color)
|
||||
block = [rng.choice(other_colors) for _ in range(block_size)]
|
||||
|
||||
if side == "left":
|
||||
if block[0] == border_color:
|
||||
return None
|
||||
block[0] = border_color
|
||||
else:
|
||||
if block[block_size - 1] == border_color:
|
||||
return None
|
||||
block[block_size - 1] = border_color
|
||||
|
||||
question = write_block(pos, block, gen_field(size))
|
||||
|
|
@ -294,8 +285,8 @@ def task_reflect_block_around_dot(rng: Random, size: int) -> Optional[dict[str,
|
|||
"""Generate a task where a block is reflected around a dot."""
|
||||
dot_color = 2
|
||||
|
||||
dot_pos = rng.randint(0, size)
|
||||
block_size = rng.randint(1, size)
|
||||
dot_pos = rng.randint(0, size - 1)
|
||||
block_size = rng.randint(1, size - 1)
|
||||
block_pos = rng.randint(0, size - block_size)
|
||||
block_end = block_pos + block_size - 1
|
||||
|
||||
|
|
@ -331,8 +322,6 @@ def task_reflect_block_around_dot(rng: Random, size: int) -> Optional[dict[str,
|
|||
def task_block_and_noise_remove(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
|
||||
"""Generate a task where noise around a block needs to be removed."""
|
||||
block_size = rng.randint(2, size)
|
||||
if block_size > size:
|
||||
return None
|
||||
|
||||
block_pos = rng.randint(0, size - block_size)
|
||||
color = rng.randint(1, 9)
|
||||
|
|
@ -356,7 +345,7 @@ def task_block_and_noise_remove(rng: Random, size: int) -> Optional[dict[str, li
|
|||
noise_positions = []
|
||||
|
||||
for _ in range(noise_count):
|
||||
allowed = [i for i in range(size) if not forbidden[i]]
|
||||
allowed = tuple(i for i in range(size) if not forbidden[i])
|
||||
if not allowed:
|
||||
break
|
||||
noise_pos = rng.choice(allowed)
|
||||
|
|
@ -385,8 +374,6 @@ def task_block_and_noise_remove_inside(rng: Random, size: int) -> Optional[dict[
|
|||
return None
|
||||
|
||||
block_size = rng.randint(6, size)
|
||||
if block_size > size:
|
||||
return None
|
||||
|
||||
block_pos = rng.randint(0, size - block_size)
|
||||
color = rng.randint(1, 9)
|
||||
|
|
@ -471,7 +458,7 @@ def task_copy_block_to_dots_colors(rng: Random, size: int) -> Optional[dict[str,
|
|||
dot_colors = []
|
||||
pos = block_size + block_size // 2 + 1
|
||||
|
||||
while pos < size - block_size:
|
||||
while pos <= size - block_size:
|
||||
if rng.random() < 0.5:
|
||||
dot_color = rng.randint(1, 9)
|
||||
dot_positions.append(pos)
|
||||
|
|
@ -759,13 +746,14 @@ def task_duplicate_block_from_seeds(rng: Random, size: int) -> Optional[dict[str
|
|||
return None
|
||||
|
||||
# Position block with space for seeds
|
||||
block_pos = rng.randint(2, size - block_size - 1)
|
||||
block_pos = rng.randint(2, size - block_size - 2)
|
||||
|
||||
# Decide seed placement
|
||||
left_seed = rng.random() < 0.5
|
||||
right_seed = rng.random() < 0.5
|
||||
if not (left_seed or right_seed):
|
||||
return None
|
||||
left_seed = False
|
||||
right_seed = False
|
||||
while not left_seed and not right_seed:
|
||||
left_seed = rng.random() < 0.5
|
||||
right_seed = rng.random() < 0.5
|
||||
|
||||
# Create input
|
||||
question = gen_field(size)
|
||||
|
|
@ -814,12 +802,13 @@ def task_duplicate_block_from_seeds(rng: Random, size: int) -> Optional[dict[str
|
|||
|
||||
def task_fill_from_pixel(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
|
||||
"""Generate a task where a pixel fills in one direction until hitting another pixel."""
|
||||
block_size = rng.randint(3, 6)
|
||||
if block_size >= size - 2:
|
||||
if size < 8:
|
||||
return None
|
||||
|
||||
block_size = rng.randint(3, size - 5)
|
||||
|
||||
# Position block with space for seed
|
||||
block_pos = rng.randint(1, size - block_size - 1)
|
||||
block_pos = rng.randint(2, size - block_size - 2)
|
||||
|
||||
# Create input
|
||||
question = gen_field(size)
|
||||
|
|
@ -830,9 +819,9 @@ def task_fill_from_pixel(rng: Random, size: int) -> Optional[dict[str, list[int]
|
|||
question[block_pos + i] = block_color
|
||||
|
||||
# Place seed pixel and determine fill direction
|
||||
seed_color = rng.randint(1, 9)
|
||||
while seed_color == block_color:
|
||||
seed_color = rng.randint(1, 9)
|
||||
seed_color = rng.randint(1, 8)
|
||||
if seed_color >= block_color:
|
||||
seed_color += 1
|
||||
|
||||
is_left = rng.random() < 0.5
|
||||
|
||||
|
|
@ -858,48 +847,51 @@ def task_fill_from_pixel(rng: Random, size: int) -> Optional[dict[str, list[int]
|
|||
|
||||
def task_mark_size_two_blocks(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
|
||||
"""Generate a task where size-2 blocks are marked with surrounding pixels."""
|
||||
blocks = []
|
||||
pos = 0
|
||||
if size < 8:
|
||||
return None
|
||||
|
||||
# Generate blocks with minimum gap of 2
|
||||
# Start with one size-2 block
|
||||
blocks = [2]
|
||||
pos = 4 # Space for first block (2) + gap (2)
|
||||
|
||||
# Generate more blocks
|
||||
while pos < size:
|
||||
if rng.random() < 0.4:
|
||||
block_size = rng.randint(1, 3)
|
||||
# Check if we have space for block and potential markers
|
||||
needed_space = block_size + (2 if block_size == 2 else 0)
|
||||
if pos + needed_space < size:
|
||||
blocks.append((pos, block_size))
|
||||
pos += block_size + 2 # Minimum gap of 2
|
||||
if pos + block_size <= size:
|
||||
blocks.append(block_size)
|
||||
pos += block_size + 2 # block + gap
|
||||
else:
|
||||
blocks.append(0)
|
||||
pos += 1
|
||||
|
||||
pos += 1
|
||||
# Shuffle block sizes
|
||||
rng.shuffle(blocks)
|
||||
|
||||
if len(blocks) < 2:
|
||||
return None
|
||||
# Assign positions with proper gaps
|
||||
block_positions = []
|
||||
pos = 0
|
||||
|
||||
# Verify gaps between blocks (including markers)
|
||||
valid = True
|
||||
for i in range(len(blocks) - 1):
|
||||
pos1, size1 = blocks[i]
|
||||
pos2, _ = blocks[i + 1]
|
||||
needed_gap = 3 if size1 == 2 else 2
|
||||
if pos2 - (pos1 + size1) < needed_gap:
|
||||
valid = False
|
||||
break
|
||||
if not valid:
|
||||
return None
|
||||
for block_size in blocks:
|
||||
if block_size == 0:
|
||||
pos += 1
|
||||
else:
|
||||
block_positions.append((pos, block_size))
|
||||
pos += block_size + 2 # Move past block + gap
|
||||
|
||||
# Create input with blocks
|
||||
question = gen_field(size)
|
||||
for pos, block_size in blocks:
|
||||
# Place block
|
||||
for pos, block_size in block_positions:
|
||||
block_color = rng.randint(1, 8)
|
||||
if block_color >= 3: # avoid marker color 3
|
||||
block_color += 1
|
||||
for i in range(block_size):
|
||||
question[pos + i] = 1
|
||||
question[pos + i] = block_color
|
||||
|
||||
# Create answer with markers
|
||||
answer = question.copy()
|
||||
for pos, block_size in blocks:
|
||||
for pos, block_size in block_positions:
|
||||
if block_size == 2:
|
||||
# Add markers for size 2 blocks
|
||||
if pos > 0:
|
||||
answer[pos - 1] = 3
|
||||
if pos + block_size < size:
|
||||
|
|
@ -946,7 +938,10 @@ def task_fill_until_collision(rng: Random, size: int) -> Optional[dict[str, list
|
|||
|
||||
# Color random pixels
|
||||
for pos in positions:
|
||||
question[pos] = rng.randint(1, 9)
|
||||
c = rng.randint(1, 8)
|
||||
if c >= 5: # don't use side marker color 5
|
||||
c += 1
|
||||
question[pos] = c
|
||||
|
||||
positions.sort()
|
||||
|
||||
|
|
@ -1039,8 +1034,8 @@ def task_color_left_half_blocks(rng: Random, size: int) -> Optional[dict[str, li
|
|||
# Generate blocks with gap 1
|
||||
while pos < size:
|
||||
if rng.random() < 0.4:
|
||||
block_size = rng.randint(2, 8)
|
||||
if pos + block_size >= size:
|
||||
block_size = rng.randint(2, size // 2)
|
||||
if pos + block_size > size:
|
||||
break
|
||||
|
||||
blocks.append((pos, block_size))
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
from dataclasses import dataclass, field
|
||||
from random import Random
|
||||
from typing import Any, Callable, Dict, Optional
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
from ..factory import ProceduralDataset, register_dataset
|
||||
from .board_format import ARC_PROMPT_TEMPLATE, BoardFormattingOptions, format_board, format_board_pair, parse_board
|
||||
|
|
|
|||
|
|
@ -1,23 +1,23 @@
|
|||
# types
|
||||
|
||||
|
||||
from typing import Any, Callable, Container, FrozenSet, Tuple, Union
|
||||
from typing import Any, Callable, Container, FrozenSet, Union
|
||||
|
||||
Boolean = bool
|
||||
Integer = int
|
||||
IntegerTuple = Tuple[Integer, Integer]
|
||||
Numerical = Union[Integer, IntegerTuple]
|
||||
Integertuple = tuple[Integer, Integer]
|
||||
Numerical = Union[Integer, Integertuple]
|
||||
IntegerSet = FrozenSet[Integer]
|
||||
Grid = Tuple[Tuple[Integer]]
|
||||
Cell = Tuple[Integer, IntegerTuple]
|
||||
Grid = tuple[tuple[Integer]]
|
||||
Cell = tuple[Integer, Integertuple]
|
||||
Object = FrozenSet[Cell]
|
||||
Objects = FrozenSet[Object]
|
||||
Indices = FrozenSet[IntegerTuple]
|
||||
Indices = FrozenSet[Integertuple]
|
||||
IndicesSet = FrozenSet[Indices]
|
||||
Patch = Union[Object, Indices]
|
||||
Element = Union[Object, Grid]
|
||||
Piece = Union[Grid, Patch]
|
||||
TupleTuple = Tuple[Tuple]
|
||||
tupletuple = tuple[tuple]
|
||||
ContainerContainer = Container[Container]
|
||||
|
||||
|
||||
|
|
@ -160,17 +160,17 @@ def difference(a: Container, b: Container) -> Container:
|
|||
return type(a)(e for e in a if e not in b)
|
||||
|
||||
|
||||
def dedupe(iterable: Tuple) -> Tuple:
|
||||
def dedupe(iterable: tuple) -> tuple:
|
||||
"""remove duplicates"""
|
||||
return tuple(e for i, e in enumerate(iterable) if iterable.index(e) == i)
|
||||
|
||||
|
||||
def order(container: Container, compfunc: Callable) -> Tuple:
|
||||
def order(container: Container, compfunc: Callable) -> tuple:
|
||||
"""order container by custom key"""
|
||||
return tuple(sorted(container, key=compfunc))
|
||||
|
||||
|
||||
def repeat(item: Any, num: Integer) -> Tuple:
|
||||
def repeat(item: Any, num: Integer) -> tuple:
|
||||
"""repetition of item within vector"""
|
||||
return tuple(item for i in range(num))
|
||||
|
||||
|
|
@ -277,12 +277,12 @@ def positive(x: Integer) -> Boolean:
|
|||
return x > 0
|
||||
|
||||
|
||||
def toivec(i: Integer) -> IntegerTuple:
|
||||
def toivec(i: Integer) -> Integertuple:
|
||||
"""vector pointing vertically"""
|
||||
return (i, 0)
|
||||
|
||||
|
||||
def tojvec(j: Integer) -> IntegerTuple:
|
||||
def tojvec(j: Integer) -> Integertuple:
|
||||
"""vector pointing horizontally"""
|
||||
return (0, j)
|
||||
|
||||
|
|
@ -302,7 +302,7 @@ def extract(container: Container, condition: Callable) -> Any:
|
|||
return next(e for e in container if condition(e))
|
||||
|
||||
|
||||
def totuple(container: FrozenSet) -> Tuple:
|
||||
def totuple(container: FrozenSet) -> tuple:
|
||||
"""conversion to tuple"""
|
||||
return tuple(container)
|
||||
|
||||
|
|
@ -332,12 +332,12 @@ def other(container: Container, value: Any) -> Any:
|
|||
return first(remove(value, container))
|
||||
|
||||
|
||||
def interval(start: Integer, stop: Integer, step: Integer) -> Tuple:
|
||||
def interval(start: Integer, stop: Integer, step: Integer) -> tuple:
|
||||
"""range"""
|
||||
return tuple(range(start, stop, step))
|
||||
|
||||
|
||||
def astuple(a: Integer, b: Integer) -> IntegerTuple:
|
||||
def astuple(a: Integer, b: Integer) -> Integertuple:
|
||||
"""constructs a tuple"""
|
||||
return (a, b)
|
||||
|
||||
|
|
@ -347,7 +347,7 @@ def product(a: Container, b: Container) -> FrozenSet:
|
|||
return frozenset((i, j) for j in b for i in a)
|
||||
|
||||
|
||||
def pair(a: Tuple, b: Tuple) -> TupleTuple:
|
||||
def pair(a: tuple, b: tuple) -> tupletuple:
|
||||
"""zipping of two tuples"""
|
||||
return tuple(zip(a, b))
|
||||
|
||||
|
|
@ -421,12 +421,12 @@ def mapply(function: Callable, container: ContainerContainer) -> FrozenSet:
|
|||
return merge(apply(function, container))
|
||||
|
||||
|
||||
def papply(function: Callable, a: Tuple, b: Tuple) -> Tuple:
|
||||
def papply(function: Callable, a: tuple, b: tuple) -> tuple:
|
||||
"""apply function on two vectors"""
|
||||
return tuple(function(i, j) for i, j in zip(a, b))
|
||||
|
||||
|
||||
def mpapply(function: Callable, a: Tuple, b: Tuple) -> Tuple:
|
||||
def mpapply(function: Callable, a: tuple, b: tuple) -> tuple:
|
||||
"""apply function on two vectors and merge"""
|
||||
return merge(papply(function, a, b))
|
||||
|
||||
|
|
@ -466,7 +466,7 @@ def width(piece: Piece) -> Integer:
|
|||
return rightmost(piece) - leftmost(piece) + 1
|
||||
|
||||
|
||||
def shape(piece: Piece) -> IntegerTuple:
|
||||
def shape(piece: Piece) -> Integertuple:
|
||||
"""height and width of grid or patch"""
|
||||
return (height(piece), width(piece))
|
||||
|
||||
|
|
@ -503,27 +503,27 @@ def ofcolor(grid: Grid, value: Integer) -> Indices:
|
|||
return frozenset((i, j) for i, r in enumerate(grid) for j, v in enumerate(r) if v == value)
|
||||
|
||||
|
||||
def ulcorner(patch: Patch) -> IntegerTuple:
|
||||
def ulcorner(patch: Patch) -> Integertuple:
|
||||
"""index of upper left corner"""
|
||||
return tuple(map(min, zip(*toindices(patch))))
|
||||
|
||||
|
||||
def urcorner(patch: Patch) -> IntegerTuple:
|
||||
def urcorner(patch: Patch) -> Integertuple:
|
||||
"""index of upper right corner"""
|
||||
return tuple(map(lambda ix: {0: min, 1: max}[ix[0]](ix[1]), enumerate(zip(*toindices(patch)))))
|
||||
|
||||
|
||||
def llcorner(patch: Patch) -> IntegerTuple:
|
||||
def llcorner(patch: Patch) -> Integertuple:
|
||||
"""index of lower left corner"""
|
||||
return tuple(map(lambda ix: {0: max, 1: min}[ix[0]](ix[1]), enumerate(zip(*toindices(patch)))))
|
||||
|
||||
|
||||
def lrcorner(patch: Patch) -> IntegerTuple:
|
||||
def lrcorner(patch: Patch) -> Integertuple:
|
||||
"""index of lower right corner"""
|
||||
return tuple(map(max, zip(*toindices(patch))))
|
||||
|
||||
|
||||
def crop(grid: Grid, start: IntegerTuple, dims: IntegerTuple) -> Grid:
|
||||
def crop(grid: Grid, start: Integertuple, dims: Integertuple) -> Grid:
|
||||
"""subgrid specified by start and dimension"""
|
||||
return tuple(r[start[1] : start[1] + dims[1]] for r in grid[start[0] : start[0] + dims[0]])
|
||||
|
||||
|
|
@ -542,7 +542,7 @@ def recolor(value: Integer, patch: Patch) -> Object:
|
|||
return frozenset((value, index) for index in toindices(patch))
|
||||
|
||||
|
||||
def shift(patch: Patch, directions: IntegerTuple) -> Patch:
|
||||
def shift(patch: Patch, directions: Integertuple) -> Patch:
|
||||
"""shift patch"""
|
||||
if len(patch) == 0:
|
||||
return patch
|
||||
|
|
@ -559,19 +559,19 @@ def normalize(patch: Patch) -> Patch:
|
|||
return shift(patch, (-uppermost(patch), -leftmost(patch)))
|
||||
|
||||
|
||||
def dneighbors(loc: IntegerTuple) -> Indices:
|
||||
def dneighbors(loc: Integertuple) -> Indices:
|
||||
"""directly adjacent indices"""
|
||||
return frozenset({(loc[0] - 1, loc[1]), (loc[0] + 1, loc[1]), (loc[0], loc[1] - 1), (loc[0], loc[1] + 1)})
|
||||
|
||||
|
||||
def ineighbors(loc: IntegerTuple) -> Indices:
|
||||
def ineighbors(loc: Integertuple) -> Indices:
|
||||
"""diagonally adjacent indices"""
|
||||
return frozenset(
|
||||
{(loc[0] - 1, loc[1] - 1), (loc[0] - 1, loc[1] + 1), (loc[0] + 1, loc[1] - 1), (loc[0] + 1, loc[1] + 1)}
|
||||
)
|
||||
|
||||
|
||||
def neighbors(loc: IntegerTuple) -> Indices:
|
||||
def neighbors(loc: Integertuple) -> Indices:
|
||||
"""adjacent indices"""
|
||||
return dneighbors(loc) | ineighbors(loc)
|
||||
|
||||
|
|
@ -690,7 +690,7 @@ def bordering(patch: Patch, grid: Grid) -> Boolean:
|
|||
)
|
||||
|
||||
|
||||
def centerofmass(patch: Patch) -> IntegerTuple:
|
||||
def centerofmass(patch: Patch) -> Integertuple:
|
||||
"""center of mass"""
|
||||
return tuple(map(lambda x: sum(x) // len(patch), zip(*toindices(patch))))
|
||||
|
||||
|
|
@ -895,14 +895,14 @@ def subgrid(patch: Patch, grid: Grid) -> Grid:
|
|||
return crop(grid, ulcorner(patch), shape(patch))
|
||||
|
||||
|
||||
def hsplit(grid: Grid, n: Integer) -> Tuple:
|
||||
def hsplit(grid: Grid, n: Integer) -> tuple:
|
||||
"""split grid horizontally"""
|
||||
h, w = len(grid), len(grid[0]) // n
|
||||
offset = len(grid[0]) % n != 0
|
||||
return tuple(crop(grid, (0, w * i + i * offset), (h, w)) for i in range(n))
|
||||
|
||||
|
||||
def vsplit(grid: Grid, n: Integer) -> Tuple:
|
||||
def vsplit(grid: Grid, n: Integer) -> tuple:
|
||||
"""split grid vertically"""
|
||||
h, w = len(grid) // n, len(grid[0])
|
||||
offset = len(grid) % n != 0
|
||||
|
|
@ -933,12 +933,12 @@ def switch(grid: Grid, a: Integer, b: Integer) -> Grid:
|
|||
return tuple(tuple(v if (v != a and v != b) else {a: b, b: a}[v] for v in r) for r in grid)
|
||||
|
||||
|
||||
def center(patch: Patch) -> IntegerTuple:
|
||||
def center(patch: Patch) -> Integertuple:
|
||||
"""center of the patch"""
|
||||
return (uppermost(patch) + height(patch) // 2, leftmost(patch) + width(patch) // 2)
|
||||
|
||||
|
||||
def position(a: Patch, b: Patch) -> IntegerTuple:
|
||||
def position(a: Patch, b: Patch) -> Integertuple:
|
||||
"""relative position between two patches"""
|
||||
ia, ja = center(toindices(a))
|
||||
ib, jb = center(toindices(b))
|
||||
|
|
@ -952,7 +952,7 @@ def position(a: Patch, b: Patch) -> IntegerTuple:
|
|||
return (-1, 1 if ja < jb else -1)
|
||||
|
||||
|
||||
def index(grid: Grid, loc: IntegerTuple) -> Integer:
|
||||
def index(grid: Grid, loc: Integertuple) -> Integer:
|
||||
"""color at location"""
|
||||
i, j = loc
|
||||
h, w = len(grid), len(grid[0])
|
||||
|
|
@ -961,7 +961,7 @@ def index(grid: Grid, loc: IntegerTuple) -> Integer:
|
|||
return grid[loc[0]][loc[1]]
|
||||
|
||||
|
||||
def canvas(value: Integer, dimensions: IntegerTuple) -> Grid:
|
||||
def canvas(value: Integer, dimensions: Integertuple) -> Grid:
|
||||
"""grid construction"""
|
||||
return tuple(tuple(value for j in range(dimensions[1])) for i in range(dimensions[0]))
|
||||
|
||||
|
|
@ -971,7 +971,7 @@ def corners(patch: Patch) -> Indices:
|
|||
return frozenset({ulcorner(patch), urcorner(patch), llcorner(patch), lrcorner(patch)})
|
||||
|
||||
|
||||
def connect(a: IntegerTuple, b: IntegerTuple) -> Indices:
|
||||
def connect(a: Integertuple, b: Integertuple) -> Indices:
|
||||
"""line between two points"""
|
||||
ai, aj = a
|
||||
bi, bj = b
|
||||
|
|
@ -1000,7 +1000,7 @@ def trim(grid: Grid) -> Grid:
|
|||
return tuple(r[1:-1] for r in grid[1:-1])
|
||||
|
||||
|
||||
def move(grid: Grid, obj: Object, offset: IntegerTuple) -> Grid:
|
||||
def move(grid: Grid, obj: Object, offset: Integertuple) -> Grid:
|
||||
"""move object on grid"""
|
||||
return paint(cover(grid, obj), shift(obj, offset))
|
||||
|
||||
|
|
@ -1025,12 +1025,12 @@ def righthalf(grid: Grid) -> Grid:
|
|||
return rot270(bottomhalf(rot90(grid)))
|
||||
|
||||
|
||||
def vfrontier(location: IntegerTuple) -> Indices:
|
||||
def vfrontier(location: Integertuple) -> Indices:
|
||||
"""vertical frontier"""
|
||||
return frozenset((i, location[1]) for i in range(30))
|
||||
|
||||
|
||||
def hfrontier(location: IntegerTuple) -> Indices:
|
||||
def hfrontier(location: Integertuple) -> Indices:
|
||||
"""horizontal frontier"""
|
||||
return frozenset((location[0], j) for j in range(30))
|
||||
|
||||
|
|
@ -1052,7 +1052,7 @@ def delta(patch: Patch) -> Indices:
|
|||
return backdrop(patch) - toindices(patch)
|
||||
|
||||
|
||||
def gravitate(source: Patch, destination: Patch) -> IntegerTuple:
|
||||
def gravitate(source: Patch, destination: Patch) -> Integertuple:
|
||||
"""direction to move source until adjacent to destination"""
|
||||
source_i, source_j = center(source)
|
||||
destination_i, destination_j = center(destination)
|
||||
|
|
@ -1108,7 +1108,7 @@ def box(patch: Patch) -> Indices:
|
|||
return frozenset(vlines | hlines)
|
||||
|
||||
|
||||
def shoot(start: IntegerTuple, direction: IntegerTuple) -> Indices:
|
||||
def shoot(start: Integertuple, direction: Integertuple) -> Indices:
|
||||
"""line from starting point and direction"""
|
||||
return connect(start, (start[0] + 42 * direction[0], start[1] + 42 * direction[1]))
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import random
|
||||
from dataclasses import dataclass
|
||||
from decimal import Decimal
|
||||
from typing import Any, Dict, Optional
|
||||
from typing import Any, Optional
|
||||
|
||||
from ..coaching import AttributeType, BaseCurriculum, RangeAttributeDefinition
|
||||
from ..factory import ProceduralDataset, register_dataset
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import math
|
||||
from fractions import Fraction
|
||||
from random import Random
|
||||
from typing import Any, Dict
|
||||
from typing import Any
|
||||
|
||||
from reasoning_gym.utils import format_number, is_integer
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
from fractions import Fraction
|
||||
from random import Random
|
||||
from typing import Any, Dict
|
||||
from typing import Any
|
||||
|
||||
from reasoning_gym.utils import format_number, is_integer
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
from dataclasses import dataclass
|
||||
from random import Random
|
||||
from typing import Dict, Optional
|
||||
from typing import Optional
|
||||
|
||||
from ..factory import ProceduralDataset, register_dataset
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
import re
|
||||
from dataclasses import dataclass
|
||||
from random import Random
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Optional
|
||||
|
||||
from ..factory import ProceduralDataset, register_dataset
|
||||
|
||||
|
|
@ -20,7 +19,7 @@ class NeedleHaystackConfig:
|
|||
assert self.num_statements < 168387000, f"num_statements must be less than {168387000}"
|
||||
|
||||
|
||||
def generate_unique_triplets(names: List[str], verbs: List[str], subjects: List[str], n: int, rng) -> Dict[str, Any]:
|
||||
def generate_unique_triplets(names: list[str], verbs: list[str], subjects: list[str], n: int, rng) -> dict[str, Any]:
|
||||
"""
|
||||
Generate n unique random triplets (name, verb, subject) without generating the full Cartesian product in memory.
|
||||
|
||||
|
|
@ -29,14 +28,14 @@ def generate_unique_triplets(names: List[str], verbs: List[str], subjects: List[
|
|||
randomly chosen as the 'needle'.
|
||||
|
||||
Args:
|
||||
names (List[str]): List of names.
|
||||
verbs (List[str]): List of verbs.
|
||||
subjects (List[str]): List of subjects.
|
||||
names (list[str]): List of names.
|
||||
verbs (list[str]): List of verbs.
|
||||
subjects (list[str]): List of subjects.
|
||||
n (int): Number of unique triplets to generate.
|
||||
rng (random.Random): A pre-seeded random number generator.
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]: A dictionary with:
|
||||
dict[str, Any]: A dictionary with:
|
||||
- "triplets": a list of n unique triplets (tuples of (name, verb, subject)),
|
||||
- "needle": one triplet randomly chosen from the list.
|
||||
|
||||
|
|
@ -47,7 +46,7 @@ def generate_unique_triplets(names: List[str], verbs: List[str], subjects: List[
|
|||
|
||||
# Use a range for memory efficiency and sample n unique indices.
|
||||
indices = rng.sample(range(total_possible), n)
|
||||
triplets: List[Tuple[str, str, str]] = []
|
||||
triplets: list[tuple[str, str, str]] = []
|
||||
|
||||
num_verbs = len(verbs)
|
||||
num_subjects = len(subjects)
|
||||
|
|
@ -101,12 +100,12 @@ class NeedleHaystackDataset(ProceduralDataset):
|
|||
"metadata": {"question": question},
|
||||
}
|
||||
|
||||
def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
|
||||
def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
|
||||
"""Determine if the solution provided solves the task.
|
||||
|
||||
Args:
|
||||
answer (Optional[str]): The user's answer.
|
||||
entry (Dict[str, any]): The original dataset entry containing the correct answer.
|
||||
entry (dict[str, Any]): The original dataset entry containing the correct answer.
|
||||
|
||||
Returns:
|
||||
float: The computed score between 0.0 and 1.0.
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ Game tasks for training reasoning capabilities:
|
|||
"""
|
||||
|
||||
from .countdown import CountdownConfig, CountdownDataset
|
||||
from .emoji_mystery import EmojiMysteryConfig, EmojiMysteryDataset
|
||||
from .futoshiki import FutoshikiConfig, FutoshikiDataset
|
||||
from .knight_swap import KnightSwapConfig, KnightSwapDataset
|
||||
from .maze import MazeConfig, MazeDataset
|
||||
|
|
@ -21,6 +22,8 @@ from .tsumego import TsumegoConfig, TsumegoDataset
|
|||
__all__ = [
|
||||
"CountdownConfig",
|
||||
"CountdownDataset",
|
||||
"EmojiMysteryConfig",
|
||||
"EmojiMysteryDataset",
|
||||
"FutoshikiConfig",
|
||||
"FutoshikiDataset",
|
||||
"MiniSudokuConfig",
|
||||
|
|
|
|||
235
reasoning_gym/games/emoji_mystery.py
Normal file
235
reasoning_gym/games/emoji_mystery.py
Normal file
|
|
@ -0,0 +1,235 @@
|
|||
import re
|
||||
from dataclasses import dataclass
|
||||
from random import Random
|
||||
from typing import Any, Optional
|
||||
|
||||
from ..data import read_data_file
|
||||
from ..factory import ProceduralDataset, register_dataset
|
||||
|
||||
_EMOJIS = [
|
||||
"😀",
|
||||
"😃",
|
||||
"😄",
|
||||
"😁",
|
||||
"😆",
|
||||
"😅",
|
||||
"🤣",
|
||||
"😂",
|
||||
"🙂",
|
||||
"🙃",
|
||||
"😉",
|
||||
"😊",
|
||||
"😇",
|
||||
"🥰",
|
||||
"😍",
|
||||
"🤩",
|
||||
"😘",
|
||||
"😗",
|
||||
"😚",
|
||||
"😙",
|
||||
"🥲",
|
||||
"😋",
|
||||
"😛",
|
||||
"😜",
|
||||
"🤪",
|
||||
"😝",
|
||||
"🤑",
|
||||
"🤗",
|
||||
"🤭",
|
||||
"🤫",
|
||||
"🤔",
|
||||
"🤐",
|
||||
"🤨",
|
||||
"😐",
|
||||
"😑",
|
||||
"😶",
|
||||
"😏",
|
||||
"😒",
|
||||
"🙄",
|
||||
"😬",
|
||||
"😮",
|
||||
"😯",
|
||||
"😲",
|
||||
"😳",
|
||||
"🥺",
|
||||
"😦",
|
||||
"😧",
|
||||
"😨",
|
||||
"😰",
|
||||
"😥",
|
||||
"😢",
|
||||
"😭",
|
||||
"😱",
|
||||
"😖",
|
||||
"😣",
|
||||
"😞",
|
||||
"😓",
|
||||
"😩",
|
||||
"😫",
|
||||
"🥱",
|
||||
"😤",
|
||||
"😡",
|
||||
"😠",
|
||||
"🤬",
|
||||
"😈",
|
||||
"👿",
|
||||
"💀",
|
||||
"☠",
|
||||
"💩",
|
||||
"🤡",
|
||||
"👹",
|
||||
"👺",
|
||||
"👻",
|
||||
"👽",
|
||||
"👾",
|
||||
"🤖",
|
||||
"😺",
|
||||
"😸",
|
||||
"😹",
|
||||
"😻",
|
||||
"😼",
|
||||
"😽",
|
||||
"🙀",
|
||||
"😿",
|
||||
"😾",
|
||||
"🙈",
|
||||
"🙉",
|
||||
"🙊",
|
||||
"💋",
|
||||
"💌",
|
||||
"💘",
|
||||
"💝",
|
||||
"💖",
|
||||
"💗",
|
||||
"💓",
|
||||
"💞",
|
||||
"💕",
|
||||
"💟",
|
||||
"❣",
|
||||
"💔",
|
||||
"❤️",
|
||||
"🧡",
|
||||
"💛",
|
||||
"💚",
|
||||
"💙",
|
||||
"💜",
|
||||
"🤎",
|
||||
"🖤",
|
||||
"🤍",
|
||||
]
|
||||
|
||||
|
||||
hint_function = """
|
||||
```python
|
||||
def variance_selector_to_byte(variation_selector):
|
||||
variation_selector_codepoint = ord(variation_selector)
|
||||
if 0xFE00 <= variation_selector_codepoint <= 0xFE0F:
|
||||
return variation_selector_codepoint - 0xFE00
|
||||
elif 0xE0100 <= variation_selector_codepoint <= 0xE01EF:
|
||||
return variation_selector_codepoint - 0xE0100 + 16
|
||||
else:
|
||||
return None
|
||||
def decode(encoded_sentence):
|
||||
decoded_bytes = []
|
||||
variation_selectors_part = encoded_sentence[1:]
|
||||
for char in variation_selectors_part:
|
||||
byte_val = variance_selector_to_byte(char)
|
||||
if byte_val is not None:
|
||||
decoded_bytes.append(byte_val)
|
||||
return bytes(decoded_bytes).decode('utf-8')
|
||||
```
|
||||
"""
|
||||
|
||||
|
||||
# Prompt skeleton; `{sentence}` and `{hint_function}` are filled in with
# str.format when a dataset item is built.
QUESTION_TEMPLATE = (
    "The following emoji is encoded with a sentence.\n"
    "Decode the following sentence from the emoji: {sentence}\n"
    "Here is a hint: {hint_function}\n"
    "Return the secret sentence as your final answer."
)
|
||||
|
||||
|
||||
@dataclass
class EmojiMysteryConfig:
    """Settings that control how Emoji Mystery items are generated."""

    # Number of items in the dataset.
    size: int = 1000
    # Base seed for the dataset; None leaves seeding to the base class.
    seed: Optional[int] = None
    # Inclusive bounds on the word count of candidate secret sentences.
    min_words_in_sentence: int = 3
    max_words_in_sentence: int = 35

    def validate(self):
        """Assert that the word-count bounds and the dataset size are sane."""
        fewest, most = self.min_words_in_sentence, self.max_words_in_sentence
        assert fewest > 0, "min_words_in_sentence must be positive"
        assert most >= fewest, "max_words_in_sentence must be >= min_words_in_sentence"
        assert self.size > 0, "size must be positive"
|
||||
|
||||
|
||||
class EmojiMysteryDataset(ProceduralDataset):
    """Hide a sentence inside a single emoji using Unicode variation selectors.

    Every UTF-8 byte of the secret sentence is mapped to one variation
    selector code point (bytes 0-15 -> U+FE00..U+FE0F, bytes 16-255 ->
    U+E0100..U+E01EF) and the resulting run is appended to a base emoji.
    The solver must recover the sentence from that string.
    """

    def __init__(self, config: EmojiMysteryConfig):
        super().__init__(config=config, seed=config.seed, size=config.size)
        text = read_data_file("in_the_year_2889.txt")
        self.emojis = _EMOJIS
        # Candidate secrets: sentences of the source text whose word count
        # lies within the configured inclusive bounds.
        self.sentences = [
            sentence.strip()
            for sentence in re.findall(r"[^.!?]+[.!?]", text)
            if self.config.min_words_in_sentence
            <= len(re.findall(r"\b\w+\b", sentence))
            <= self.config.max_words_in_sentence
        ]

    def __getitem__(self, idx: int) -> dict[str, Any]:
        """Build item `idx` from an RNG seeded with `self.seed + idx`.

        Returns:
            A dict with the prompt ("question"), the plain sentence ("answer")
            and the chosen base emoji under metadata["emoji"].
        """
        rng = Random(self.seed + idx)
        secret_emoji = rng.choice(self.emojis)
        secret_sentence = rng.choice(self.sentences).strip().replace("\n", " ")
        encoded_sentence = self.encode(secret_sentence, secret_emoji)
        question = QUESTION_TEMPLATE.format(sentence=encoded_sentence, hint_function=hint_function)
        return {"question": question, "answer": secret_sentence, "metadata": {"emoji": secret_emoji}}

    def variance_selector_to_byte(self, variation_selector: str) -> Optional[int]:
        """Map one variation selector character back to its byte value.

        Returns:
            0-15 for U+FE00..U+FE0F, 16-255 for U+E0100..U+E01EF, and None for
            any other character.
        """
        variation_selector_codepoint = ord(variation_selector)
        if 0xFE00 <= variation_selector_codepoint <= 0xFE0F:
            return variation_selector_codepoint - 0xFE00
        elif 0xE0100 <= variation_selector_codepoint <= 0xE01EF:
            return variation_selector_codepoint - 0xE0100 + 16
        return None

    def decode(self, encoded_sentence: str) -> str:
        """Recover the sentence hidden by `encode`.

        The first code point (the visible emoji) is skipped; every following
        variation selector contributes one byte and any other character is
        ignored. The collected bytes are decoded as UTF-8.
        """
        decoded_bytes = []
        for char in encoded_sentence[1:]:
            byte_val = self.variance_selector_to_byte(char)
            if byte_val is not None:
                decoded_bytes.append(byte_val)
        return bytes(decoded_bytes).decode("utf-8")

    def byte_to_variance_selector(self, byte: int) -> str:
        """Map a byte value (0-255) to its variation selector character."""
        if byte < 16:
            return chr(0xFE00 + byte)
        else:
            return chr(0xE0100 + (byte - 16))

    def encode(self, sentence: str, base: str) -> str:
        """Append `sentence` to `base` as a run of variation selectors.

        Some emojis are stored with their own variation selector (for example
        an emoji-presentation heart is U+2764 followed by U+FE0F). Because
        `decode` -- and the hint given to the solver -- only skip the first
        code point, such a selector would be read back as a spurious payload
        byte. Selectors after the first code point of `base` are therefore
        dropped so that decode(encode(s, base)) == s for every base.
        """
        carrier = base[:1] + "".join(ch for ch in base[1:] if self.variance_selector_to_byte(ch) is None)
        payload = "".join(self.byte_to_variance_selector(b) for b in sentence.encode("utf-8"))
        return carrier + payload

    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
        """Score an answer against the expected sentence.

        Returns:
            1.0 for an exact match; the fraction of matching positions when
            the lengths are equal; 0.01 for a length mismatch or an answer that
            cannot be compared; 0.0 when `answer` is None.
        """
        reward = 0.0
        if answer is not None:
            try:
                expected = entry["answer"]
                if answer == expected:
                    return 1.0
                if len(answer) == len(expected):
                    matches = sum(a == b for a, b in zip(answer, expected))
                    reward = matches / len(expected)
                else:
                    reward = 0.01
            except Exception:
                # Best-effort scoring: a malformed answer or entry earns the
                # same minimal reward as a wrong-length answer.
                reward = 0.01
        return reward
|
||||
|
||||
|
||||
# Register the dataset class and its config under the name "emoji_mystery".
register_dataset("emoji_mystery", EmojiMysteryDataset, EmojiMysteryConfig)
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
import random
|
||||
from random import Random
|
||||
from typing import Any, Dict
|
||||
from typing import Any
|
||||
|
||||
NUM_OF_PAIRS_GENERATED = 5
|
||||
|
||||
|
|
@ -65,7 +65,7 @@ def create_numbers_divisible_by_five_or_ten(rng: Random):
|
|||
return result
|
||||
|
||||
|
||||
def generate_0(rng: Random) -> Dict[str, Any]:
|
||||
def generate_0(rng: Random) -> dict[str, Any]:
|
||||
"""Generate input and output pairs where input remains unchanged"""
|
||||
pairs = {}
|
||||
|
||||
|
|
@ -78,7 +78,7 @@ def generate_0(rng: Random) -> Dict[str, Any]:
|
|||
return pairs
|
||||
|
||||
|
||||
def generate_1(rng: Random) -> Dict[str, Any]:
|
||||
def generate_1(rng: Random) -> dict[str, Any]:
|
||||
"""Generate input and output pairs where output is a list of the third element
|
||||
after removing all other elements
|
||||
"""
|
||||
|
|
@ -95,7 +95,7 @@ def generate_1(rng: Random) -> Dict[str, Any]:
|
|||
return pairs
|
||||
|
||||
|
||||
def generate_2(rng: Random) -> Dict[str, Any]:
|
||||
def generate_2(rng: Random) -> dict[str, Any]:
|
||||
"""Generate input and output pairs where output is a reversed list of the input"""
|
||||
pairs = {}
|
||||
for _ in range(NUM_OF_PAIRS_GENERATED):
|
||||
|
|
@ -108,7 +108,7 @@ def generate_2(rng: Random) -> Dict[str, Any]:
|
|||
return pairs
|
||||
|
||||
|
||||
def generate_3(rng: Random) -> Dict[str, Any]:
|
||||
def generate_3(rng: Random) -> dict[str, Any]:
|
||||
"""Generate input and output pairs where output is the sum of unique elements in the list less than 30"""
|
||||
pairs = {}
|
||||
for _ in range(NUM_OF_PAIRS_GENERATED):
|
||||
|
|
@ -127,7 +127,7 @@ def generate_3(rng: Random) -> Dict[str, Any]:
|
|||
return pairs
|
||||
|
||||
|
||||
def generate_4(rng: Random) -> Dict[str, Any]:
|
||||
def generate_4(rng: Random) -> dict[str, Any]:
|
||||
"""Generate input and output pairs where output is the count of elements equal to 5"""
|
||||
pairs = {}
|
||||
for i in range(NUM_OF_PAIRS_GENERATED):
|
||||
|
|
@ -151,7 +151,7 @@ def generate_4(rng: Random) -> Dict[str, Any]:
|
|||
return pairs
|
||||
|
||||
|
||||
def generate_5(rng: Random) -> Dict[str, Any]:
|
||||
def generate_5(rng: Random) -> dict[str, Any]:
|
||||
"""Generate input and output pairs where output is a list of elements that are followed by an even number
|
||||
|
||||
NOTE: This is supposed to be a relatively hard problem
|
||||
|
|
@ -173,7 +173,7 @@ def generate_5(rng: Random) -> Dict[str, Any]:
|
|||
return pairs
|
||||
|
||||
|
||||
def generate_6(rng: Random) -> Dict[str, Any]:
|
||||
def generate_6(rng: Random) -> dict[str, Any]:
|
||||
"""Generate input and output pairs where output is a list of elements where each element in input is added to its position(Using zero-indexing)"""
|
||||
pairs = {}
|
||||
for i in range(NUM_OF_PAIRS_GENERATED):
|
||||
|
|
@ -190,7 +190,7 @@ def generate_6(rng: Random) -> Dict[str, Any]:
|
|||
return pairs
|
||||
|
||||
|
||||
def generate_7(rng: Random) -> Dict[str, Any]:
|
||||
def generate_7(rng: Random) -> dict[str, Any]:
|
||||
"""Generate input and output pairs where output is a list of element whose position is indicated by the last element in the input
|
||||
|
||||
EXAMPLE:
|
||||
|
|
@ -213,7 +213,7 @@ def generate_7(rng: Random) -> Dict[str, Any]:
|
|||
return pairs
|
||||
|
||||
|
||||
def generate_8(rng: Random) -> Dict[str, Any]:
|
||||
def generate_8(rng: Random) -> dict[str, Any]:
|
||||
"""Generate input and output pairs where output is count of elements in the input"""
|
||||
pairs = {}
|
||||
for _ in range(NUM_OF_PAIRS_GENERATED):
|
||||
|
|
@ -227,7 +227,7 @@ def generate_8(rng: Random) -> Dict[str, Any]:
|
|||
return pairs
|
||||
|
||||
|
||||
def generate_9(rng: Random) -> Dict[str, Any]:
|
||||
def generate_9(rng: Random) -> dict[str, Any]:
|
||||
"""Generate input and output pairs where output is sum total of elements in the input"""
|
||||
pairs = {}
|
||||
for _ in range(NUM_OF_PAIRS_GENERATED):
|
||||
|
|
@ -241,7 +241,7 @@ def generate_9(rng: Random) -> Dict[str, Any]:
|
|||
return pairs
|
||||
|
||||
|
||||
def generate_10(rng: Random) -> Dict[str, Any]:
|
||||
def generate_10(rng: Random) -> dict[str, Any]:
|
||||
"""Generate input and output pairs where output is a list of the elements in ascending order"""
|
||||
pairs = {}
|
||||
for _ in range(NUM_OF_PAIRS_GENERATED):
|
||||
|
|
@ -255,7 +255,7 @@ def generate_10(rng: Random) -> Dict[str, Any]:
|
|||
return pairs
|
||||
|
||||
|
||||
def generate_11(rng: Random) -> Dict[str, Any]:
|
||||
def generate_11(rng: Random) -> dict[str, Any]:
|
||||
"""Generate input and output pairs where output is a list of the elements in descending order"""
|
||||
pairs = {}
|
||||
for _ in range(NUM_OF_PAIRS_GENERATED):
|
||||
|
|
@ -269,7 +269,7 @@ def generate_11(rng: Random) -> Dict[str, Any]:
|
|||
return pairs
|
||||
|
||||
|
||||
def generate_12(rng: Random) -> Dict[str, Any]:
|
||||
def generate_12(rng: Random) -> dict[str, Any]:
|
||||
"""Generate input and output pairs where output is a list of the elements where the first and last element in input are replaced by their
|
||||
successor. Example, for an integer 4, successor is 5
|
||||
"""
|
||||
|
|
@ -288,7 +288,7 @@ def generate_12(rng: Random) -> Dict[str, Any]:
|
|||
return pairs
|
||||
|
||||
|
||||
def generate_13(rng: Random) -> Dict[str, Any]:
|
||||
def generate_13(rng: Random) -> dict[str, Any]:
|
||||
"""Generate input and output pairs where output is [1] if list of input elements is in ascending order, [0] in descending order"""
|
||||
pairs = {}
|
||||
for i in range(NUM_OF_PAIRS_GENERATED):
|
||||
|
|
@ -307,7 +307,7 @@ def generate_13(rng: Random) -> Dict[str, Any]:
|
|||
return pairs
|
||||
|
||||
|
||||
def generate_14(rng: Random) -> Dict[str, Any]:
|
||||
def generate_14(rng: Random) -> dict[str, Any]:
|
||||
"""Generate input and output pairs where output is [1] if input element is divisible by 10, [0] if divisible by 5"""
|
||||
pairs = {}
|
||||
|
||||
|
|
@ -327,7 +327,7 @@ def generate_14(rng: Random) -> Dict[str, Any]:
|
|||
return pairs
|
||||
|
||||
|
||||
def generate_15(rng: Random) -> Dict[str, Any]:
|
||||
def generate_15(rng: Random) -> dict[str, Any]:
|
||||
"""Generate input and output pairs where output is a twice the amount of last element in the input"""
|
||||
pairs = {}
|
||||
for _ in range(NUM_OF_PAIRS_GENERATED):
|
||||
|
|
@ -348,7 +348,7 @@ def generate_15(rng: Random) -> Dict[str, Any]:
|
|||
return pairs
|
||||
|
||||
|
||||
def generate_16(rng: Random) -> Dict[str, Any]:
|
||||
def generate_16(rng: Random) -> dict[str, Any]:
|
||||
"""Generate input and output pairs where output is built from a function 2x - 4
|
||||
NOTE: This is supposed to be amazingly hard for the LLM.
|
||||
"""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue