# Mirror of https://github.com/open-thought/reasoning-gym.git
# Synced 2026-04-19 12:58:07 +00:00 (373 lines, 13 KiB, Python)
from dataclasses import dataclass
|
|
from random import Random
|
|
from typing import Any, Optional
|
|
|
|
from ..factory import ProceduralDataset, register_dataset
|
|
|
|
|
|
def is_prime(n):
    """Return True if n is a prime number, False otherwise.

    Uses trial division by every integer i with i * i <= n. The bound is
    checked with exact arithmetic instead of a floating-point square root,
    so arbitrarily large integers neither overflow nor lose precision.
    """
    if n < 2:
        return False
    i = 2
    # i * i <= n is the exact form of "i <= sqrt(n)".
    while i * i <= n:
        if n % i == 0:
            return False
        i += 1
    return True
|
|
|
|
|
|
def is_composite(n):
    """
    Tell whether n is a composite number.

    A composite number is an integer greater than 1 that is not prime, so
    0, 1 and negative values are never composite.
    """
    if n > 1:
        return not is_prime(n)
    return False
|
|
|
|
|
|
def generate_dynamic_puzzle(difficulty, rng):
    """
    Dynamically generate a 7-statement self-referential puzzle.

    The seven statements (with parameters chosen by this function) are:

      1. "At least a of these 7 statements are true."
      2. "At most b of these 7 statements are false."
      3. "Exactly c of these 7 statements are true."
      4. "Exactly d of these 7 statements are false."
      5. "Either Statement 3 or Statement 4 is true, but not both."
      6. "The number of true statements is a prime number."
      7. "The number of false statements is a composite number."

    An intended number T of true statements is drawn from 1..6 and an
    intended truth assignment with exactly T true statements is planted:
    the truth values of Statements 6 and 7 are forced by T, and the remaining
    true statements are spread at random over Statements 1-5. The parameters
    a, b, c, d are then drawn so that Statements 1-4 evaluate to their
    intended truth value at T.

    The intended truth value of Statement 5 is NOT reconciled with the
    intended values of Statements 3 and 4, so the planted assignment is not
    guaranteed to be a consistent solution of the puzzle.

    Args:
        difficulty: Difficulty level. It is only echoed back in the result;
            it does not currently influence how the parameters are drawn.
        rng: A random.Random-like object providing choice, sample and randint.

    Returns:
        dict: A puzzle dictionary containing:
          - 'n': number of statements (always 7),
          - 'statements_text': a list of 7 strings (one per statement),
          - 'parameters': a dict with the numeric parameters a, b, c, d,
          - 'intended_assignment': the planted truth values (7 booleans),
          - 'intended_T': the intended number of true statements,
          - 'difficulty': the difficulty value that was passed in.
    """
    n = 7

    # Intended number of true statements; 0 and 7 are excluded.
    true_count = rng.choice(range(1, n))

    # Statements 6 and 7 talk only about the counts, so their truth is forced.
    intended6 = is_prime(true_count)
    intended7 = is_composite(n - true_count)

    # Number of true statements still to be placed among Statements 1-5.
    k = true_count - (int(intended6) + int(intended7))

    if 0 <= k <= 5:
        # Randomly pick which of Statements 1-5 (indices 0..4) are true.
        intended_15 = [False] * 5
        if k > 0:
            for index in rng.sample(range(5), k):
                intended_15[index] = True
    else:
        # Reached when T == 6: neither forced statement is true, so six true
        # statements would have to fit into five slots. Fall back to a fixed
        # configuration with T == 4.
        true_count = 4
        intended6 = False
        intended7 = False
        intended_15 = [True, True, True, True, False]

    false_count = n - true_count

    # NOTE: the order of the rng calls below determines the puzzle generated
    # for a given seed; keep it stable.

    # Statement 1 holds iff T >= a.
    if intended_15[0]:
        a_param = rng.randint(1, true_count)
    else:
        a_param = rng.randint(true_count + 1, n)

    # Statement 2 holds iff F <= b.
    if intended_15[1]:
        b_param = rng.randint(false_count, n)
    else:
        b_param = rng.randint(0, false_count - 1)

    # Statement 3 holds iff T == c.
    if intended_15[2]:
        c_param = true_count
    else:
        c_param = rng.choice([x for x in range(n + 1) if x != true_count])

    # Statement 4 holds iff F == d.
    if intended_15[3]:
        d_param = false_count
    else:
        d_param = rng.choice([x for x in range(n + 1) if x != false_count])

    # Statement 5 has no numeric parameter; its planted value is whatever the
    # random placement above assigned to index 4.
    intended_assignment = [*intended_15, intended6, intended7]

    statements_text = [
        f"Statement 1: 'At least {a_param} of these 7 statements are true.'",
        f"Statement 2: 'At most {b_param} of these 7 statements are false.'",
        f"Statement 3: 'Exactly {c_param} of these 7 statements are true.'",
        f"Statement 4: 'Exactly {d_param} of these 7 statements are false.'",
        "Statement 5: 'Either Statement 3 or Statement 4 is true, but not both.'",
        "Statement 6: 'The number of true statements is a prime number.'",
        "Statement 7: 'The number of false statements is a composite number.'",
    ]

    return {
        "n": n,
        "statements_text": statements_text,
        "parameters": {
            "a": a_param,
            "b": b_param,
            "c": c_param,
            "d": d_param,
        },
        "intended_assignment": intended_assignment,
        "intended_T": true_count,
        "difficulty": difficulty,
    }
|
|
|
|
|
|
def verify_solution_dynamic(puzzle, solution):
    """
    Check whether a candidate truth assignment is self-consistent.

    An assignment is consistent when every statement marked true makes a claim
    that holds, and every statement marked false makes a claim that fails.
    With T the number of true entries and F = n - T, the claims are:

      1. "At least a of these 7 statements are true."   => T >= a
      2. "At most b of these 7 statements are false."   => F <= b
      3. "Exactly c of these 7 statements are true."    => T == c
      4. "Exactly d of these 7 statements are false."   => F == d
      5. "Either Statement 3 or Statement 4 is true, but not both."
                                                        => solution[2] != solution[3]
      6. "The number of true statements is a prime number."       => is_prime(T)
      7. "The number of false statements is a composite number."  => is_composite(F)

    Parameters:
        puzzle (dict): Puzzle dictionary with keys "n" and "parameters"
            (the latter holding "a", "b", "c" and "d").
        solution (list of bool): Candidate assignment, one entry per statement.

    Returns:
        bool: True if the candidate is self-consistent; False otherwise
        (including when its length differs from puzzle["n"]).
    """
    n = puzzle["n"]
    if len(solution) != n:
        return False

    true_count = sum(solution)
    false_count = n - true_count
    params = puzzle["parameters"]

    # (statement index, claim) pairs in statement order. Each claim is a
    # callable so it is evaluated only once all earlier statements have been
    # found consistent.
    claims = (
        (0, lambda: true_count >= params["a"]),
        (1, lambda: false_count <= params["b"]),
        (2, lambda: true_count == params["c"]),
        (3, lambda: false_count == params["d"]),
        (4, lambda: solution[2] != solution[3]),
        (5, lambda: is_prime(true_count)),
        (6, lambda: is_composite(false_count)),
    )

    for position, claim in claims:
        holds = claim()
        # The marked truth value must agree with whether the claim holds.
        if bool(solution[position]) != bool(holds):
            return False
    return True
|
|
|
|
|
|
def print_puzzle_dynamic(puzzle):
    """Format the puzzle's statements as a bulleted, newline-terminated string.

    Despite its name, this function prints nothing: it returns the text.

    Args:
        puzzle (dict): Puzzle dictionary whose "statements_text" entry is an
            iterable of statement strings.

    Returns:
        str: One " - <statement>" line per statement, each ending in a
        newline; the empty string when there are no statements.
    """
    return "".join(" - " + stmt + "\n" for stmt in puzzle["statements_text"])
|
|
|
|
|
|
def solve_puzzle_dynamic(puzzle):
    """
    Brute-force every truth assignment of the puzzle's statements.

    All 2^n assignments (n = puzzle["n"]) are enumerated by counting through
    bit masks, where bit j of the mask is the truth value of statement j + 1.
    The assignments accepted by verify_solution_dynamic are returned in
    enumeration order.
    """
    statement_count = puzzle["n"]
    bit_positions = range(statement_count)
    candidates = (
        [bool((mask >> position) & 1) for position in bit_positions]
        for mask in range(2**statement_count)
    )
    return [
        assignment
        for assignment in candidates
        if verify_solution_dynamic(puzzle, assignment)
    ]
|
|
|
|
|
|
@dataclass
class SelfReferenceConfig:
    """Settings that control SelfReference puzzle generation."""

    # Difficulty level; validate() accepts values from 1 to 10 inclusive.
    difficulty: int = 5
    # Seed for the dataset; may be left as None.
    seed: Optional[int] = None
    # Number of items in the dataset.
    size: int = 500

    def validate(self):
        """Assert that the configured difficulty lies in the supported range."""
        level = self.difficulty
        assert 1 <= level <= 10, "difficulty must be between 1 and 10"
|
|
|
|
|
|
class SelfReferenceDataset(ProceduralDataset):
    """Generates self-referential puzzles.

    Each item presents seven statements about their own truthfulness and asks
    how many truth assignments are consistent with them.
    """

    def __init__(self, config: SelfReferenceConfig):
        super().__init__(config=config, seed=config.seed, size=config.size)

    def __getitem__(self, idx: int) -> dict:
        """Generate a single SelfReference task.

        Args:
            idx: Index of the item; combined with the dataset seed so that the
                same index always produces the same puzzle.

        Returns:
            dict with keys:
                - question: str, the task description
                - answer: int, the number of consistent truth assignments
                - metadata: dict (currently always empty)
        """
        # NOTE(review): relies on the base class exposing `self.seed` as an
        # int and `self.config` as the config object — confirm against
        # ProceduralDataset.
        rng = Random(self.seed + idx)

        puzzle = generate_dynamic_puzzle(self.config.difficulty, rng)
        question = (
            "Given the truthfulness of these statements, please tell me the number of possible solutions: \n"
            + print_puzzle_dynamic(puzzle)
        )

        # Count the consistent assignments by exhaustive search. The count is
        # taken unconditionally so it is well defined (0) even when the puzzle
        # has no consistent assignment.
        answer = len(solve_puzzle_dynamic(puzzle))

        return {
            "question": question,
            "answer": answer,
            "metadata": {},
        }

    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
        """Determine if the solution provided solves the SelfReference task.

        Answers are compared as strings, so "3" and 3 are treated as equal.

        Args:
            answer (Optional[str]): The user's answer.
            entry (dict[str, Any]): The original dataset entry containing the correct answer.

        Returns:
            float: 1.0 for a correct answer, 0.1 for any other answer, and
            0.0 when no answer was given (None).
        """
        if answer is None:
            return 0.0
        if str(answer) != str(entry["answer"]):
            return 0.1
        return 1.0
|
|
|
|
|
|
# Register the dataset class and its config class under the name "self_reference".
register_dataset("self_reference", SelfReferenceDataset, SelfReferenceConfig)
|