from dataclasses import dataclass
from random import Random
from typing import Any, Optional

from ..factory import ProceduralDataset, register_dataset


def is_prime(n):
    """Return True if n is a prime number, False otherwise."""
    if n < 2:
        return False
    # Trial division up to floor(sqrt(n)) is enough: any composite n has a
    # divisor no larger than its square root.
    return all(n % i != 0 for i in range(2, int(n**0.5) + 1))


def is_composite(n):
    """
    Return True if n is composite.

    (Composite means an integer greater than 1 that is not prime.)
    """
    return n > 1 and not is_prime(n)


def generate_dynamic_puzzle(difficulty, rng):
    """
    Dynamically generate a 7-statement self-referential puzzle.

    The seven statements (with parameters determined by this function) are:
      1. "At least a of these 7 statements are true."
      2. "At most b of these 7 statements are false."
      3. "Exactly c of these 7 statements are true."
      4. "Exactly d of these 7 statements are false."
      5. "Either Statement 3 or Statement 4 is true, but not both."
      6. "The number of true statements is a prime number."
      7. "The number of false statements is a composite number."

    An intended number T of true statements is drawn first. The intended truth
    values of statements 6 and 7 follow directly from T, and the remaining
    true statements are distributed at random over statements 1-5. The numeric
    parameters a, b, c and d are then drawn so that each of statements 1-4
    evaluates to its intended truth value when exactly T statements are true.

    NOTE: the intended truth value of statement 5 is sampled independently of
    statements 3 and 4, so 'intended_assignment' is not guaranteed to be a
    self-consistent solution. Use solve_puzzle_dynamic() to enumerate the
    actual solutions.

    NOTE: ``difficulty`` is currently only recorded in the returned dict; it
    does not influence how the parameters are drawn.

    Args:
        difficulty: Difficulty level, stored verbatim in the result.
        rng: A ``random.Random``-like object providing ``choice``, ``sample``
            and ``randint``.

    Returns:
        dict: A puzzle dictionary containing:
            - 'n': number of statements (always 7 here),
            - 'statements_text': a list of 7 strings (one per statement),
            - 'parameters': a dict with the numeric parameters a, b, c, d,
            - 'intended_assignment': the intended truth values (7 booleans),
            - 'intended_T': the intended number of true statements,
            - 'difficulty': the ``difficulty`` argument.
    """
    n = 7

    # Choose an intended number of true statements, T, from 1 to 6 (nontrivial).
    T = rng.choice(range(1, n))

    # Statements 6 and 7 are fully determined by T.
    intended6 = is_prime(T)
    intended7 = is_composite(n - T)

    # Statements 1-5 must contribute the remaining k true statements.
    k = T - (int(intended6) + int(intended7))

    if not (0 <= k <= 5):
        # Only five statements are available (e.g. T = 6 makes both statement
        # 6 and statement 7 false, which would need k = 6), so fall back to a
        # known configuration with T = 4.
        T = 4
        intended6 = False
        intended7 = False
        intended_assignment_15 = [True, True, True, True, False]
    else:
        # Randomly pick which of statements 1-5 (indices 0..4) are true.
        intended_assignment_15 = [False] * 5
        if k > 0:
            for i in rng.sample(range(5), k):
                intended_assignment_15[i] = True

    false_count = n - T
    want1, want2, want3, want4, _ = intended_assignment_15

    # Statement 1 holds iff T >= a: pick a in [1, T] to make it true,
    # or a in [T + 1, n] to make it false (T <= 6, so that range is non-empty).
    a_param = rng.randint(1, T) if want1 else rng.randint(T + 1, n)

    # Statement 2 holds iff (n - T) <= b: pick b in [n - T, n] to make it
    # true, or b in [0, n - T - 1] to make it false.
    if want2:
        b_param = rng.randint(false_count, n)
    else:
        b_param = rng.randint(0, max(false_count - 1, 0))

    # Statement 3 holds iff T == c.
    if want3:
        c_param = T
    else:
        c_param = rng.choice([x for x in range(n + 1) if x != T])

    # Statement 4 holds iff (n - T) == d.
    if want4:
        d_param = false_count
    else:
        d_param = rng.choice([x for x in range(n + 1) if x != false_count])

    # Statement 5 has no numeric parameter; its intended truth value is simply
    # whatever was assigned above. By construction the assignment below
    # contains exactly T true entries, so no post-hoc adjustment is required.
    intended_assignment = [*intended_assignment_15, intended6, intended7]

    statements_text = [
        f"Statement 1: 'At least {a_param} of these 7 statements are true.'",
        f"Statement 2: 'At most {b_param} of these 7 statements are false.'",
        f"Statement 3: 'Exactly {c_param} of these 7 statements are true.'",
        f"Statement 4: 'Exactly {d_param} of these 7 statements are false.'",
        "Statement 5: 'Either Statement 3 or Statement 4 is true, but not both.'",
        "Statement 6: 'The number of true statements is a prime number.'",
        "Statement 7: 'The number of false statements is a composite number.'",
    ]

    return {
        "n": n,
        "statements_text": statements_text,
        "parameters": {
            "a": a_param,
            "b": b_param,
            "c": c_param,
            "d": d_param,
        },
        "intended_assignment": intended_assignment,
        "intended_T": T,
        "difficulty": difficulty,
    }


def verify_solution_dynamic(puzzle, solution):
    """
    Verify a candidate solution for a dynamically generated puzzle.

    A candidate is self-consistent when every statement marked True makes a
    claim that holds, and every statement marked False makes a claim that
    fails. With T true and F false statements, the claims are:
      1. "At least a of these 7 statements are true."   => (T >= a)
      2. "At most b of these 7 statements are false."   => (F <= b)
      3. "Exactly c of these 7 statements are true."    => (T == c)
      4. "Exactly d of these 7 statements are false."   => (F == d)
      5. "Either Statement 3 or Statement 4 is true, but not both."
                                                        => (solution[2] != solution[3])
      6. "The number of true statements is a prime number."      => is_prime(T)
      7. "The number of false statements is a composite number." => is_composite(F)

    Parameters:
        puzzle (dict): The puzzle dictionary returned by generate_dynamic_puzzle.
        solution (list of bool): A candidate assignment (length 7).

    Returns:
        bool: True if candidate is self-consistent; False otherwise.
    """
    n = puzzle["n"]
    if len(solution) != n:
        return False

    T = sum(solution)
    F = n - T
    params = puzzle["parameters"]

    # Each check rejects the candidate as soon as the claimed truth value of a
    # statement disagrees with whether its claim actually holds.
    if bool(solution[0]) != (T >= params["a"]):
        return False
    if bool(solution[1]) != (F <= params["b"]):
        return False
    if bool(solution[2]) != (T == params["c"]):
        return False
    if bool(solution[3]) != (F == params["d"]):
        return False
    if bool(solution[4]) != (solution[2] != solution[3]):
        return False
    if bool(solution[5]) != is_prime(T):
        return False
    if bool(solution[6]) != is_composite(F):
        return False
    return True


def print_puzzle_dynamic(puzzle):
    """Return the puzzle statements as text, one ' - '-prefixed line each."""
    return "".join(f" - {stmt}\n" for stmt in puzzle["statements_text"])


def solve_puzzle_dynamic(puzzle):
    """
    Search all 2^n possible truth assignments and return those that are
    self-consistent with the generated puzzle.

    Returns:
        list: Every assignment (list of n booleans) accepted by
        verify_solution_dynamic, in increasing bitmask order, where bit j of
        the mask is the truth value of statement j + 1.
    """
    n = puzzle["n"]
    candidates = ([(mask >> j) & 1 == 1 for j in range(n)] for mask in range(2**n))
    return [cand for cand in candidates if verify_solution_dynamic(puzzle, cand)]


@dataclass
class SelfReferenceConfig:
    """Configuration for SelfReference puzzle generation"""

    difficulty: int = 5
    seed: Optional[int] = None
    size: int = 500

    def validate(self):
        """Validate configuration parameters"""
        assert 1 <= self.difficulty <= 10, "difficulty must be between 1 and 10"


class SelfReferenceDataset(ProceduralDataset):
    """Generates self-referential puzzles"""

    def __init__(self, config: SelfReferenceConfig):
        super().__init__(config=config, seed=config.seed, size=config.size)

    def __getitem__(self, idx: int) -> dict:
        """Generate a single SelfReference task

        The item is derived from a ``Random`` seeded with ``self.seed + idx``.
        NOTE(review): ``self.seed`` and ``self.config`` are presumably set by
        ``ProceduralDataset.__init__`` - confirm against the base class.

        Returns:
            dict with keys:
                - question: str, the task description
                - answer: int, the number of self-consistent truth assignments
                - metadata: dict, currently always empty
        """
        rng = Random(self.seed + idx)

        puzzle = generate_dynamic_puzzle(self.config.difficulty, rng)
        puzz_s = (
            "Given the truthfulness of these statements, please tell me the number of possible solutions: \n"
            + print_puzzle_dynamic(puzzle)
        )

        # The expected answer is the count of assignments found by brute force.
        solutions = solve_puzzle_dynamic(puzzle)

        return {
            "question": puzz_s,
            "answer": len(solutions),
            "metadata": {},
        }

    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
        """Determine if the solution provided solves the SelfReference task.

        Answers are compared by their string form, so ``"3"`` and ``3`` match.

        Args:
            answer (Optional[str]): The user's answer.
            entry (dict[str, Any]): The original dataset entry containing the correct answer.

        Returns:
            float: 0.0 when no answer is given, 0.1 for a wrong answer and
            1.0 for a correct answer.
        """
        if answer is None:
            return 0.0
        if str(answer) != str(entry["answer"]):
            return 0.1
        return 1.0


register_dataset("self_reference", SelfReferenceDataset, SelfReferenceConfig)