diff --git a/GALLERY.md b/GALLERY.md
index 7164e6c4..d56dda30 100644
--- a/GALLERY.md
+++ b/GALLERY.md
@@ -10,7 +10,9 @@ This gallery shows examples from all available datasets using their default conf
 - [base_conversion](#base_conversion)
 - [basic_arithmetic](#basic_arithmetic)
 - [bf](#bf)
+- [binary_alternation](#binary_alternation)
 - [binary_matrix](#binary_matrix)
+- [bitwise_arithmetic](#bitwise_arithmetic)
 - [caesar_cipher](#caesar_cipher)
 - [calendar_arithmetic](#calendar_arithmetic)
 - [chain_sum](#chain_sum)
@@ -22,6 +24,8 @@ This gallery shows examples from all available datasets using their default conf
 - [countdown](#countdown)
 - [course_schedule](#course_schedule)
 - [cryptarithm](#cryptarithm)
+- [decimal_arithmetic](#decimal_arithmetic)
+- [decimal_chain_sum](#decimal_chain_sum)
 - [dice](#dice)
 - [family_relationships](#family_relationships)
 - [figlet_font](#figlet_font)
@@ -34,16 +38,19 @@ This gallery shows examples from all available datasets using their default conf
 - [gsm_symbolic](#gsm_symbolic)
 - [intermediate_integration](#intermediate_integration)
 - [isomorphic_strings](#isomorphic_strings)
+- [jugs](#jugs)
 - [knight_swap](#knight_swap)
 - [largest_island](#largest_island)
 - [lcm](#lcm)
 - [leg_counting](#leg_counting)
 - [letter_counting](#letter_counting)
 - [letter_jumble](#letter_jumble)
+- [list_functions](#list_functions)
 - [manipulate_matrix](#manipulate_matrix)
 - [maze](#maze)
 - [mini_sudoku](#mini_sudoku)
 - [n_queens](#n_queens)
+- [needle_haystack](#needle_haystack)
 - [number_filtering](#number_filtering)
 - [number_format](#number_format)
 - [number_sequence](#number_sequence)
@@ -62,6 +69,7 @@ This gallery shows examples from all available datasets using their default conf
 - [rearc](#rearc)
 - [rectangle_count](#rectangle_count)
 - [rotate_matrix](#rotate_matrix)
+- [rotten_oranges](#rotten_oranges)
 - [rubiks_cube](#rubiks_cube)
 - [rush_hour](#rush_hour)
 - [self_reference](#self_reference)
@@ -227,19 +235,31 @@ task_types = ['orthocenter', 'incircle_radius', 'angle_measure']
 Example tasks:
 ````
 Example 1:
-Question: In triangle ABC with coordinates A=(-7, -10), B=(-2, -3), and C=(-3, -6), find the measure (in degrees) of angle ABC.
+Question: In triangle ABC with coordinates A=(-7, -10), B=(-2, -3), and C=(-3, -6), find the measure (in degrees) of angle ABC. For all geometry problems:
+1. Give coordinates in the form (x, y)
+2. Round decimal answers to 3 decimal places
+3. Use the degree symbol ° for angles
+4. Return only the angle, coordinates, or radius as your answer.
 Answer: 17.10°
-Metadata: {'A': (-7, -10), 'B': (-2, -3), 'C': (-3, -6), 'angle_ABC_degrees': 17.10272896905237}
+Metadata: {'A': (-7, -10), 'B': (-2, -3), 'C': (-3, -6), 'angle_ABC_degrees': 17.10272896905237, 'task_type': 'angle_measure'}
 
 Example 2:
-Question: For triangle with vertices A=(-1, -6), B=(4, 1), and C=(-7, 4), determine the orthocenter (intersection of altitudes).
+Question: For triangle with vertices A=(-1, -6), B=(4, 1), and C=(-7, 4), determine the orthocenter (intersection of altitudes). For all geometry problems:
+1. Give coordinates in the form (x, y)
+2. Round decimal answers to 3 decimal places
+3. Use the degree symbol ° for angles
+4. Return only the angle, coordinates, or radius as your answer.
 Answer: (0.304, -1.217)
-Metadata: {'A': (-1, -6), 'B': (4, 1), 'C': (-7, 4), 'orthocenter_exact': ('7/23', '-28/23'), 'orthocenter_approx': (0.30434782608695654, -1.2173913043478262)}
+Metadata: {'A': (-1, -6), 'B': (4, 1), 'C': (-7, 4), 'ortho': (7/23, -28/23), 'orthocenter_exact': ('7/23', '-28/23'), 'orthocenter_approx': (0.30434782608695654, -1.2173913043478262), 'task_type': 'orthocenter'}
 
 Example 3:
-Question: Find the incircle radius of triangle ABC whose vertices are A=(6, 7), B=(-7, -5), and C=(2, -3).
+Question: Find the incircle radius of triangle ABC whose vertices are A=(6, 7), B=(-7, -5), and C=(2, -3). For all geometry problems:
+1. Give coordinates in the form (x, y)
+2. Round decimal answers to 3 decimal places
+3. Use the degree symbol ° for angles
+4. Return only the angle, coordinates, or radius as your answer.
 Answer: 2.176
-Metadata: {'A': (6, 7), 'B': (-7, -5), 'C': (2, -3), 'incircle_radius_exact': 'sqrt(-sqrt(29) + sqrt(85)/2 + sqrt(313)/2)*sqrt(-sqrt(313)/2 + sqrt(85)/2 + sqrt(29))*sqrt(-sqrt(85)/2 + sqrt(29) + sqrt(313)/2)/sqrt(sqrt(85)/2 + sqrt(29) + sqrt(313)/2)', 'incircle_radius_approx': 2.176123777286009}
+Metadata: {'A': (6, 7), 'B': (-7, -5), 'C': (2, -3), 'incircle_radius_exact': 'sqrt(-sqrt(29) + sqrt(85)/2 + sqrt(313)/2)*sqrt(-sqrt(313)/2 + sqrt(85)/2 + sqrt(29))*sqrt(-sqrt(85)/2 + sqrt(29) + sqrt(313)/2)/sqrt(sqrt(85)/2 + sqrt(29) + sqrt(313)/2)', 'incircle_radius_approx': 2.176123777286009, 'task_type': 'incircle_radius'}
 
 ````
 
@@ -901,17 +921,17 @@ whitespace = single
 Example tasks:
 ````
 Example 1:
-Question: Calculate -5 * -6. Ensure to report the answer as an integer. Do not add commas to the integer answers reported.
+Question: Calculate -5 * -6.
 Answer: 30
 Metadata: {'num_terms': 2, 'num_digits': 1, 'expression': '-5 * -6'}
 
 Example 2:
-Question: Calculate 965 / 5. Ensure to report the answer as an integer. Do not add commas to the integer answers reported.
+Question: Calculate 965 / 5.
 Answer: 193
 Metadata: {'num_terms': 2, 'num_digits': 3, 'expression': '965 / 5'}
 
 Example 3:
-Question: Calculate 0 + -2 + -4 * 0 * 3. Ensure to report the answer as an integer. Do not add commas to the integer answers reported.
+Question: Calculate 0 + -2 + -4 * 0 * 3.
 Answer: -2
 Metadata: {'num_terms': 5, 'num_digits': 1, 'expression': '0 + -2 + -4 * 0 * 3'}
 
@@ -958,6 +978,70 @@ Metadata: {'bfit_code': '\nint main() {\n    print("under");\n}\n', 'bf_program'
 
 ````
 
+### binary_alternation
+Generates Binary Alternation exercises with configurable difficulty
+
+Default configuration:
+```python
+min_n = 10
+max_n = 30
+p_solvable = 0.8
+size = 500
+seed = 42
+```
+
+Example tasks:
+````
+Example 1:
+Question: Given a binary string, return the minimum number of character swaps to make it alternating, or -1 if it is impossible.
+
+The string is called alternating if no two adjacent characters are equal. For example, the strings "010" and "1010" are alternating, while the string "0100" is not.
+
+Any two characters may be swapped, even if they are not adjacent.
+
+Example:
+- Input: Determine the minimum number of swaps to make the following binary string alternating: 111000
+- Output: 1
+
+Now, determine the minimum number of swaps to make the following binary string alternating: 0010101011
+
+Answer: 1
+Metadata: {'string': '0010101011', 'solution': 1, 'solvable': True}
+
+Example 2:
+Question: Given a binary string, return the minimum number of character swaps to make it alternating, or -1 if it is impossible.
+
+The string is called alternating if no two adjacent characters are equal. For example, the strings "010" and "1010" are alternating, while the string "0100" is not.
+
+Any two characters may be swapped, even if they are not adjacent.
+
+Example:
+- Input: Determine the minimum number of swaps to make the following binary string alternating: 111000
+- Output: 1
+
+Now, determine the minimum number of swaps to make the following binary string alternating: 00011111001010
+
+Answer: 3
+Metadata: {'string': '00011111001010', 'solution': 3, 'solvable': True}
+
+Example 3:
+Question: Given a binary string, return the minimum number of character swaps to make it alternating, or -1 if it is impossible.
+
+The string is called alternating if no two adjacent characters are equal. For example, the strings "010" and "1010" are alternating, while the string "0100" is not.
+
+Any two characters may be swapped, even if they are not adjacent.
+
+Example:
+- Input: Determine the minimum number of swaps to make the following binary string alternating: 111000
+- Output: 1
+
+Now, determine the minimum number of swaps to make the following binary string alternating: 100000100111110000000111111
+
+Answer: 7
+Metadata: {'string': '100000100111110000000111111', 'solution': 7, 'solvable': True}
+
+````
+
 ### binary_matrix
 Generates Binary Matrix exercises with configurable difficulty
 
@@ -1078,6 +1162,56 @@ Metadata: {'matrix': [[0, 1, 1, 1, 1, 1, 1, 0, 1], [1, 1, 0, 1, 0, 1, 0, 1, 1],
 
 ````
 
+### bitwise_arithmetic
+Dataset that generates tasks testing understanding of bitwise arithmetic operations.
+
+Generates expressions combining:
+- Standard arithmetic operators (+, -, *)
+- Bitwise shift operators (<<, >>)
+- Multi-byte hexadecimal numbers (e.g. 0x100 to 0xFFFF)
+
+The difficulty parameter controls expression complexity:
+- Level 1: Simple expressions like (0x123 + 0x456)
+- Level 2: Nested expressions with shifts like ((0x123 + 0x456) << 1)
+- Level 3+: Deeper nesting like ((0x123 + 0x456) << (0x789 >> 1))
+
+Each task provides:
+- A question asking to evaluate an expression
+- The correct answer in hexadecimal format
+- Metadata including the raw expression
+
+The dataset verifies answers by evaluating them as Python expressions,
+supporting both integer and hexadecimal string formats.
+
+Default configuration:
+```python
+difficulty = 2
+seed = 42
+size = 500
+```
+
+Example tasks:
+````
+Example 1:
+Question: Please solve this problem. Assume there is arbitrary bit depth and that there are signed integers. Reply only with the final hexadecimal value.
+((0x3a24 - 0x24b8) + (0x1741 >> 0x3))
+Answer: 0x1854
+Metadata: {'problem': '((0x3a24 - 0x24b8) + (0x1741 >> 0x3))'}
+
+Example 2:
+Question: Please solve this problem. Assume there is arbitrary bit depth and that there are signed integers. Reply only with the final hexadecimal value.
+((0xacf1 * 0xb3cc) - (0x9a4b << 0x0))
+Answer: 0x7975b8c1
+Metadata: {'problem': '((0xacf1 * 0xb3cc) - (0x9a4b << 0x0))'}
+
+Example 3:
+Question: Please solve this problem. Assume there is arbitrary bit depth and that there are signed integers. Reply only with the final hexadecimal value.
+((0x2e39 + 0x622b) >> 0x0)
+Answer: 0x9064
+Metadata: {'problem': '((0x2e39 + 0x622b) >> 0x0)'}
+
+````
+
 ### caesar_cipher
 Generates Caesar cipher encryption/decryption tasks
 
@@ -1095,17 +1229,17 @@ size = 500
 Example tasks:
 ````
 Example 1:
-Question: Decrypt this Caesar cipher text: JNJUBUF ZPVS BTTPDJBUF XIPN J XBT DPNQMJNFOUJOH B NPNFOU BHP
+Question: Decrypt this Caesar cipher text: JNJUBUF ZPVS BTTPDJBUF XIPN J XBT DPNQMJNFOUJOH B NPNFOU BHP. Provide only the decrypted text as your final answer.
 Answer: IMITATE YOUR ASSOCIATE WHOM I WAS COMPLIMENTING A MOMENT AGO
 Metadata: {'rotation': 1, 'cipher_text': 'JNJUBUF ZPVS BTTPDJBUF XIPN J XBT DPNQMJNFOUJOH B NPNFOU BHP', 'clear_text': 'IMITATE YOUR ASSOCIATE WHOM I WAS COMPLIMENTING A MOMENT AGO'}
 
 Example 2:
-Question: Decrypt this Caesar cipher text: PBSDJ XKZYVOYX CWSDR LYEQRD SD PYB K WOBO KXN YBSQSXKDON DOVOZRYXSM TYEBXKVSCW
+Question: Decrypt this Caesar cipher text: PBSDJ XKZYVOYX CWSDR LYEQRD SD PYB K WOBO KXN YBSQSXKDON DOVOZRYXSM TYEBXKVSCW. Provide only the decrypted text as your final answer.
 Answer: FRITZ NAPOLEON SMITH BOUGHT IT FOR A MERE AND ORIGINATED TELEPHONIC JOURNALISM
 Metadata: {'rotation': 10, 'cipher_text': 'PBSDJ XKZYVOYX CWSDR LYEQRD SD PYB K WOBO KXN YBSQSXKDON DOVOZRYXSM TYEBXKVSCW', 'clear_text': 'FRITZ NAPOLEON SMITH BOUGHT IT FOR A MERE AND ORIGINATED TELEPHONIC JOURNALISM'}
 
 Example 3:
-Question: Decrypt this Caesar cipher text: ZW PFLI JKFDRTY ZJ FLK FW ZK DLJK SV DVEUVU
+Question: Decrypt this Caesar cipher text: ZW PFLI JKFDRTY ZJ FLK FW ZK DLJK SV DVEUVU. Provide only the decrypted text as your final answer.
 Answer: IF YOUR STOMACH IS OUT OF IT MUST BE MENDED
 Metadata: {'rotation': 17, 'cipher_text': 'ZW PFLI JKFDRTY ZJ FLK FW ZK DLJK SV DVEUVU', 'clear_text': 'IF YOUR STOMACH IS OUT OF IT MUST BE MENDED'}
 
@@ -1407,6 +1541,7 @@ Question: A cube has:
 The cube is rotated so that the side which was before at the bottom is now at the top.
 
 What is now the color of the back side of the cube?
+Provide only the color as your final answer.
 Answer: orange
 Metadata: {'initial_state': {'top': 'pink', 'right': 'gray', 'front': 'orange', 'left': 'purple', 'back': 'indigo', 'bottom': 'cyan'}, 'rotations': ['bottom'], 'target_side': 'back', 'num_rotations': 1}
 
@@ -1426,6 +1561,7 @@ Next, the bottom side is rotated to become the top face.
 After that the cube is turned to make the bottom face the top.
 
 What is now the color of the left side of the cube?
+Provide only the color as your final answer.
 Answer: yellow
 Metadata: {'initial_state': {'top': 'gray', 'right': 'brown', 'front': 'silver', 'left': 'red', 'back': 'purple', 'bottom': 'yellow'}, 'rotations': ['left', 'bottom', 'bottom'], 'target_side': 'left', 'num_rotations': 3}
 
@@ -1445,6 +1581,7 @@ Now the cube is rotated to place its back side at the top.
 Now the cube is rotated to place its bottom side at the top.
 
 What is now the color of the left side of the cube?
+Provide only the color as your final answer.
 Answer: gold
 Metadata: {'initial_state': {'top': 'orange', 'right': 'cyan', 'front': 'violet', 'left': 'pink', 'back': 'gray', 'bottom': 'gold'}, 'rotations': ['left', 'back', 'bottom'], 'target_side': 'left', 'num_rotations': 3}
 
@@ -1682,18 +1819,28 @@ Question: Solve this cryptarithm:
   IKIZL
 
 Each letter stands for a unique digit (0-9). No leading letter can be zero.
-Provide a mapping from letters to digits that satisfies the equation.
-Here's an example:
+Provide a comma separated mapping from letters to digits that satisfies the equation in your final answer. Output format: "A=1,B=2,C=3" (without quotes)
 
+Here's an example:
+- Input:
   BASE
 + BALL
 ------
  GAMES
 
-Answer (one possible solution):
+- Output: B=7, A=4, S=8, E=3, L=5, M=9, G=1
+- Explanation:
+    * BASE + BALL = GAMES, two 4-digit numbers sum to 5 digits, so G = 1.
+    * Units: E + L = S (no carry).
+    * Tens: S + L = E + 10 (carry 1). Substitute S = E + L to get E + 2L = E + 10, so L = 5.
+    * Since S = E + 5 and S is one digit, E < 5.
+    * Hundreds: 2A + 1 = M (with carry).
+    * Thousands: 2B = A + 10 (carry makes G = 1). So A = 2B - 10.
+    * Try B = 7: Then A = 4 and M = 2(4) + 1 = 9.
+    * With E < 5, try E = 3: Then S = 8.
+    * Solution: B = 7, A = 4, S = 8, E = 3, L = 5, M = 9, G = 1
+    * Verify: BASE (7483) + BALL (7455) = GAMES (14938).
 
-B=7, A=8, S=2, E=9, L=1, G=1, M=0
-Summation: 7829 + 7811 = 15640 (the puzzle might produce a different arrangement, but the principle is the same).
 Answer: F=3,I=4,K=2,L=9,M=1,O=8,P=0,Z=7
 Metadata: {'letters': ['L', 'O', 'K', 'I', 'P', 'Z', 'M', 'F'], 'word_values': [381, 42098], 'sum_number': 42479, 'words_letters': ['FOM', 'IKPLO'], 'result_letters': 'IKIZL', 'digit_to_letter': {'9': 'L', '8': 'O', '2': 'K', '4': 'I', '0': 'P', '7': 'Z', '1': 'M', '3': 'F'}, 'letter_to_digit': {'L': 9, 'O': 8, 'K': 2, 'I': 4, 'P': 0, 'Z': 7, 'M': 1, 'F': 3}}
 
@@ -1706,18 +1853,28 @@ Question: Solve this cryptarithm:
   JHEDH
 
 Each letter stands for a unique digit (0-9). No leading letter can be zero.
-Provide a mapping from letters to digits that satisfies the equation.
-Here's an example:
+Provide a comma separated mapping from letters to digits that satisfies the equation in your final answer. Output format: "A=1,B=2,C=3" (without quotes)
 
+Here's an example:
+- Input:
   BASE
 + BALL
 ------
  GAMES
 
-Answer (one possible solution):
+- Output: B=7, A=4, S=8, E=3, L=5, M=9, G=1
+- Explanation:
+    * BASE + BALL = GAMES, two 4-digit numbers sum to 5 digits, so G = 1.
+    * Units: E + L = S (no carry).
+    * Tens: S + L = E + 10 (carry 1). Substitute S = E + L to get E + 2L = E + 10, so L = 5.
+    * Since S = E + 5 and S is one digit, E < 5.
+    * Hundreds: 2A + 1 = M (with carry).
+    * Thousands: 2B = A + 10 (carry makes G = 1). So A = 2B - 10.
+    * Try B = 7: Then A = 4 and M = 2(4) + 1 = 9.
+    * With E < 5, try E = 3: Then S = 8.
+    * Solution: B = 7, A = 4, S = 8, E = 3, L = 5, M = 9, G = 1
+    * Verify: BASE (7483) + BALL (7455) = GAMES (14938).
 
-B=7, A=8, S=2, E=9, L=1, G=1, M=0
-Summation: 7829 + 7811 = 15640 (the puzzle might produce a different arrangement, but the principle is the same).
 Answer: D=8,E=9,H=3,I=0,J=7,K=2,O=6,P=5
 Metadata: {'letters': ['O', 'K', 'H', 'P', 'I', 'D', 'E', 'J'], 'word_values': [3358, 70625], 'sum_number': 73983, 'words_letters': ['HHPD', 'JIOKP'], 'result_letters': 'JHEDH', 'digit_to_letter': {'6': 'O', '2': 'K', '3': 'H', '5': 'P', '0': 'I', '8': 'D', '9': 'E', '7': 'J'}, 'letter_to_digit': {'O': 6, 'K': 2, 'H': 3, 'P': 5, 'I': 0, 'D': 8, 'E': 9, 'J': 7}}
 
@@ -1731,23 +1888,100 @@ Question: Solve this cryptarithm:
   XXNXHZ
 
 Each letter stands for a unique digit (0-9). No leading letter can be zero.
-Provide a mapping from letters to digits that satisfies the equation.
-Here's an example:
+Provide a comma separated mapping from letters to digits that satisfies the equation in your final answer. Output format: "A=1,B=2,C=3" (without quotes)
 
+Here's an example:
+- Input:
   BASE
 + BALL
 ------
  GAMES
 
-Answer (one possible solution):
+- Output: B=7, A=4, S=8, E=3, L=5, M=9, G=1
+- Explanation:
+    * BASE + BALL = GAMES, two 4-digit numbers sum to 5 digits, so G = 1.
+    * Units: E + L = S (no carry).
+    * Tens: S + L = E + 10 (carry 1). Substitute S = E + L to get E + 2L = E + 10, so L = 5.
+    * Since S = E + 5 and S is one digit, E < 5.
+    * Hundreds: 2A + 1 = M (with carry).
+    * Thousands: 2B = A + 10 (carry makes G = 1). So A = 2B - 10.
+    * Try B = 7: Then A = 4 and M = 2(4) + 1 = 9.
+    * With E < 5, try E = 3: Then S = 8.
+    * Solution: B = 7, A = 4, S = 8, E = 3, L = 5, M = 9, G = 1
+    * Verify: BASE (7483) + BALL (7455) = GAMES (14938).
 
-B=7, A=8, S=2, E=9, L=1, G=1, M=0
-Summation: 7829 + 7811 = 15640 (the puzzle might produce a different arrangement, but the principle is the same).
 Answer: A=0,G=7,H=9,N=8,P=3,R=2,X=1,Z=5
 Metadata: {'letters': ['Z', 'H', 'N', 'G', 'X', 'A', 'R', 'P'], 'word_values': [25290, 33155, 59750], 'sum_number': 118195, 'words_letters': ['RZRHA', 'PPXZZ', 'ZHGZA'], 'result_letters': 'XXNXHZ', 'digit_to_letter': {'5': 'Z', '9': 'H', '8': 'N', '7': 'G', '1': 'X', '0': 'A', '2': 'R', '3': 'P'}, 'letter_to_digit': {'Z': 5, 'H': 9, 'N': 8, 'G': 7, 'X': 1, 'A': 0, 'R': 2, 'P': 3}}
 
 ````
 
+### decimal_arithmetic
+Dataset that generates basic arithmetic tasks using Decimal arithmetic and proper operator precedence.
+
+Default configuration:
+```python
+min_num_decimal_places = 6
+max_num_decimal_places = 6
+precision = 28
+terms = 6
+seed = 42
+size = 500
+```
+
+Example tasks:
+````
+Example 1:
+Question: Please solve this problem to a maximum of 28 significant digits, rounding up from the half. Only reply with the final value.
+(0.419611*3.744855)-(9.149733+0.533225)+3.668137-9.416130 = ?
+Answer: -13.859568648595
+
+Example 2:
+Question: Please solve this problem to a maximum of 28 significant digits, rounding up from the half. Only reply with the final value.
+(4.799697-6.205510+(8.359621+9.674082*6.609140)-1.800269) = ?
+Answer: 69.09090130948
+
+Example 3:
+Question: Please solve this problem to a maximum of 28 significant digits, rounding up from the half. Only reply with the final value.
+((8.724497+6.368109)-0.488171-9.541022+(2.628828*9.915288)) = ?
+Answer: 31.128999722464
+
+````
+
+### decimal_chain_sum
+Generates simple decimal arithmetic tasks using only + and - operators
+
+Default configuration:
+```python
+min_terms = 2
+max_terms = 6
+min_digits = 1
+max_digits = 4
+min_decimal_places = 1
+max_decimal_places = 4
+allow_negation = False
+seed = 42
+size = 500
+```
+
+Example tasks:
+````
+Example 1:
+Question: State the final answer to the following arithmetic problem: 4.23 + 3.96 =
+Answer: 8.19
+Metadata: {'difficulty': {'num_terms': 2, 'num_digits': 1}, 'expression': '4.23 + 3.96'}
+
+Example 2:
+Question: State the final answer to the following arithmetic problem: 812.57 - 880.2577 =
+Answer: -67.6877
+Metadata: {'difficulty': {'num_terms': 2, 'num_digits': 3}, 'expression': '812.57 - 880.2577'}
+
+Example 3:
+Question: State the final answer to the following arithmetic problem: 2.75 - 6.5 - 3.7 + 4.7 - 0.98 =
+Answer: -3.73
+Metadata: {'difficulty': {'num_terms': 5, 'num_digits': 1}, 'expression': '2.75 - 6.5 - 3.7 + 4.7 - 0.98'}
+
+````
+
 ### dice
 Generates Dice-based puzzles with configurable parameters
 
@@ -2010,7 +2244,7 @@ size = 500
 Example tasks:
 ````
 Example 1:
-Question: What will this Game of Life board look like after 1 steps of simulation? Reply as array of array representing rows in the grid from top to bottom in JSON format. Let your answer(array of array be on a single line). (An empty 3x3 grid would look like this: [[0,0,0],[0,0,0],[0,0,0]])
+Question: What will this Game of Life board look like after 1 steps of simulation? Reply as array of arrays representing rows in the grid from top to bottom in JSON format. (An empty 3x3 grid would look like this: [[0,0,0],[0,0,0],[0,0,0]])
 
 [[0,1,0,1,1,0,0,0,1,0],
  [1,0,0,1,0,1,1,1,1,1],
@@ -2026,7 +2260,7 @@ Answer: [[0,1,0,0,0,0,0,0,0,0],[1,1,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0],[0,0,
 Metadata: {'grid_size_x': 10, 'grid_size_y': 10, 'filled_cells': 100, 'simulation_steps': 1}
 
 Example 2:
-Question: What will this Game of Life board look like after 1 steps of simulation? Reply as array of array representing rows in the grid from top to bottom in JSON format. Let your answer(array of array be on a single line). (An empty 3x3 grid would look like this: [[0,0,0],[0,0,0],[0,0,0]])
+Question: What will this Game of Life board look like after 1 steps of simulation? Reply as array of arrays representing rows in the grid from top to bottom in JSON format. (An empty 3x3 grid would look like this: [[0,0,0],[0,0,0],[0,0,0]])
 
 [[1,1,1,1,1,1,0,1,1,1],
  [0,0,1,1,1,1,1,1,1,1],
@@ -2042,7 +2276,7 @@ Answer: [[0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0],[0,1,0,0,0,0,0,0,0,0],[0,1,
 Metadata: {'grid_size_x': 10, 'grid_size_y': 10, 'filled_cells': 100, 'simulation_steps': 1}
 
 Example 3:
-Question: What will this Game of Life board look like after 1 steps of simulation? Reply as array of array representing rows in the grid from top to bottom in JSON format. Let your answer(array of array be on a single line). (An empty 3x3 grid would look like this: [[0,0,0],[0,0,0],[0,0,0]])
+Question: What will this Game of Life board look like after 1 steps of simulation? Reply as array of arrays representing rows in the grid from top to bottom in JSON format. (An empty 3x3 grid would look like this: [[0,0,0],[0,0,0],[0,0,0]])
 
 [[0,1,0,1,1,1,1,0,0,1],
  [0,1,0,0,1,1,1,0,1,1],
@@ -2386,6 +2620,87 @@ Metadata: {'words': ['hogtytyof', 'kgqwfwfgh'], 'solution': True, 'solvable': Tr
 
 ````
 
+### jugs
+Generates water jug puzzles inspired by [this scene from _Die Hard 3_](https://www.youtube.com/watch?v=6cAbgAaEOVE), with configurable parameters
+
+Default configuration:
+```python
+num_jugs = 3
+difficulty = 10
+seed = 42
+size = 500
+```
+
+Example tasks:
+````
+Example 1:
+Question: You are a police officer. A maniac has planted a bomb next to a public fountain.
+
+To defuse the bomb, you must solve a puzzle. The puzzle is solved when you fill any of the available jugs with the target amount of water.
+
+You have three move types: 'fill', 'empty' and 'pour'.
+
+To fill Jug A, you 'fill A'.
+To empty Jug B, you 'empty B'.
+To pour the contents of Jug A into Jug B, you 'pour A->B'.
+All jugs are empty to begin with.
+
+The empty jugs hold this many litres of water: A:13, B:13, C:4
+And your target is: 10 litres.
+
+How do you defuse the bomb?
+
+Reply as a JSON-parsable list of moves which result in any of the jugs being filled with the target amount.
+
+Answer: ["fill A", "pour A->C", "fill B", "empty C", "pour A->C", "empty C", "pour A->C", "empty C", "pour A->C", "pour B->C"]
+Metadata: {'puzzle': {'jug_capacities': [13, 13, 4], 'target': 10, 'min_moves': 10}}
+
+Example 2:
+Question: You are a police officer. A maniac has planted a bomb next to a public fountain.
+
+To defuse the bomb, you must solve a puzzle. The puzzle is solved when you fill any of the available jugs with the target amount of water.
+
+You have three move types: 'fill', 'empty' and 'pour'.
+
+To fill Jug A, you 'fill A'.
+To empty Jug B, you 'empty B'.
+To pour the contents of Jug A into Jug B, you 'pour A->B'.
+All jugs are empty to begin with.
+
+The empty jugs hold this many litres of water: A:7, B:10, C:10
+And your target is: 5 litres.
+
+How do you defuse the bomb?
+
+Reply as a JSON-parsable list of moves which result in any of the jugs being filled with the target amount.
+
+Answer: ["fill A", "pour A->B", "fill A", "pour A->B", "pour A->C", "fill A", "pour A->C", "empty B", "pour A->B", "fill A", "pour A->B", "fill A", "pour A->B"]
+Metadata: {'puzzle': {'jug_capacities': [7, 10, 10], 'target': 5, 'min_moves': 13}}
+
+Example 3:
+Question: You are a police officer. A maniac has planted a bomb next to a public fountain.
+
+To defuse the bomb, you must solve a puzzle. The puzzle is solved when you fill any of the available jugs with the target amount of water.
+
+You have three move types: 'fill', 'empty' and 'pour'.
+
+To fill Jug A, you 'fill A'.
+To empty Jug B, you 'empty B'.
+To pour the contents of Jug A into Jug B, you 'pour A->B'.
+All jugs are empty to begin with.
+
+The empty jugs hold this many litres of water: A:7, B:10, C:7
+And your target is: 2 litres.
+
+How do you defuse the bomb?
+
+Reply as a JSON-parsable list of moves which result in any of the jugs being filled with the target amount.
+
+Answer: ["fill B", "pour B->A", "empty A", "pour B->A", "fill B", "pour B->A", "empty A", "pour B->A", "fill B", "pour B->A", "pour B->C"]
+Metadata: {'puzzle': {'jug_capacities': [7, 10, 7], 'target': 2, 'min_moves': 11}}
+
+````
+
 ### knight_swap
 Generates Knight Swap puzzles with configurable parameters.
 
@@ -2843,6 +3158,74 @@ Metadata: {'num_words': 16, 'corruption_level': 0.516016391169858, 'scrambled_wo
 
 ````
 
+### list_functions
+Default configuration:
+```python
+seed = 42
+size = 500
+```
+
+Example tasks:
+````
+Example 1:
+Question: You are an expert at inductive reasoning. Generate an output corresponding to the given input.
+The output is generated by applying the same rule that maps input to output for the examples provided. Your answer should be a list of element/elements
+Examples:
+Input 1: [4, 95, 36, 32]
+Output 1: [4, 32, 36, 95]
+Input 2: [18, 95, 14, 87, 95, 70]
+Output 2: [14, 18, 70, 87, 95, 95]
+Input 3: [76, 55, 5, 4]
+Output 3: [4, 5, 55, 76]
+Input 4: [28, 30, 65, 78]
+Output 4: [28, 30, 65, 78]
+
+
+Input: [72, 26, 92]
+Output:
+
+Answer: [26, 72, 92]
+
+Example 2:
+Question: You are an expert at inductive reasoning. Generate an output corresponding to the given input.
+The output is generated by applying the same rule that maps input to output for the examples provided. Your answer should be a list of element/elements
+Examples:
+Input 1: [37, 90, 98]
+Output 1: [37, 90, 98]
+Input 2: [60, 48, 86, 90, 13]
+Output 2: [60, 48, 86, 90, 13]
+Input 3: [77, 64, 78, 3, 66, 56, 74, 48, 80, 71]
+Output 3: [77, 64, 78, 3, 66, 56, 74, 48, 80, 71]
+Input 4: [51, 23, 8, 14, 16, 49, 20, 13, 21]
+Output 4: [51, 23, 8, 14, 16, 49, 20, 13, 21]
+
+
+Input: [17, 99, 50, 77, 65, 35, 74, 24, 49, 9]
+Output:
+
+Answer: [17, 99, 50, 77, 65, 35, 74, 24, 49, 9]
+
+Example 3:
+Question: You are an expert at inductive reasoning. Generate an output corresponding to the given input.
+The output is generated by applying the same rule that maps input to output for the examples provided. Your answer should be a list of element/elements
+Examples:
+Input 1: [4, 29, 49, 15, 90, 23, 38, 5, 67, 5, 70]
+Output 1: [2]
+Input 2: [37, 66, 21, 15, 44, 46, 80, 10]
+Output 2: [0]
+Input 3: [13, 45, 5, 5, 5, 50, 5]
+Output 3: [4]
+Input 4: [88, 6, 87]
+Output 4: [0]
+
+
+Input: [59, 5, 81, 5, 20, 5, 61, 76, 48, 70, 5, 30]
+Output:
+
+Answer: [4]
+
+````
+
 ### manipulate_matrix
 Generates Manipulate Matrix exercises with configurable difficulty
 
@@ -3206,6 +3589,38 @@ Metadata: {'puzzle': [['_', '_', '_', '_', '_', '_', '_', '_'], ['_', 'Q', '_',
 
 ````
 
+### needle_haystack
+Generates "Needle in a Haystack" tasks
+
+Default configuration:
+```python
+num_statements = 50
+seed = 42
+size = 500
+```
+
+Example tasks:
+````
+Example 1:
+Question: Caolain is neutral toward music. Alexx desires writing novels. Jake bears boxing. Harold gripes about dusting the furniture. Frederick disdains ironing the curtains. Cooper enjoys astronomy hobby. Caiden-Paul applauds all-terrain vehicles. Shayne delights in politics. Bradyn accepts artificial intelligence. Tyrnan supports climbing. Michal yearns for acting. Alvin deifies penguins. Allen relishes sailing. Brooke overlooks archery. Flynn prizes cleaning the patio. Grady can’t bear brewing beer. Rio ridicules acting. Wen is committed to emptying the dishwasher. Alfy execrates weeding the garden. Sweyn deifies bats. Emlyn laments bats. Shayan is passionate about snowboarding. Mehmet idolizes bird photography. Francis pines octopuses. Nikash worships ice skating. Tymom fancies motorcycles. Jaosha rejects balance. Abdur celebrates anime. Darryn bemoans logic. Michee revels in cleaning the ceiling fan. Khaleel worships trains. Jamie rails against the color amber. Daragh exults in astronomy. Finlay scoffs at minibikes. Kenyon desires collecting postcards. Caiden worships cocktails. Brodie reviles writing novels. Linton extols virtual reality. Bryson covets playing volleyball. Kyan begrudges listening to jazz. Kieran-Scott disapproves of collecting postcards. Willum esteems indie films. Isaa is addicted to ballet dancing. Arafat finds pleasure in triathlon. Oluwafemi disapproves of astronomy hobby. Seamas is keen on diving. Cian blasts playing the banjo. Liam-Stephen loathes the color sapphire. Bilal shrugs off playing the accordion. Sol is crazy about hip-hop dancing. 
+Who execrates weeding the garden? Reply only with a name.
+Answer: Alfy
+Metadata: {'question': 'Who execrates weeding the garden? Reply only with a name.'}
+
+Example 2:
+Question: Angus disdains composting. Jazz adores trail running. Craig eschews ballet dancing. Orrin resents wolves. Leigh adores playing ping pong. Bryn spurns washing the dishes. Nyah dotes foxes. Vuyolwethu finds fulfillment in DJing. Rhoridh rails against baking cakes. Yaseen idolizes goats. Ajayraj lusts after visiting theme parks. Rooke damns building model airplanes. Morton approves of bird photography. Tiarnan curses trucks. Lennon endorses deer. Zidane resents turtles. Shergo stomachs curry. Muhammad rejoices in hip-hop dancing. Machlan bears curiosity. Diarmaid fancies ice skating. Asrar is apathetic about peacocks. Callan celebrates listening to jazz. Chukwuemeka glories in cycling. Levon is crazy about cleaning the microwave. Danniel rails against innovation. Bryden regrets luxury sedans. Daumantas enjoys solving crossword puzzles. Rokas finds pleasure in indie films. Reuben blasts cupcakes. Cobain derides listening to classical music. Loukas is keen on resilience. Vincenzo glorifies watering the garden. Riyaj is partial to scooters. Jagat shrugs off playing the harp. Thorben tolerates the color ruby. Dominick is committed to religion. Lex despises parrots. Ayden extols ultimate frisbee. Arlo is fond of listening to jazz. Tjay favors the color plum. Averon yearns surfing. Dylan-Patrick is nuts about dancing. Avi prefers space shuttles. Dedeniseoluwa celebrates playing the banjo. Johnathan finds fulfillment in beatboxing. Jakey is partial to optimism. Berkay approves of rhinos. Ryden is keen on playing water polo. Zhi is crazy about fishing. Caie disdains hip-hop dancing. 
+Who extols ultimate frisbee? Reply only with a name.
+Answer: Ayden
+Metadata: {'question': 'Who extols ultimate frisbee? Reply only with a name.'}
+
+Example 3:
+Question: Marlin pines the color teal. Rufus mocks geocaching. Sharland yearns for the color yellow. Cejay yearns exploring caves. Diarmuid reveres limousines. Lincon exults resilience. Gareth ridicules playing board games. Jerome gripes about off-road vehicles. Aliyaan loves courage. Gabriel worships trucks. Cejay craves triathlon. Taylor-Jay detests off-road vehicles. Abu adores determination. Caedyn spurns pie. Darien is indifferent to resilience. Ronnie scorns all-terrain vehicles. Josan tolerates playing saxophone. Liam scorns playing cricket. Tyson longs for scorpions. Marc-Anthony ignores making coffee. Kayne bears trail running. Kurtis blasts creativity. Beau appreciates racing cars. Kerr laments the color khaki. Jayden-Paul relishes mopping the floor. Zak appreciates metaphysics. Darroch detests beauty. Carlo regrets building model cars. Rogan stomachs listening to folk music. Baley execrates omelettes. Tyler-Jay despises washing the dishes. Bruno fancies popcorn. Jacky puts up with zoology. Kajetan mocks cleaning the oven. Calley desires the color fuchsia. Zishan supports optimism. Jeronimo can’t bear vacuuming the floor. Amolpreet mocks roller skating. Kierin regrets metaphysics. Loudon approves of ducks. Brydon despises camels. Prinay eschews roller skating. Precious reveres coffee. Edison damns playing cricket. Eason yearns ants. Codey lusts after the color ruby. Ian revels in virtual reality. Hashim respects the color blue. Armaan derides performing magic. Arafat revels in canoeing. 
+Who damns playing cricket? Reply only with a name.
+Answer: Edison
+Metadata: {'question': 'Who damns playing cricket? Reply only with a name.'}
+
+````
+
 ### number_filtering
 Generates number filtering tasks
 
@@ -3890,6 +4305,7 @@ min_terms = 2
 max_terms = 2
 min_digits = 1
 max_digits = 5
+allow_negation = False
 seed = 42
 size = 500
 ```
@@ -3930,31 +4346,64 @@ size = 500
 Example tasks:
 ````
 Example 1:
-Question: Given:
+Question: The following question is a propositional logic reasoning question.
+In the question we provide a list of premises
+The task is to infer a correct conclusion from the premise.
+FORMAT INSTRUCTIONS:
+Return the conclusion logic statement, as your final answer.
+Use the following notation to denote symbols
+OR = ∨
+AND = ∧
+IMPLIES = →
+IFF = ↔
+NOT = ¬
+Here is the question:Given:
 1. R
-2. Q
-What can we conclude?
-Answer: (P ∨ Q)
-Metadata: {'premises': ['R', 'Q'], 'variables': ['P', 'Q', 'R', 'S'], 'complexity': 3}
+.2. Q
+.What can we conclude from the above statements?
+Answer: None
+Metadata: {'premises': ['R', 'Q'], 'variables': ['P', 'Q', 'R', 'S'], 'complexity': 3, 'example_answer': '(P ∨ Q)'}
 
 Example 2:
-Question: Given:
+Question: The following question is a propositional logic reasoning question.
+In the question we provide a list of premises
+The task is to infer a correct conclusion from the premise.
+FORMAT INSTRUCTIONS:
+Return the conclusion logic statement, as your final answer.
+Use the following notation to denote symbols
+OR = ∨
+AND = ∧
+IMPLIES = →
+IFF = ↔
+NOT = ¬
+Here is the question:Given:
 1. ((Q → P) ∨ (Q → P))
-2. ((Q ↔ Q) → (P → P))
-3. P
-What can we conclude?
-Answer: (P → P)
-Metadata: {'premises': ['((Q → P) ∨ (Q → P))', '((Q ↔ Q) → (P → P))', 'P'], 'variables': ['P', 'Q'], 'complexity': 3}
+.2. ((Q ↔ Q) → (P → P))
+.3. P
+.What can we conclude from the above statements?
+Answer: None
+Metadata: {'premises': ['((Q → P) ∨ (Q → P))', '((Q ↔ Q) → (P → P))', 'P'], 'variables': ['P', 'Q'], 'complexity': 3, 'example_answer': '(Q ∨ P)'}
 
 Example 3:
-Question: Given:
+Question: The following question is a propositional logic reasoning question.
+In the question we provide a list of premises
+The task is to infer a correct conclusion from the premise.
+FORMAT INSTRUCTIONS:
+Return the conclusion logic statement, as your final answer.
+Use the following notation to denote symbols
+OR = ∨
+AND = ∧
+IMPLIES = →
+IFF = ↔
+NOT = ¬
+Here is the question:Given:
 1. ((Q ∨ P) ∧ ¬P)
-2. P
-3. ((P ∧ R) ∧ ¬R)
-4. ((Q ↔ R) → ¬Q)
-What can we conclude?
-Answer: (Q ∧ Q)
-Metadata: {'premises': ['((Q ∨ P) ∧ ¬P)', 'P', '((P ∧ R) ∧ ¬R)', '((Q ↔ R) → ¬Q)'], 'variables': ['P', 'Q', 'R'], 'complexity': 3}
+.2. P
+.3. ((P ∧ R) ∧ ¬R)
+.4. ((Q ↔ R) → ¬Q)
+.What can we conclude from the above statements?
+Answer: None
+Metadata: {'premises': ['((Q ∨ P) ∧ ¬P)', 'P', '((P ∧ R) ∧ ¬R)', '((Q ↔ R) → ¬Q)'], 'variables': ['P', 'Q', 'R'], 'complexity': 3, 'example_answer': '(Q ∧ Q)'}
 
 ````
 
@@ -4342,7 +4791,7 @@ Metadata: {'input': ((1, 1, 1, 1, 1), (1, 1, 1, 1, 1)), 'output': ((1, 1, 1, 1,
 ````
 
 ### rectangle_count
-Generates [RectangleCount Puzzles](https://en.wikipedia.org/wiki/RectangleCount_Puzzle) with configurable parameters
+Generates ASCII rectangle counting puzzles with configurable parameters
 
 Default configuration:
 ```python
@@ -4806,6 +5255,156 @@ Metadata: {'matrix': [[28, 17, 38, 29, 8, 15, 26], [35, 13, 37, 39, 27, 40, 20],
 
 ````
 
+### rotten_oranges
+Generates Rotten Oranges exercises with configurable difficulty
+
+Default configuration:
+```python
+min_n = 10
+max_n = 30
+p_oranges = 0.85
+p_rotten = 0.1
+size = 500
+seed = 42
+```
+
+Example tasks:
+````
+Example 1:
+Question: You are given an n x n grid where each cell can have one of three values:
+- 0 representing an empty cell
+- 1 representing a fresh orange
+- 2 representing a rotten orange
+
+Every minute, any fresh orange that is 4-directionally adjacent to a rotten orange becomes rotten.
+
+Your task is determine the minimum number of minutes that must elapse until no cell has a fresh orange.
+If this is impossible, return -1.
+
+Example:
+- Input: Determine the minimum number of minutes that must elapse until no cell in the grid below has a fresh orange:
+    2 1 1
+    1 1 0
+    0 1 1
+- Output: 4
+
+Now, determine the minimum number of minutes that must elapse until no cell in the grid below has a fresh orange:
+1 1 1 1 2 1 1 1 1 0 1 1 1 1 1 2 1 0 1 1 1 1 1 0 0 1 1 1 1 1
+1 1 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 2 1 0
+1 1 1 1 1 0 0 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1 0 1 2 1 1 2 1 1
+1 1 1 1 1 1 0 1 2 1 1 1 1 0 1 0 1 2 1 1 1 0 2 1 1 1 1 1 2 2
+2 1 2 1 2 0 1 1 2 1 1 1 1 1 0 0 1 2 1 1 1 1 1 0 1 1 0 1 1 1
+1 1 0 1 0 1 2 1 0 1 1 1 1 1 1 1 0 0 1 1 1 0 0 1 1 1 1 0 1 1
+1 1 1 1 1 0 1 1 1 1 1 2 0 1 0 1 1 1 1 2 1 1 0 1 1 0 1 1 1 1
+1 1 1 1 1 1 1 1 1 0 1 1 2 0 1 1 1 1 1 1 1 1 0 0 0 1 1 1 0 1
+1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 0 1 1 0 1 1
+1 1 0 0 1 1 1 0 1 1 1 1 1 1 1 2 0 2 1 1 1 0 1 1 0 1 1 1 1 1
+1 1 1 1 1 1 2 2 1 1 0 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1
+1 1 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1 2 1 1 2 1 1 1 2 1 1 1 1
+1 1 1 0 1 1 1 1 1 1 1 1 2 1 0 1 1 1 1 1 1 1 1 0 1 1 1 1 0 1
+1 1 2 1 1 1 1 0 1 0 1 1 1 1 1 2 1 1 2 0 2 1 1 1 1 0 2 1 1 1
+1 1 1 0 1 1 1 1 1 2 1 1 2 1 1 0 1 1 1 0 0 1 0 1 1 1 1 1 1 1
+2 0 1 0 0 1 1 2 1 1 1 1 1 1 2 0 1 1 2 2 1 1 1 1 1 1 1 1 0 1
+2 0 0 1 1 1 0 1 1 2 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 0 1 1 1 1 1 0 1 1
+1 1 1 0 1 2 1 0 2 1 0 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 2 1 1 1
+1 1 2 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 2 1 1 1 1 1 0 1
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 2 1 1 1 1 1 1
+1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 0 1 2 1 1 1 1 1 1 2
+0 1 1 1 1 1 1 1 1 2 2 1 1 1 1 0 2 0 1 1 0 1 1 1 1 0 1 1 1 2
+1 1 1 0 0 1 1 0 1 1 2 1 1 1 0 0 1 2 1 1 1 1 1 1 1 0 1 1 1 0
+2 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 0 1 1 1 2 1 2 0 1
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 0 1 1 2 1 1 1 1 1 1 1 1
+0 1 1 1 1 2 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
+1 1 0 1 1 2 1 1 0 1 1 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0
+1 1 1 1 1 2 1 1 1 1 1 1 1 0 1 1 1 0 1 2 1 1 1 1 1 1 1 1 2 0
+1 1 1 1 1 1 1 1 1 1 1 1 2 0 0 1 0 1 1 1 1 2 1 1 1 1 1 1 1 1
+
+Answer: 6
+Metadata: {'matrix': [[1, 1, 1, 1, 2, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 2, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1], [1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 2, 1, 0], [1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 2, 1, 1, 2, 1, 1], [1, 1, 1, 1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 0, 2, 1, 1, 1, 1, 1, 2, 2], [2, 1, 2, 1, 2, 0, 1, 1, 2, 1, 1, 1, 1, 1, 0, 0, 1, 2, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1], [1, 1, 0, 1, 0, 1, 2, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1], [1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 2, 0, 1, 0, 1, 1, 1, 1, 2, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1], [1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1], [1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 2, 0, 2, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1], [1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1], [1, 1, 2, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 2, 1, 1, 2, 0, 2, 1, 1, 1, 1, 0, 2, 1, 1, 1], [1, 1, 1, 0, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1], [2, 0, 1, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1], [2, 0, 0, 1, 1, 1, 0, 1, 1, 2, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1], [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1, 0, 1, 1], [1, 1, 1, 0, 1, 2, 1, 0, 2, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1], [1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 0, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1], [1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 2, 0, 1, 2, 1, 1, 1, 1, 1, 1, 2], [0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 0, 2, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 2], [1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 1, 0, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0], [2, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 0, 1, 1, 1, 2, 1, 2, 0, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1], [0, 1, 1, 1, 1, 2, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 0, 1, 1, 2, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], [1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 1, 0, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1]], 'solution': 6}
+
+Example 2:
+Question: You are given an n x n grid where each cell can have one of three values:
+- 0 representing an empty cell
+- 1 representing a fresh orange
+- 2 representing a rotten orange
+
+Every minute, any fresh orange that is 4-directionally adjacent to a rotten orange becomes rotten.
+
+Your task is determine the minimum number of minutes that must elapse until no cell has a fresh orange.
+If this is impossible, return -1.
+
+Example:
+- Input: Determine the minimum number of minutes that must elapse until no cell in the grid below has a fresh orange:
+    2 1 1
+    1 1 0
+    0 1 1
+- Output: 4
+
+Now, determine the minimum number of minutes that must elapse until no cell in the grid below has a fresh orange:
+1 0 1 1 1 1 0 0 0 2 1
+1 1 1 1 1 2 1 1 0 1 2
+1 1 1 1 1 0 1 2 0 1 0
+1 1 1 1 0 1 1 1 1 1 2
+1 1 1 1 1 2 1 1 0 1 1
+2 1 1 1 1 1 1 1 2 0 1
+1 1 1 1 1 1 1 1 1 1 1
+1 0 1 1 2 1 1 1 0 1 1
+1 1 1 1 1 1 2 1 1 1 1
+0 2 1 1 1 1 0 1 1 1 1
+1 0 1 1 1 1 1 1 0 1 1
+
+Answer: -1
+Metadata: {'matrix': [[1, 0, 1, 1, 1, 1, 0, 0, 0, 2, 1], [1, 1, 1, 1, 1, 2, 1, 1, 0, 1, 2], [1, 1, 1, 1, 1, 0, 1, 2, 0, 1, 0], [1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 2], [1, 1, 1, 1, 1, 2, 1, 1, 0, 1, 1], [2, 1, 1, 1, 1, 1, 1, 1, 2, 0, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 0, 1, 1, 2, 1, 1, 1, 0, 1, 1], [1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1], [0, 2, 1, 1, 1, 1, 0, 1, 1, 1, 1], [1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1]], 'solution': -1}
+
+Example 3:
+Question: You are given an n x n grid where each cell can have one of three values:
+- 0 representing an empty cell
+- 1 representing a fresh orange
+- 2 representing a rotten orange
+
+Every minute, any fresh orange that is 4-directionally adjacent to a rotten orange becomes rotten.
+
+Your task is determine the minimum number of minutes that must elapse until no cell has a fresh orange.
+If this is impossible, return -1.
+
+Example:
+- Input: Determine the minimum number of minutes that must elapse until no cell in the grid below has a fresh orange:
+    2 1 1
+    1 1 0
+    0 1 1
+- Output: 4
+
+Now, determine the minimum number of minutes that must elapse until no cell in the grid below has a fresh orange:
+1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 0 1 1 1 1 1
+1 0 1 1 1 1 1 1 0 1 1 1 0 1 1 0 2 1 2 1 1 0 0
+1 0 0 0 1 1 1 1 0 1 1 1 1 1 1 1 2 1 1 1 1 0 1
+0 0 2 0 1 1 1 0 1 1 0 2 1 1 2 2 0 1 1 2 1 0 1
+1 1 1 0 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 0 1 1 1
+1 1 2 1 1 1 1 2 1 1 1 1 1 1 2 2 1 1 1 1 1 0 1
+1 1 1 1 1 2 1 1 2 1 1 1 1 0 1 1 1 1 1 1 1 1 1
+1 1 1 1 1 1 1 2 1 1 1 1 1 0 1 1 1 1 0 1 0 1 1
+0 1 1 1 0 0 1 1 1 0 1 1 0 2 1 1 2 1 0 1 2 0 1
+1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 2 1 1 0 1 1 1 1
+1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1
+1 1 1 0 0 1 1 0 1 1 1 1 2 1 1 0 1 0 1 1 1 1 1
+2 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1
+1 1 1 0 0 0 1 2 1 1 1 1 1 2 0 1 1 1 1 1 1 1 0
+1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 0 2 1 1 1 1 2
+1 1 1 1 1 1 1 1 1 2 0 1 1 0 2 1 1 1 1 1 1 1 1
+1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1
+2 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1 1 0 1 1 2 1
+1 1 0 1 2 1 1 1 1 1 2 1 1 2 1 1 1 1 1 1 1 1 0
+1 1 0 1 1 1 1 2 1 1 1 2 1 1 1 0 1 2 1 1 1 1 1
+1 2 1 1 2 1 0 1 0 1 2 1 1 1 1 1 1 1 1 1 1 1 1
+1 1 1 1 0 1 2 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1
+1 1 0 1 1 2 1 1 1 1 1 0 1 1 0 1 1 1 0 1 0 0 1
+
+Answer: 13
+Metadata: {'matrix': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 0, 1, 1, 1, 1, 1], [1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 2, 1, 2, 1, 1, 0, 0], [1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 0, 1], [0, 0, 2, 0, 1, 1, 1, 0, 1, 1, 0, 2, 1, 1, 2, 2, 0, 1, 1, 2, 1, 0, 1], [1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1], [1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1], [1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1], [0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 2, 1, 1, 2, 1, 0, 1, 2, 0, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 2, 1, 1, 0, 1, 1, 1, 1], [1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1], [1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 2, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1], [2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1], [1, 1, 1, 0, 0, 0, 1, 2, 1, 1, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 0], [1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 2, 1, 1, 1, 1, 2], [1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 0, 2, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1], [2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 2, 1], [1, 1, 0, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0], [1, 1, 0, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 1], [1, 2, 1, 1, 2, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1], [1, 1, 0, 1, 1, 2, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1]], 'solution': 13}
+
+````
+
 ### rubiks_cube
 Generates RubiksCube tasks
 
@@ -5073,17 +5672,17 @@ size = 100
 Example tasks:
 ````
 Example 1:
-Question: Given a convex polygon with 3 sides, its first 2 interior angles are: 16.0°, 80.0°. What is the measure of the remaining interior angle (in degrees)?
+Question: Given a convex polygon with 3 sides, its first 2 interior angles are: 16.0°, 80.0°. What is the measure of the remaining interior angle (in degrees)?Return only the angle as your answer.Do not give the units in your answer.
 Answer: 84
 Metadata: {'n_sides': 3, 'known_angles': [16.0, 80.0], 'sum_of_known_angles': 96.0, 'missing_angle_raw': 84.0, 'missing_angle_rounded': 84, 'total_interior_sum': 180}
 
 Example 2:
-Question: A convex polygon has 3 sides. The measures of the first 2 interior angles are: 83.0°, 46.0°. Find the measure of the last interior angle.
+Question: A convex polygon has 3 sides. The measures of the first 2 interior angles are: 83.0°, 46.0°. Find the measure of the last interior angle.Return only the angle as your answer.Do not give the units in your answer.
 Answer: 51
 Metadata: {'n_sides': 3, 'known_angles': [83.0, 46.0], 'sum_of_known_angles': 129.0, 'missing_angle_raw': 51.0, 'missing_angle_rounded': 51, 'total_interior_sum': 180}
 
 Example 3:
-Question: Given a convex polygon with 6 sides, its first 5 interior angles are: 143.0°, 148.0°, 39.0°, 55.0°, 107.0°. What is the measure of the remaining interior angle (in degrees)?
+Question: Given a convex polygon with 6 sides, its first 5 interior angles are: 143.0°, 148.0°, 39.0°, 55.0°, 107.0°. What is the measure of the remaining interior angle (in degrees)?Return only the angle as your answer.Do not give the units in your answer.
 Answer: 228
 Metadata: {'n_sides': 6, 'known_angles': [143.0, 148.0, 39.0, 55.0, 107.0], 'sum_of_known_angles': 492.0, 'missing_angle_raw': 228.0, 'missing_angle_rounded': 228, 'total_interior_sum': 720}
 
@@ -5949,7 +6548,7 @@ Metadata: {'task_type': 'datetime_tz', 'start_time': datetime.datetime(2964, 6,
 Example 2:
 Question: A video call started at 09:44 and ended at 12:22. How long was the call? Answer in HH:MM.
 Answer: 02:38
-Metadata: {'task_type': 'time', 'start_time': datetime.datetime(2025, 2, 19, 9, 44), 'end_time': datetime.datetime(2025, 2, 19, 12, 22), 'format': '%H:%M', 'expected_format': 'HH:MM'}
+Metadata: {'task_type': 'time', 'start_time': datetime.datetime(2025, 2, 21, 9, 44), 'end_time': datetime.datetime(2025, 2, 21, 12, 22), 'format': '%H:%M', 'expected_format': 'HH:MM'}
 
 Example 3:
 Question: Calculate the time difference between Sat Dec 22 2677 and Thu Mar 21 2678. Express the result in D days.
@@ -6293,7 +6892,7 @@ Question: This is a logic puzzle. There are 4 houses (numbered 1 on the left, 4
 8. The one who only drinks water is Carol.
 9. Carol is the person who uses a OnePlus 9.
 
-What is Name of the person who lives in House 1?
+What is Name of the person who lives in House 1?? Provide only the name of the person as your final answer.
 Answer: bob
 Metadata: {'num_people': 4, 'num_characteristics': 4}
 
@@ -6316,7 +6915,7 @@ Question: This is a logic puzzle. There are 4 houses (numbered 1 on the left, 4
 10. The British person is directly left of the German.
 11. The person who is the mother of Billy is the person who is a pizza lover.
 
-What is Name of the person who lives in House 1?
+What is Name of the person who lives in House 1?? Provide only the name of the person as your final answer.
 Answer: carol
 Metadata: {'num_people': 4, 'num_characteristics': 4}
 
@@ -6337,7 +6936,7 @@ Question: This is a logic puzzle. There are 4 houses (numbered 1 on the left, 4
 8. The Dunhill smoker is Bob.
 9. The person who loves the soup is the person who loves blue.
 
-What is Name of the person who lives in House 1?
+What is Name of the person who lives in House 1?? Provide only the name of the person as your final answer.
 Answer: carol
 Metadata: {'num_people': 4, 'num_characteristics': 4}
 
diff --git a/README.md b/README.md
index 036bfe78..59ca1fac 100644
--- a/README.md
+++ b/README.md
@@ -1,31 +1,32 @@
-# Reasoning Gym
+# 💪🧠 Reasoning Gym
 
-We are building a python library of procedural dataset generators and algorithmically verifiable reasoning environments for training reasoning models with reinforcement learning (RL).
+**Reasoning Gym** is a community-created Python library of procedural dataset generators and algorithmically verifiable reasoning environments for training reasoning models with reinforcement learning (RL). The goal is to generate virtually infinite training data with adjustable complexity.
 
-The goal is to generate virtually infinite data with adjustable complexity.
+It currently provides **more than 80** tasks over many domains, including but not limited to _algebra_, _arithmetic_, _computation_, _cognition_, _geometry_, _graph theory_, _logic_, and many common _games_.
 
-Algorithmic verification allows to train on tasks like Rubik‘s cube or [Countdown](<https://en.wikipedia.org/wiki/Countdown_(game_show)#Numbers_Round>) which have many correct solutions.
+Some tasks have a single correct answer, while others, such as [Rubik’s Cube](https://en.wikipedia.org/wiki/Rubik%27s_Cube) and [Countdown](<https://en.wikipedia.org/wiki/Countdown_(game_show)#Numbers_Round>), have many correct solutions. To support this, we provide a standard interface for procedurally verifying solutions.
 
-## Dataset Gallery
+## 🖼️ Dataset Gallery
 
-In [GALLERY.md](https://github.com/open-thought/reasoning-gym/blob/main/GALLERY.md) you find example outputs of all datasets available in reasoning-gym.
+In [GALLERY.md](https://github.com/open-thought/reasoning-gym/blob/main/GALLERY.md), you can find example outputs of all datasets available in `reasoning-gym`.
 
-## Installation
+## ⬇️ Installation
 
 The `reasoning-gym` package requires Python >= 3.11.
 
-Install via pip:
+Install the latest published [package from PyPI](https://pypi.org/project/reasoning-gym/) via `pip`:
 
 ```
 pip install reasoning-gym
 ```
 
-For development setup see [CONTRIBUTING.md](CONTRIBUTING.md#delevloper-setup).
+_Note that this project is currently under active development, and the version published on PyPI may be a few days behind `main`._
 
+## 🛠️ Development
 
-## How to instantiate a task dataset?
+For development setup, see [CONTRIBUTING.md](CONTRIBUTING.md#delevloper-setup).
 
-Example:
+## ✨ Example Usage
 
 ```python
 import reasoning_gym
@@ -48,7 +49,13 @@ metadata: {'animals': {'sheep': 2, 'dog': 2}, 'total_legs': 16}
 ...
 ```
 
-## Contributing
+## 🔍 Evaluation
+
+Instructions for running the evaluation scripts are provided in [eval/README.md](https://github.com/open-thought/reasoning-gym/blob/main/eval/README.md).
+
+Evaluation results of different reasoning models will be tracked in the [reasoning-gym-eval](https://github.com/open-thought/reasoning-gym-eval) repo.
+
+## 👷 Contributing
 
 Please see [CONTRIBUTING.md](CONTRIBUTING.md).
 
diff --git a/eval/eval.py b/eval/eval.py
index 53571dd0..bbfc585d 100644
--- a/eval/eval.py
+++ b/eval/eval.py
@@ -5,7 +5,7 @@ import os
 import re
 import time
 from datetime import datetime
-from typing import Any, Dict, List
+from typing import Any
 
 from openai import AsyncOpenAI
 from tqdm.asyncio import tqdm_asyncio
@@ -44,7 +44,7 @@ class AsyncOpenRouterEvaluator:
         match = re.search(r"<answer>(.*?)</answer>", response, re.DOTALL)
         return match.group(1).strip() if match else response
 
-    async def process_single_question(self, entry: Dict, dataset) -> Dict:
+    async def process_single_question(self, entry: dict, dataset) -> dict:
         """Process a single question and return the result."""
         response = await self.get_model_response(entry["question"])
         answer = self.parse_model_response(response)
@@ -54,11 +54,12 @@ class AsyncOpenRouterEvaluator:
             "question": entry["question"],
             "expected_answer": entry["answer"],
             "model_answer": answer,
+            "full_model_response": response,
             "score": score,
             "metadata": entry["metadata"],
         }
 
-    async def evaluate_dataset(self, dataset_config: Dict[str, Any]) -> Dict[str, Any]:
+    async def evaluate_dataset(self, dataset_config: dict[str, Any]) -> dict[str, Any]:
         """Evaluate a single dataset with concurrent question processing."""
         dataset_name = dataset_config.pop("name")
         print(f"\nEvaluating dataset: {dataset_name}")
@@ -92,7 +93,7 @@ class AsyncOpenRouterEvaluator:
             print(f"Error evaluating dataset {dataset_name}: {str(e)}")
             return None
 
-    async def evaluate_datasets(self, dataset_configs: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    async def evaluate_datasets(self, dataset_configs: list[dict[str, Any]]) -> list[dict[str, Any]]:
         """Evaluate multiple datasets concurrently."""
         tasks = [self.evaluate_dataset(config) for config in dataset_configs]
 
diff --git a/eval/r1/eval.py b/eval/r1/eval.py
index 3dbc39b1..202646ac 100644
--- a/eval/r1/eval.py
+++ b/eval/r1/eval.py
@@ -5,7 +5,7 @@ import logging
 import os
 from dataclasses import asdict
 from datetime import datetime
-from typing import Any, Dict, List
+from typing import Any
 
 import aiohttp
 from eval_config import EvalConfig
@@ -32,7 +32,7 @@ class OpenRouterEvaluator:
         }
         self.semaphore = asyncio.Semaphore(10)  # Control concurrency
 
-    def save_results(self, results: List[Dict[str, Any]], dataset, dataset_name) -> Dict[str, Any]:
+    def save_results(self, results: list[dict[str, Any]], dataset, dataset_name) -> dict[str, Any]:
         file_name = f"{self.output_dir}/{dataset_name}.json"
         total_score = sum(r["score"] for r in results)
 
@@ -52,7 +52,7 @@ class OpenRouterEvaluator:
             json.dump(metrics, f, indent=2)
         return metrics
 
-    def prepare_messages(self, prompt: str) -> List[Dict[str, str]]:
+    def prepare_messages(self, prompt: str) -> list[dict[str, str]]:
         return {
             "model": self.model,
             "messages": [
@@ -92,7 +92,7 @@ class OpenRouterEvaluator:
 
         raise Exception("Failed to get valid response after retries")
 
-    async def process_entry(self, session: aiohttp.ClientSession, dataset: Any, entry: Any) -> Dict[str, Any]:
+    async def process_entry(self, session: aiohttp.ClientSession, dataset: Any, entry: Any) -> dict[str, Any]:
         """Process a single entry with concurrency control."""
         async with self.semaphore:
             response = await self.get_model_response(session, entry["question"])
@@ -104,11 +104,12 @@ class OpenRouterEvaluator:
                 "question": entry["question"],
                 "expected_answer": str(entry["answer"]),
                 "model_answer": model_answer,
+                "full_model_response": response,
                 "score": score,
                 "metadata": str(entry["metadata"]),
             }
 
-    async def evaluate_dataset(self, session: aiohttp.ClientSession, dataset_name: str) -> Dict[str, Any]:
+    async def evaluate_dataset(self, session: aiohttp.ClientSession, dataset_name: str) -> dict[str, Any]:
         """Evaluate a single dataset asynchronously."""
         self.logger.info(f"\nEvaluating dataset: {dataset_name}")
         dataset = reasoning_gym.create_dataset(
@@ -119,7 +120,7 @@ class OpenRouterEvaluator:
         results = await asyncio.gather(*tasks)
         return self.save_results(results, dataset, dataset_name)
 
-    async def evaluate_datasets(self) -> List[Dict[str, Any]]:
+    async def evaluate_datasets(self) -> list[dict[str, Any]]:
         """Main async evaluation entry point."""
         all_results = []
         async with aiohttp.ClientSession(headers=self.headers) as session:
diff --git a/eval/r1/eval_config.py b/eval/r1/eval_config.py
index 9a6c96f6..80b76dd4 100644
--- a/eval/r1/eval_config.py
+++ b/eval/r1/eval_config.py
@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import List, Union
+from typing import Union
 
 import yaml
 
@@ -9,7 +9,7 @@ from reasoning_gym.utils import SYSTEM_PROMPTS
 @dataclass
 class EvalConfig:
     category: str
-    datasets: Union[str, List[str]]
+    datasets: Union[str, list[str]]
     eval_dir: str
     dataset_size: int
     dataset_seed: int
diff --git a/examples/OpenRLHF/custom_reward.py b/examples/OpenRLHF/custom_reward.py
index 638cdc97..899aa044 100644
--- a/examples/OpenRLHF/custom_reward.py
+++ b/examples/OpenRLHF/custom_reward.py
@@ -6,7 +6,7 @@ import math
 import os
 from dataclasses import dataclass
 from datetime import datetime
-from typing import Any, List, Optional, Tuple
+from typing import Any, Optional
 
 import torch
 import torch.nn as nn
@@ -109,7 +109,7 @@ class AlgorithmicRewardExperienceMaker(NaiveExperienceMaker):
         self.dataset = dataset
 
     @torch.no_grad()
-    def generate_samples(self, all_prompts: List[Tuple[str, Any]], **generate_kwargs) -> List[Samples]:
+    def generate_samples(self, all_prompts: list[tuple[str, Any]], **generate_kwargs) -> list[Samples]:
         """
         Generate samples and return in batches.
         """
diff --git a/examples/unsloth/requirements.txt b/examples/unsloth/requirements.txt
new file mode 100644
index 00000000..a7382c2f
--- /dev/null
+++ b/examples/unsloth/requirements.txt
@@ -0,0 +1,4 @@
+peft
+pillow
+unsloth
+vllm
diff --git a/examples/unsloth/train_grpo_lora.py b/examples/unsloth/train_grpo_lora.py
new file mode 100644
index 00000000..3773feb3
--- /dev/null
+++ b/examples/unsloth/train_grpo_lora.py
@@ -0,0 +1,208 @@
+"""
+Minimal example using Unsloth and vLLM for efficient GRPO training of a model with (Q)LoRA.
+
+Adapted from Unsloth's documentation examples.
+"""
+
+from unsloth import FastLanguageModel, PatchFastRL
+
+PatchFastRL("GRPO", FastLanguageModel)
+
+import argparse
+import logging
+import re
+
+import torch
+from torch.utils.data import Dataset
+from tqdm import tqdm
+from trl import GRPOConfig, GRPOTrainer
+from unsloth import is_bfloat16_supported
+
+import reasoning_gym
+from reasoning_gym import utils
+
+
+class ReasoningGymDataset(Dataset):  # torch Dataset serving reasoning_gym items as chat-templated prompts
+    def __init__(self, dataset_name, seed, size, tokenizer, developer_prompt, developer_role="system") -> None:
+        super().__init__()
+        self.data = reasoning_gym.create_dataset(dataset_name, seed=seed, size=size)  # also used for score_answer
+        self.tokenizer = tokenizer
+        self.developer_role = developer_role  # None omits the developer/system message in __getitem__
+        self.developer_prompt = developer_prompt
+
+    def __len__(self):
+        return len(self.data)
+
+    def __getitem__(self, idx):
+        item = self.data[idx]
+        question = item["question"]
+
+        chat = []
+
+        if self.developer_role is not None:
+            chat.append({"role": self.developer_role, "content": self.developer_prompt})
+        chat.append({"role": "user", "content": question})
+
+        prompt = self.tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)  # text, not ids
+        return {"prompt": prompt, "metadata": item}  # "metadata" is the whole dataset entry
+
+
+def get_model_and_tokenizer(model_id, max_seq_length, lora_rank, quantize, gpu_memory_utilization) -> tuple:
+    model, tokenizer = FastLanguageModel.from_pretrained(  # load base model and tokenizer through Unsloth
+        model_name=model_id,
+        max_seq_length=max_seq_length,
+        max_lora_rank=lora_rank,
+        gpu_memory_utilization=gpu_memory_utilization,
+        load_in_4bit=quantize,  # 4-bit loading only when quantize is truthy
+        fast_inference=True,
+    )
+
+    target_modules = [  # module names passed to get_peft_model as LoRA targets
+        "q_proj",
+        "k_proj",
+        "v_proj",
+        "o_proj",
+        "gate_proj",
+        "up_proj",
+        "down_proj",
+    ]
+
+    model = FastLanguageModel.get_peft_model(  # LoRA wrapper; lora_alpha is set equal to the rank
+        model, r=lora_rank, target_modules=target_modules, lora_alpha=lora_rank, use_gradient_checkpointing="unsloth"
+    )
+
+    return model, tokenizer  # (PEFT-wrapped model, tokenizer)
+
+
+class GRPOTrainerCustom(GRPOTrainer):  # GRPOTrainer wired with an accuracy reward and a format reward
+    def __init__(self, model, args: GRPOConfig, tokenizer, train_dataset: Dataset):
+        super().__init__(
+            model,
+            reward_funcs=[self._accuracy_reward, self._format_reward],  # both rewards are passed to the base trainer
+            args=args,
+            train_dataset=train_dataset,
+            processing_class=tokenizer,
+        )
+
+    def _format_reward(self, completions, **kwargs):  # 1.0 per completion matching the layout regex, else 0.0
+        regex = r"^<think>([^<]*(?:<(?!/?think>)[^<]*)*)<\/think>\n<answer>([\s\S]*?)<\/answer>$"
+        matches = [re.match(regex, completion, flags=re.DOTALL) for completion in completions]  # no "." in regex
+        return [1.0 if match else 0.0 for match in matches]
+
+    def _accuracy_reward(self, completions, metadata, **kwargs):  # one dataset score per completion
+        answers = [utils.extract_answer(completion) for completion in completions]  # needs train_dataset.data below
+        return [self.train_dataset.data.score_answer(answer, entry=obj) for (answer, obj) in zip(answers, metadata)]
+
+
+def train(model, tokenizer, dataset, training_args):  # run GRPO training, then save the model to "outputs"
+    trainer = GRPOTrainerCustom(
+        model=model,
+        tokenizer=tokenizer,
+        args=training_args,
+        train_dataset=dataset,  # GRPOTrainerCustom reads dataset.data.score_answer for the accuracy reward
+    )
+
+    trainer.train()
+
+    logging.info("Saving model...")
+    trainer.save_model("outputs")  # hard-coded path; same string as output_dir in main()
+
+
+def evaluate(model, tokenizer, dataset, *args, **kwargs):  # mean score over dataset; extra args go to generate()
+    model.eval()
+    correct_preds = 0  # running sum of per-item scores returned by score_answer
+    total_preds = 0
+
+    for i in tqdm(range(len(dataset))):
+        item = dataset[i]
+        prompt = item["prompt"]
+        metadata = item["metadata"]
+        inputs = tokenizer(prompt, return_tensors="pt")["input_ids"].to("cuda")
+
+        with torch.no_grad():
+            outputs = model.generate(
+                inputs,
+                pad_token_id=tokenizer.eos_token_id,
+                *args,
+                **kwargs,
+            )
+
+        generated_text = tokenizer.decode(outputs[0][inputs.shape[1] :], skip_special_tokens=True)  # drop prompt ids
+        answer = utils.extract_answer(generated_text)  # scored on the completion only, like _accuracy_reward
+        score = dataset.data.score_answer(answer, entry=metadata)
+        correct_preds += score
+        total_preds += 1
+
+    return correct_preds / total_preds
+
+
+def main(args):  # build model and dataset, train with GRPO, then log evaluation accuracy
+    model, tokenizer = get_model_and_tokenizer(
+        args.model_id, args.max_seq_length, args.lora_rank, args.quantize, args.gpu_memory_utilization
+    )
+
+    developer_prompt = utils.SYSTEM_PROMPTS["DeepSeekZero"]  # system prompt used for training prompts
+    dataset = ReasoningGymDataset(args.dataset_name, args.dataset_seed, args.dataset_size, tokenizer, developer_prompt)
+
+    training_args = GRPOConfig(
+        output_dir="outputs",
+        use_vllm=True,
+        learning_rate=5e-6,
+        adam_beta1=0.9,
+        adam_beta2=0.99,
+        weight_decay=0.1,
+        warmup_ratio=0.1,
+        lr_scheduler_type="cosine",
+        optim="adamw_8bit",
+        logging_steps=1,
+        bf16=is_bfloat16_supported(),  # bf16 when supported ...
+        fp16=not is_bfloat16_supported(),  # ... otherwise fp16
+        per_device_train_batch_size=args.train_batch_size,
+        gradient_accumulation_steps=1,
+        num_generations=args.num_generations,
+        num_train_epochs=args.train_epochs,
+        save_steps=100,
+        max_grad_norm=0.1,
+    )
+
+    train(model, tokenizer, dataset, training_args)
+
+    model = FastLanguageModel.for_inference(model)
+
+    eval_dataset = ReasoningGymDataset(  # second dataset built from the eval seed/size arguments
+        args.dataset_name,
+        args.eval_seed,
+        args.eval_size,
+        tokenizer,
+        utils.SYSTEM_PROMPTS["DeepSeekZero"],  # same system prompt as the training dataset
+    )
+
+    accuracy = evaluate(model, tokenizer, eval_dataset, max_new_tokens=training_args.max_completion_length)
+    logging.info(f"Evaluation accuracy: {accuracy * 100}%")  # evaluate() returns a fraction; logged as percent
+
+
+if __name__ == "__main__":  # CLI entry point: parse arguments and run main()
+    logging.basicConfig(level=logging.INFO)
+
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("--model-id", type=str, default="Qwen/Qwen2.5-1.5B-Instruct")
+    parser.add_argument("--dataset-name", type=str, required=True)  # no default; fail early instead of passing None
+
+    parser.add_argument("--max-seq-length", type=int, default=1024)
+    parser.add_argument("--lora-rank", type=int, default=64)
+    parser.add_argument("--quantize", action="store_true")
+    parser.add_argument("--num-generations", type=int, default=8)
+    parser.add_argument("--train-epochs", type=int, default=1)
+    parser.add_argument("--train-batch-size", type=int, default=8)
+
+    parser.add_argument("--dataset-seed", type=int, default=42)
+    parser.add_argument("--dataset-size", type=int, default=1000)
+
+    parser.add_argument("--eval-seed", type=int, default=42)  # NOTE(review): same default as --dataset-seed; confirm
+    parser.add_argument("--eval-size", type=int, default=100)
+
+    parser.add_argument("--gpu-memory-utilization", type=float, default=0.7)
+
+    args = parser.parse_args()
+    main(args)
diff --git a/examples/veRL/main_ppo_custom_reward_server.py b/examples/veRL/main_ppo_custom_reward_server.py
index 0f20be1d..4a0eccc0 100644
--- a/examples/veRL/main_ppo_custom_reward_server.py
+++ b/examples/veRL/main_ppo_custom_reward_server.py
@@ -1,7 +1,7 @@
 # This example is an adapted version of Bytedance's code:
 # https://github.com/volcengine/verl/blob/a65c9157bc0b85b64cd753de19f94e80a11bd871/verl/trainer/main_ppo.py
 import os
-from typing import Dict, List, Optional
+from typing import Optional
 
 import hydra
 import ray
@@ -64,12 +64,12 @@ class ReasoningGymDataset(Dataset):
             self.client.create_experiment(dataset_name, config)
 
         # Cache for batches
-        self._batch_cache: dict[int, List[BatchEntry]] = {}
+        self._batch_cache: dict[int, list[BatchEntry]] = {}
 
     def __len__(self) -> int:
         return self.size
 
-    def _get_batch(self, batch_idx: int) -> List[BatchEntry]:
+    def _get_batch(self, batch_idx: int) -> list[BatchEntry]:
         """Fetch or retrieve cached batch"""
         if batch_idx not in self._batch_cache:
             base_index = batch_idx * self.batch_size
diff --git a/examples/word_ladder/main.py b/examples/word_ladder/main.py
index 4cbd88c9..1157c093 100644
--- a/examples/word_ladder/main.py
+++ b/examples/word_ladder/main.py
@@ -14,7 +14,7 @@ from typing import Any, Dict
 from examples.word_ladder.utils import create_word_ladders, generate_reasoning
 
 
-def create_dataset(jsonl_path: Path, config: Dict[str, Any]) -> bool:
+def create_dataset(jsonl_path: Path, config: dict[str, Any]) -> bool:
     """
     Creates the word ladder dataset, handling potential exhaustion gracefully.
 
diff --git a/notebooks/gsm-symbolic-cot.txt b/notebooks/gsm-symbolic-cot.txt
index 3ebc054d..1d0ca4b2 100644
--- a/notebooks/gsm-symbolic-cot.txt
+++ b/notebooks/gsm-symbolic-cot.txt
@@ -15,7 +15,7 @@ OUTPUT 1: Output in the form which should be generated
 from random import Random
 from typing import Dict, Any
 
-def generate_from_variables(time_per_interval: int, distance_per_interval: int, total_distance: int) -> Dict[str, Any]:
+def generate_from_variables(time_per_interval: int, distance_per_interval: int, total_distance: int) -> dict[str, Any]:
     intervals = total_distance // distance_per_interval
     total_time = intervals * time_per_interval
 
@@ -36,7 +36,7 @@ def generate_from_variables(time_per_interval: int, distance_per_interval: int,
         }}
     }}
 
-def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
     # Generate random values scaled by difficulty
     distance_per_interval = int(rng.randint(2, int(10 * difficulty)))
     time_per_interval = int(rng.randint(5, int(30 * difficulty)))
@@ -57,7 +57,7 @@ def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         }}
     }}
 
-def original_example() -> Dict[str, Any]:
+def original_example() -> dict[str, Any]:
    return generate_from_variables(10, 3, 42)
 ```
 
@@ -79,7 +79,7 @@ from random import Random
 from typing import Dict, Any
 
 def generate_from_variables(name: str, food: str, rate_per_min: int, batch_size: int,
-                          time_per_batch: int, total_amount: int) -> Dict[str, Any]:
+                          time_per_batch: int, total_amount: int) -> dict[str, Any]:
     peel_time = total_amount // rate_per_min
     num_batches = total_amount // batch_size
     cook_time = num_batches * time_per_batch
@@ -110,7 +110,7 @@ def generate_from_variables(name: str, food: str, rate_per_min: int, batch_size:
         }
     }
 
-def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
     names = ["Emily", "Sarah", "Emma", "Sophia", "Olivia", "Ava", "Isabella", "Mia"]
     foods = ["shrimp", "onion", "carrot", "mushroom", "clam"]
 
@@ -139,7 +139,7 @@ def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         }
     }
 
-def original_example() -> Dict[str, Any]:
+def original_example() -> dict[str, Any]:
     return generate_from_variables("Emily", "shrimp", 6, 30, 10, 90)
 ```
 
@@ -161,7 +161,7 @@ from random import Random
 from typing import Dict, Any
 
 def generate_from_variables(family: str, item: str, total: int, n1: int, n2: int,
-                          flavor1: str, flavor2: str, flavor3: str) -> Dict[str, Any]:
+                          flavor1: str, flavor2: str, flavor3: str) -> dict[str, Any]:
     n3 = total - (n1 + n2)
 
     question = f"The {family} family is busy making {item}s. So far, they've made {total} {item}s. They have {n1} {flavor1} {item}s, {n2} {flavor2} {item}s, and some {flavor3} {item}s. How many {flavor3} {item}s have they made?"
@@ -186,7 +186,7 @@ def generate_from_variables(family: str, item: str, total: int, n1: int, n2: int
         }
     }
 
-def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
     families = ["Smith", "Johnson", "Williams", "Brown", "Jones"]
     items = ["cupcake", "muffin", "brownie", "biscuit"]
     flavors = ["vanilla", "strawberry", "blueberry", "lemon", "peanut butter"]
@@ -217,7 +217,7 @@ def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         }
     }
 
-def original_example() -> Dict[str, Any]:
+def original_example() -> dict[str, Any]:
     return generate_from_variables("Adams", "cookie", 7995, 2595, 3075,
                                  "rainbow", "oatmeal", "chocolate chip")
 ```
@@ -241,7 +241,7 @@ from typing import Dict, Any
 
 def generate_from_variables(name: str, event: str, food: str, obj: str,
                           package_husband: int, used_spoons: int,
-                          remaining_spoons: int) -> Dict[str, Any]:
+                          remaining_spoons: int) -> dict[str, Any]:
 
     total_spoons = remaining_spoons + used_spoons
     package_julia = total_spoons - package_husband
@@ -268,7 +268,7 @@ def generate_from_variables(name: str, event: str, food: str, obj: str,
         }
     }
 
-def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
     names = ['Emma', 'Olivia', 'Ava', 'Isabella', 'Sophia', 'Mia', 'Charlotte']
     events = ['lunch party', 'birthday party', 'potluck party', 'baby shower', 'game night']
     foods = ['roast chicken', 'grilled salmon', 'beef stew', 'vegetable lasagna',
@@ -298,7 +298,7 @@ def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         }
     }
 
-def original_example() -> Dict[str, Any]:
+def original_example() -> dict[str, Any]:
     return generate_from_variables('Julia', 'dinner party', 'stew', 'spoons',
                                  5, 3, 12)
 ```
diff --git a/pyproject.toml b/pyproject.toml
index 4bba76fa..fd262412 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "reasoning_gym"
-version = "0.1.8"
+version = "0.1.9"
 authors = [
   { name = "Open-Thought community", email = "andreas.koepf@xamla.com" },
 ]
diff --git a/reasoning_gym/__init__.py b/reasoning_gym/__init__.py
index 9429682d..a283cdcf 100644
--- a/reasoning_gym/__init__.py
+++ b/reasoning_gym/__init__.py
@@ -2,10 +2,10 @@
 Reasoning Gym - A library of procedural dataset generators for training reasoning models
 """
 
-from . import algebra, algorithmic, arc, arithmetic, code, cognition, data, games, geometry, graphs, logic
+from . import algebra, algorithmic, arc, arithmetic, code, cognition, data, games, geometry, graphs, induction, logic
 from .factory import create_dataset, register_dataset
 
-__version__ = "0.1.8"
+__version__ = "0.1.9"
 __all__ = [
     "arc",
     "algebra",
@@ -18,6 +18,7 @@ __all__ = [
     "geometry",
     "graphs",
     "logic",
+    "induction",
     "create_dataset",
     "register_dataset",
 ]
diff --git a/reasoning_gym/algebra/complex_arithmetic.py b/reasoning_gym/algebra/complex_arithmetic.py
index c55e1d09..5f97b9ee 100644
--- a/reasoning_gym/algebra/complex_arithmetic.py
+++ b/reasoning_gym/algebra/complex_arithmetic.py
@@ -2,7 +2,7 @@ import cmath
 import math
 import random
 from dataclasses import dataclass
-from typing import Optional, Tuple
+from typing import Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -13,7 +13,7 @@ class ComplexArithmeticConfig:
     max_real: int = 10
     min_imag: int = -10
     max_imag: int = 10
-    operations: Tuple[str, ...] = ("+", "-", "*", "/")
+    operations: tuple[str, ...] = ("+", "-", "*", "/")
     seed: Optional[int] = None
     size: int = 500
 
diff --git a/reasoning_gym/algebra/intermediate_integration.py b/reasoning_gym/algebra/intermediate_integration.py
index 84e01d9b..b4fb28ea 100644
--- a/reasoning_gym/algebra/intermediate_integration.py
+++ b/reasoning_gym/algebra/intermediate_integration.py
@@ -241,7 +241,7 @@ In addition, when doing calculation, use the following instructions together wit
             },
         }
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Determine if the solution provided solves the problem"""
         reward = 0.0
         metadata = entry["metadata"]
diff --git a/reasoning_gym/algebra/polynomial_equations.py b/reasoning_gym/algebra/polynomial_equations.py
index cd4842ee..c069c32f 100644
--- a/reasoning_gym/algebra/polynomial_equations.py
+++ b/reasoning_gym/algebra/polynomial_equations.py
@@ -1,8 +1,7 @@
 import math
 import random
-import string
 from dataclasses import dataclass
-from typing import Dict, List, Optional, Tuple
+from typing import Any, Optional
 
 from sympy import Eq, Symbol, expand, solve
 
@@ -21,7 +20,7 @@ class PolynomialEquationsConfig:
     max_value: int = 100  # Maximum value for coefficients
     min_degree: int = 1  # Minimum polynomial degree
     max_degree: int = 3  # Maximum polynomial degree
-    operators: Tuple[str, ...] = (
+    operators: tuple[str, ...] = (
         "+",
         "-",
     )  # Allowed operators between terms, Avoid adding '*' or '/' because they will affect the degree
@@ -163,7 +162,7 @@ In solving the equations, please abide by the following instruction:
 
         return polynomial_expr
 
-    def _parse_score_to_list(self, answer: Optional[str]) -> List[float]:
+    def _parse_score_to_list(self, answer: Optional[str]) -> list[float]:
         """Parses a comma-separated string of scores into a sorted list of floats.
 
         This method takes a string containing comma-separated numeric values,
@@ -193,7 +192,7 @@ In solving the equations, please abide by the following instruction:
 
         return sorted(output_float_vals)  # Return the sorted list of floats
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """
         Score an answer based on its numerical distance to oracle solutions using exponential decay.
         This function compares a predicted answer (or list of answers) to a set of oracle solutions
diff --git a/reasoning_gym/algebra/polynomial_multiplication.py b/reasoning_gym/algebra/polynomial_multiplication.py
index 8617db17..09e2530a 100644
--- a/reasoning_gym/algebra/polynomial_multiplication.py
+++ b/reasoning_gym/algebra/polynomial_multiplication.py
@@ -1,7 +1,6 @@
 import random
-import warnings
 from dataclasses import dataclass
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Optional
 
 import sympy as sp
 from sympy.polys.monomials import itermonomials
@@ -23,10 +22,10 @@ class PolynomialMultiplicationConfig:
     max_degree: int = 3  # Maximum polynomial degree
     min_polynomials: int = 2  # Minimum number of polynomials being multiplied
     max_polynomials: int = 3  # Maximum number of polynomials being multiplied
-    variables: Tuple[str] = ("x", "y", "z")  # Tuple of variable names, that will be chosen randomly
+    variables: tuple[str] = ("x", "y", "z")  # Tuple of variable names, that will be chosen randomly
     allow_cross_variable_product: bool = False  # Generate tasks like "Multiply (x^2+3x-1)*(y^2-5)"
     allow_multivariate_polynomials: bool = False  # Generate multivariate tasks like "Multiply (2x^2 + 3y)*(5x^2+3x-1)"
-    operators: Tuple[str, ...] = (
+    operators: tuple[str, ...] = (
         "+",
         "-",
     )  # Allowed operators between terms, Avoid adding '*' or '/' because they will affect the degree
@@ -146,7 +145,7 @@ In addition, When doing calculation, Use the following instructions together wit
 
         return polynomial_expr
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         reward = 0.0
         metadata = entry["metadata"]
         if answer is not None:
diff --git a/reasoning_gym/algebra/simple_equations.py b/reasoning_gym/algebra/simple_equations.py
index c8de94b8..778f51a9 100644
--- a/reasoning_gym/algebra/simple_equations.py
+++ b/reasoning_gym/algebra/simple_equations.py
@@ -1,7 +1,7 @@
 import random
 import string
 from dataclasses import dataclass
-from typing import Optional, Tuple
+from typing import Optional
 
 from sympy import Symbol
 
@@ -69,7 +69,7 @@ class SimpleEquationsDataset(ProceduralDataset):
         """Get a random lowercase variable name"""
         return rng.choice(string.ascii_lowercase)
 
-    def _generate_equation(self, rng: random.Random, variable: str) -> Tuple[str, int]:
+    def _generate_equation(self, rng: random.Random, variable: str) -> tuple[str, int]:
         """Generate an equation and its solution
 
         Args:
diff --git a/reasoning_gym/algebra/simple_integration.py b/reasoning_gym/algebra/simple_integration.py
index 8dfa775b..019cdf90 100644
--- a/reasoning_gym/algebra/simple_integration.py
+++ b/reasoning_gym/algebra/simple_integration.py
@@ -86,7 +86,7 @@ In addition, When doing calculation, Use the following instructions together wit
             },
         }
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Determine if the solution provided solves the problem"""
         reward = 0.0
         metadata = entry["metadata"]
diff --git a/reasoning_gym/algorithmic/__init__.py b/reasoning_gym/algorithmic/__init__.py
index 2581789f..ad9cccd3 100644
--- a/reasoning_gym/algorithmic/__init__.py
+++ b/reasoning_gym/algorithmic/__init__.py
@@ -8,6 +8,7 @@ Algorithmic tasks for training reasoning capabilities:
 
 from .ab import ABConfig, ABDataset
 from .base_conversion import BaseConversionConfig, BaseConversionDataset
+from .binary_alternation import BinaryAlternationConfig, BinaryAlternationDataset
 from .binary_matrix import BinaryMatrixConfig, BinaryMatrixDataset
 from .caesar_cipher import CaesarCipherConfig, CaesarCipherDataset
 from .count_primes import CountPrimesConfig, CountPrimesDataset
@@ -16,6 +17,7 @@ from .game_of_life import GameOfLifeConfig, GameOfLifeDataset
 from .graph_color import GraphColorConfig, GraphColorDataset
 from .group_anagrams import GroupAnagramsConfig, GroupAnagramsDataset
 from .isomorphic_strings import IsomorphicStringsConfig, IsomorphicStringsDataset
+from .jugs import JugsConfig, JugsDataset
 from .letter_counting import LetterCountingConfig, LetterCountingDataset
 from .letter_jumble import LetterJumbleConfig, LetterJumbleDataset
 from .manipulate_matrix import ManipulateMatrixConfig, ManipulateMatrixDataset
@@ -26,6 +28,7 @@ from .palindrome_partitioning import PalindromePartitioningConfig, PalindromePar
 from .pool_matrix import PoolMatrixConfig, PoolMatrixDataset
 from .ransom_note import RansomNoteConfig, RansomNoteDataset
 from .rotate_matrix import RotateMatrixConfig, RotateMatrixDataset
+from .rotten_oranges import RottenOrangesConfig, RottenOrangesDataset
 from .sentence_reordering import SentenceReorderingConfig, SentenceReorderingDataset
 from .spell_backward import SpellBackwardConfig, SpellBackwardDataset
 from .spiral_matrix import SpiralMatrixConfig, SpiralMatrixDataset
@@ -99,4 +102,10 @@ __all__ = [
     "StringSplittingDataset",
     "StringSynthesisConfig",
     "StringSynthesisDataset",
+    "RottenOrangesConfig",
+    "RottenOrangesDataset",
+    "JugsConfig",
+    "JugsDataset",
+    "BinaryAlternationConfig",
+    "BinaryAlternationDataset",
 ]
diff --git a/reasoning_gym/algorithmic/ab.py b/reasoning_gym/algorithmic/ab.py
index 1daea0e9..139e4af8 100644
--- a/reasoning_gym/algorithmic/ab.py
+++ b/reasoning_gym/algorithmic/ab.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from random import Random
-from typing import Dict, Optional
+from typing import Any, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -129,14 +129,14 @@ Return the final state of the program.
             "metadata": {},
         }
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Determine if the solution provided solves the AB task.
 
         The function awards 1.0 for a correct answer.
 
         Args:
             answer (Optional[str]): The user's answer.
-            entry (Dict[str, any]): The original dataset entry containing the correct answer.
+            entry (dict[str, Any]): The original dataset entry containing the correct answer.
 
         Returns:
             float: The computed score between 0.0 and 1.0.
diff --git a/reasoning_gym/algorithmic/base_conversion.py b/reasoning_gym/algorithmic/base_conversion.py
index 48c7ecbf..bd897a89 100644
--- a/reasoning_gym/algorithmic/base_conversion.py
+++ b/reasoning_gym/algorithmic/base_conversion.py
@@ -2,7 +2,7 @@
 
 from dataclasses import dataclass
 from random import Random
-from typing import Optional, Tuple
+from typing import Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -61,7 +61,7 @@ class BaseConversionDataset(ProceduralDataset):
         else:
             return f"base-{base}"
 
-    def _generate_conversion(self, rng: Random) -> Tuple[int, int, int]:
+    def _generate_conversion(self, rng: Random) -> tuple[int, int, int]:
         """Generate random value and source/target bases"""
         value = rng.randint(self.config.min_value, self.config.max_value)
 
diff --git a/reasoning_gym/algorithmic/binary_alternation.py b/reasoning_gym/algorithmic/binary_alternation.py
new file mode 100644
index 00000000..ca204c6d
--- /dev/null
+++ b/reasoning_gym/algorithmic/binary_alternation.py
@@ -0,0 +1,114 @@
+"""Minimum number of swaps to make a binary string alternating
+
+https://leetcode.com/problems/minimum-number-of-swaps-to-make-the-binary-string-alternating/description/
+"""
+
+from dataclasses import dataclass
+from random import Random
+from typing import Optional
+
+from ..factory import ProceduralDataset, register_dataset
+
+QUESTION_TEMPLATE = """Given a binary string, return the minimum number of character swaps to make it alternating, or -1 if it is impossible.
+
+The string is called alternating if no two adjacent characters are equal. For example, the strings "010" and "1010" are alternating, while the string "0100" is not.
+
+Any two characters may be swapped, even if they are not adjacent.
+
+Example:
+- Input: Determine the minimum number of swaps to make the following binary string alternating: 111000
+- Output: 1
+
+Now, determine the minimum number of swaps to make the following binary string alternating: {string}
+"""
+
+
+@dataclass
+class BinaryAlternationConfig:
+    """Configuration for Binary Alternation dataset generation"""
+
+    min_n: int = 10  # Minimum number of bits drawn for the binary string
+    max_n: int = 30  # Maximum number of bits drawn (unsolvable samples get 1-2 extra bits on top)
+    p_solvable: float = 0.8  # Probability of generating a solvable sample
+
+    size: int = 500  # Virtual dataset size
+    seed: Optional[int] = None
+
+    def validate(self):
+        """Validate configuration parameters; raises AssertionError on the first violated constraint"""
+        assert 1 <= self.min_n, "Minimum number of bits must be at least 1"
+        assert self.min_n <= self.max_n, "Minimum number of bits must be <= maximum number of bits"
+        assert 0 <= self.p_solvable <= 1, "Probability of generating a solvable sample must be in [0, 1]"
+
+
+class BinaryAlternationDataset(ProceduralDataset):
+    """Generates Binary Alternation exercises (minimum swaps to make a binary string alternating)"""
+
+    def __init__(self, config: BinaryAlternationConfig):
+        super().__init__(config=config, seed=config.seed, size=config.size)
+
+    def _get_binary_string(self, rng: Random, solvable: bool) -> str:  # returns a shuffled string of "0"/"1"
+        n = rng.randint(self.config.min_n, self.config.max_n)
+        ones, zeros = n // 2, n // 2
+
+        # Odd length: give the leftover bit to ones or zeros with equal probability
+        if n % 2 == 1:
+            if rng.random() < 0.5:
+                ones += 1
+            else:
+                zeros += 1
+
+        if not solvable:  # push the counts to differ by exactly 2, which _get_answer reports as -1
+            if ones > zeros:
+                ones += 1
+            elif ones < zeros:
+                zeros += 1
+            else:
+                # Counts are equal (n was even): randomly add 2 bits of the same type
+                if rng.random() < 0.5:
+                    ones += 2
+                else:
+                    zeros += 2
+
+        # Generate the string (for unsolvable samples it is 1-2 bits longer than the drawn n)
+        string = ["1"] * ones + ["0"] * zeros
+        rng.shuffle(string)
+        return "".join(string)
+
+    def _get_answer(self, string: str) -> int:
+        """Calculate the minimum number of swaps to make the string alternating, or -1 if impossible"""
+
+        def get_num_swaps(expected):  # expected: the character the target pattern has at index 0
+            incorrect = 0
+            for c in string:
+                if c != expected:
+                    incorrect += 1
+                expected = "1" if expected == "0" else "0"
+            return incorrect // 2  # one swap repairs two mismatched positions
+
+        ones, zeros = string.count("1"), string.count("0")
+        if abs(ones - zeros) > 1:
+            return -1  # impossible to make alternating
+        if ones > zeros:
+            return get_num_swaps("1")
+        elif ones < zeros:
+            return get_num_swaps("0")
+        else:
+            return min(get_num_swaps("0"), get_num_swaps("1"))
+
+    def __getitem__(self, idx: int) -> dict:
+        """Generate a single Binary Alternation question"""
+        rng = Random(self.seed + idx)
+
+        solvable = rng.random() < self.config.p_solvable
+        string = self._get_binary_string(rng, solvable)
+        answer = self._get_answer(string)
+
+        return {
+            "question": QUESTION_TEMPLATE.format(string=string),
+            "answer": str(answer),
+            "metadata": {"string": string, "solution": answer, "solvable": solvable},
+        }
+
+
+register_dataset("binary_alternation", BinaryAlternationDataset, BinaryAlternationConfig)
diff --git a/reasoning_gym/algorithmic/binary_matrix.py b/reasoning_gym/algorithmic/binary_matrix.py
index 509b8042..92317e78 100644
--- a/reasoning_gym/algorithmic/binary_matrix.py
+++ b/reasoning_gym/algorithmic/binary_matrix.py
@@ -7,7 +7,7 @@ https://leetcode.com/problems/01-matrix/description/
 from collections import deque
 from dataclasses import dataclass
 from random import Random
-from typing import Dict, Optional
+from typing import Any, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -112,7 +112,7 @@ class BinaryMatrixDataset(ProceduralDataset):
         """Get a string representation of the matrix"""
         return "\n".join(" ".join(str(x) for x in row) for row in matrix)
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Overwrite this method in derived classes if a single oracle answer is not available."""
         oracle_answer = entry["answer"]
         if answer is not None:
diff --git a/reasoning_gym/algorithmic/cryptarithm.py b/reasoning_gym/algorithmic/cryptarithm.py
index 52c492d1..e04516a1 100644
--- a/reasoning_gym/algorithmic/cryptarithm.py
+++ b/reasoning_gym/algorithmic/cryptarithm.py
@@ -211,14 +211,14 @@ class CryptarithmDataset(ProceduralDataset):
             },
         }
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Determine if the solution provided solves the Cryptarithm task.
 
         The function awards 1.0 for a correct format and answers for all alphabet pairs.
 
         Args:
             answer (Optional[str]): The user's answer already parsed by `extract_answer`
-            answer_str (Dict[str, any]): The original dataset answer_str containing the correct answer. ie "A=1,B=3..."
+            entry (dict[str, Any]): The original dataset entry containing the correct answer, e.g. "A=1,B=3..."
 
         Returns:
             float: The computed score between 0.0 and 1.0.
diff --git a/reasoning_gym/algorithmic/game_of_life.py b/reasoning_gym/algorithmic/game_of_life.py
index b4ac1855..7a1647e7 100644
--- a/reasoning_gym/algorithmic/game_of_life.py
+++ b/reasoning_gym/algorithmic/game_of_life.py
@@ -1,7 +1,7 @@
 import json
 from dataclasses import dataclass
 from random import Random
-from typing import Dict, Optional
+from typing import Any, Optional
 
 import cellpylib as cpl
 
@@ -86,14 +86,14 @@ class GameOfLifeDataset(ProceduralDataset):
             },
         }
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Determine if the solution provided solves the GoL task.
 
         The function awards 1.0 for a correct answer.
 
         Args:
             answer (Optional[str]): The user's answer.
-            entry (Dict[str, any]): The original dataset entry containing the correct answer.
+            entry (dict[str, Any]): The original dataset entry containing the correct answer.
 
         Returns:
             float: The computed score between 0.0 and 1.0.
diff --git a/reasoning_gym/algorithmic/graph_color.py b/reasoning_gym/algorithmic/graph_color.py
index 8f730b25..3e1287b5 100644
--- a/reasoning_gym/algorithmic/graph_color.py
+++ b/reasoning_gym/algorithmic/graph_color.py
@@ -1,7 +1,7 @@
 import json
 from dataclasses import dataclass
 from random import Random
-from typing import Dict, Optional
+from typing import Any, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -209,14 +209,14 @@ Return your solution as a JSON map of vertices to colors. (For example: {{0: 1,
             "metadata": {"possible_answer": solution, "puzzle": puzzle},
         }
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Determine if the solution provided solves the GraphColor task.
 
         The function awards 1.0 for a correct answer.
 
         Args:
             answer (Optional[str]): The user's answer.
-            entry (Dict[str, any]): The original dataset entry containing the correct answer.
+            entry (dict[str, Any]): The original dataset entry containing the correct answer.
 
         Returns:
             float: The computed score between 0.0 and 1.0.
diff --git a/reasoning_gym/algorithmic/group_anagrams.py b/reasoning_gym/algorithmic/group_anagrams.py
index e3f65013..caf46357 100644
--- a/reasoning_gym/algorithmic/group_anagrams.py
+++ b/reasoning_gym/algorithmic/group_anagrams.py
@@ -10,7 +10,7 @@ import json
 from collections import defaultdict
 from dataclasses import dataclass
 from random import Random
-from typing import Dict, Optional
+from typing import Any, Optional
 
 from ..data import get_data_file_path
 from ..factory import ProceduralDataset, register_dataset
@@ -88,7 +88,7 @@ class GroupAnagramsDataset(ProceduralDataset):
         anagrams = list(res.values())
         return self._sort_nested_list(anagrams)
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Score a single Group Anagrams question"""
         reward = 0.0
         if answer is not None:
diff --git a/reasoning_gym/algorithmic/jugs.py b/reasoning_gym/algorithmic/jugs.py
new file mode 100644
index 00000000..a09893d0
--- /dev/null
+++ b/reasoning_gym/algorithmic/jugs.py
@@ -0,0 +1,313 @@
+import json
+import math
+from collections import deque
+from dataclasses import dataclass
+from functools import reduce
+from random import Random
+from typing import Any, Dict, List, Optional, Tuple
+
+from ..factory import ProceduralDataset, register_dataset
+
+
+def min_moves_n(jug_capacities: List[int], target: int) -> Optional[int]:
+    """
+    Breadth-first search for the fewest moves that leave exactly `target`
+    units of water in at least one jug, starting with every jug empty.
+    Because states are explored in order of depth, the first state found
+    that contains `target` is reached with the minimum number of moves.
+
+    A state is a tuple with one entry per jug holding its current contents.
+    From any state the search may:
+      - fill one jug to its capacity,
+      - empty one jug,
+      - pour jug i into jug j until i is empty or j is full.
+
+    Args:
+        jug_capacities: capacity of each jug, in jug order.
+        target: amount that must appear in any single jug.
+
+    Returns:
+        The length of a shortest move sequence, or None when no reachable
+        state contains `target`.
+    """
+    jug_count = len(jug_capacities)
+    empty_state = (0,) * jug_count
+    # States already queued; each state is expanded at most once.
+    seen = {empty_state}
+    frontier = deque([(empty_state, 0)])
+
+    def successors(levels):
+        """Yield every state produced from `levels` by a single move."""
+        # Single-jug moves.
+        for i, cap in enumerate(jug_capacities):
+            # Fill jug i.
+            yield levels[:i] + (cap,) + levels[i + 1 :]
+            # Empty jug i.
+            yield levels[:i] + (0,) + levels[i + 1 :]
+        # Pour src -> dst for every ordered pair of distinct jugs.
+        for src in range(jug_count):
+            if levels[src] == 0:
+                continue  # nothing to pour
+            for dst in range(jug_count):
+                if dst == src or levels[dst] == jug_capacities[dst]:
+                    continue  # same jug, or no room in the destination
+                # Transfer as much as the source holds or the destination can take.
+                poured = min(levels[src], jug_capacities[dst] - levels[dst])
+                after = list(levels)
+                after[src] -= poured
+                after[dst] += poured
+                yield tuple(after)
+
+    while frontier:
+        levels, depth = frontier.popleft()
+        # Goal test happens on dequeue, so the start state is checked too.
+        if any(level == target for level in levels):
+            return depth
+
+        for nxt in successors(levels):
+            if nxt in seen:
+                continue
+            seen.add(nxt)
+            frontier.append((nxt, depth + 1))
+
+    return None
+
+
+def generate_puzzle(rng: Random, num_jugs: int = 3, difficulty: int = 6, max_attempts: int = 10000) -> dict[str, Any]:
+    """
+    Generate a multi-jug water puzzle by rejection sampling.
+
+    Parameters:
+      - rng: random source; every random draw comes from it, so a seeded rng
+        reproduces the same puzzle.
+      - num_jugs: number of jugs to use (default 3).
+      - difficulty: minimal required moves for a solution; also sets the capacity range.
+      - max_attempts: maximum attempts to generate a puzzle meeting the difficulty.
+
+    On every attempt:
+      - Each jug gets a random capacity between 3 and 3 + difficulty (inclusive).
+      - The target is drawn from the numbers 1 .. max_capacity that are multiples
+        of the gcd of all jug capacities (max_capacity itself always qualifies).
+      - The attempt is accepted when `min_moves_n` reports a shortest solution
+        of at least `difficulty` moves.
+
+    Returns a dictionary with:
+       { "jug_capacities": [c1, c2, ...],
+         "target": target,
+         "min_moves": minimum moves required }.
+
+    Raises a ValueError if no puzzle is generated after max_attempts.
+    """
+    for _ in range(max_attempts):
+        jug_capacities = [rng.randint(3, 3 + difficulty) for _ in range(num_jugs)]
+        max_cap = max(jug_capacities)
+        gcd_all = reduce(math.gcd, jug_capacities)
+        # gcd_all divides max_cap, so this list always contains at least max_cap.
+        possible_targets = [t for t in range(1, max_cap + 1) if t % gcd_all == 0]
+        target = rng.choice(possible_targets)
+
+        moves = min_moves_n(jug_capacities, target)
+        if moves is not None and moves >= difficulty:
+            return {"jug_capacities": jug_capacities, "target": target, "min_moves": moves}
+    raise ValueError(f"Could not generate a puzzle with difficulty at least {difficulty} using {num_jugs} jugs.")
+
+
+def verify_solution(puzzle, moves):
+    """
+    Replay a candidate move list against a multi-jug puzzle.
+
+    Args:
+        puzzle: dictionary with keys
+          - "jug_capacities": list of capacities for each jug.
+          - "target": the amount that must end up in any one jug.
+        moves: list of move strings, each in one of these formats:
+          - "fill X": fill jug X to its capacity.
+          - "empty X": empty jug X.
+          - "pour X->Y": pour jug X into jug Y until X is empty or Y is full.
+
+    Jugs are named by capital letters: jug 0 is "A", jug 1 is "B", and so on.
+    The replay starts with every jug empty.
+
+    Returns:
+        A tuple (result, states) where
+          - result is True if, once all moves have been executed, at least one
+            jug holds exactly the target amount; otherwise False.
+          - states lists the state tuples: the initial all-empty state followed
+            by the state after each move.
+
+    Raises:
+        ValueError: for an unknown move keyword or a pour argument that is not
+            of the form "X->Y".
+        KeyError: for a jug label that is not one of the puzzle's jugs.
+        IndexError: for a move that lacks a keyword or a jug argument.
+    """
+    capacities = puzzle["jug_capacities"]
+    goal = puzzle["target"]
+
+    # Letter -> position lookup ("A" -> 0, "B" -> 1, ...).
+    index_of = {chr(ord("A") + pos): pos for pos in range(len(capacities))}
+
+    # Mutable working state; a tuple snapshot is recorded after every move.
+    levels = [0] * len(capacities)
+    history = [tuple(levels)]
+
+    for move in moves:
+        words = move.split()
+        action = words[0]
+        if action == "fill":
+            # "fill X"
+            jug = index_of[words[1]]
+            levels[jug] = capacities[jug]
+        elif action == "empty":
+            # "empty X"
+            levels[index_of[words[1]]] = 0
+        elif action == "pour":
+            # "pour X->Y": the argument must split into exactly two labels.
+            endpoints = words[1].split("->")
+            if len(endpoints) != 2:
+                raise ValueError(f"Invalid pour move format: {move}")
+            src = index_of[endpoints[0]]
+            dst = index_of[endpoints[1]]
+            # Move as much as the source holds or the destination can still take.
+            transfer = min(levels[src], capacities[dst] - levels[dst])
+            levels[src] -= transfer
+            levels[dst] += transfer
+        else:
+            raise ValueError(f"Unknown move: {move}")
+        history.append(tuple(levels))
+
+    # Only the final state counts; reaching the target mid-sequence is not enough.
+    solved = any(level == goal for level in levels)
+    return (solved, history)
+
+
+def generate_jug_solution(jug_capacities: Tuple[int, ...], target: int) -> List[str]:
+    """Solve the jug puzzle by breadth-first search; works for any number of jugs.
+
+    Args:
+        jug_capacities: capacity of each jug; jug i is labelled chr(65 + i).
+        target: amount that must end up in any one jug.
+
+    Returns:
+        A shortest list of moves ("fill X", "empty X", "pour X->Y"), or
+        ["No solution"] if no reachable state contains the target.
+    """
+    capacities = list(jug_capacities)
+    labels = [chr(65 + i) for i in range(len(capacities))]
+    initial_state = (0,) * len(capacities)
+    # Mark states when they are queued so each one is stored and expanded once.
+    visited = {initial_state}
+    queue = deque([(initial_state, [])])
+
+    while queue:
+        state, path = queue.popleft()
+        if target in state:
+            return path  # Solution found
+        # Fixed move order: fill i, empty i, then pour i into each other jug.
+        moves = []
+        for i, label in enumerate(labels):
+            moves.append((f"fill {label}", state[:i] + (capacities[i],) + state[i + 1 :]))
+            moves.append((f"empty {label}", state[:i] + (0,) + state[i + 1 :]))
+            for j, other in enumerate(labels):
+                if i != j:
+                    amount = min(state[i], capacities[j] - state[j])
+                    poured = list(state)
+                    poured[i], poured[j] = state[i] - amount, state[j] + amount
+                    moves.append((f"pour {label}->{other}", tuple(poured)))
+        for move, new_state in moves:
+            if new_state not in visited:
+                visited.add(new_state)
+                queue.append((new_state, path + [move]))
+    return ["No solution"]  # No valid solution found
+
+
+@dataclass
+class JugsConfig:
+    """Settings that control how Jugs puzzles are generated"""
+
+    num_jugs: int = 3  # How many jugs each puzzle has
+    difficulty: int = 10  # Lower bound on solution length; jug capacities range up to 3 + difficulty
+    seed: Optional[int] = None  # Passed to the dataset constructor as its seed
+    size: int = 500  # Passed to the dataset constructor as its size
+
+    def validate(self):
+        """Assert that the settings lie within the supported ranges"""
+        assert 2 < self.num_jugs, "num_jugs must be gt 2"
+        assert 0 < self.difficulty, "difficulty must be gt 0"
+        assert 200 > self.difficulty, "difficulty must be lt 200"
+
+
+class JugsDataset(ProceduralDataset):
+    """Generates water jug puzzles inspired by [this scene from _Die Hard 3_](https://www.youtube.com/watch?v=6cAbgAaEOVE), with configurable parameters"""
+
+    def __init__(self, config: JugsConfig):
+        super().__init__(config=config, seed=config.seed, size=config.size)
+
+    def __getitem__(self, idx: int) -> dict:
+        """Generate a single Jugs task
+
+        Returns:
+            dict with keys:
+                - question: str, the task description
+                - answer: str, one possible solution as a JSON list of moves
+                - metadata: dict holding the generated puzzle under "puzzle"
+        """
+        rng = Random(self.seed + idx)
+        puzzle = generate_puzzle(rng, num_jugs=self.config.num_jugs, difficulty=self.config.difficulty)
+        solution = generate_jug_solution(puzzle["jug_capacities"], puzzle["target"])
+
+        cap_str = ", ".join(f"{chr(ord('A')+i)}:{cap}" for i, cap in enumerate(puzzle["jug_capacities"]))
+        question = f"""You are a police officer. A maniac has planted a bomb next to a public fountain.
+
+To defuse the bomb, you must solve a puzzle. The puzzle is solved when you fill any of the available jugs with the target amount of water.
+
+You have three move types: 'fill', 'empty' and 'pour'.
+
+To fill Jug A, you 'fill A'.
+To empty Jug B, you 'empty B'.
+To pour the contents of Jug A into Jug B, you 'pour A->B'.
+All jugs are empty to begin with.
+
+The empty jugs hold this many litres of water: {cap_str}
+And your target is: {puzzle['target']} litres.
+
+How do you defuse the bomb?
+
+Reply as a JSON-parsable list of moves which result in any of the jugs being filled with the target amount.
+"""
+
+        return {
+            "question": question,
+            "answer": json.dumps(solution),  # one possible solution
+            "metadata": {"puzzle": puzzle},
+        }
+
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
+        """Determine if the solution provided solves the Jugs task.
+
+        The answer must be a JSON list of moves; it is replayed with
+        `verify_solution` against entry["metadata"]["puzzle"].
+
+        Args:
+            answer (Optional[str]): The user's answer.
+            entry (dict[str, Any]): The original dataset entry.
+
+        Returns:
+            float: 1.0 if the moves solve the puzzle, 0.0 if no answer was
+            given, and 0.01 for unparsable, malformed or non-solving answers.
+        """
+        if answer is None:
+            return 0.0
+
+        try:
+            danswer = json.loads(answer)
+            # The prompt asks for a list; reject dicts, strings and other JSON values.
+            if not isinstance(danswer, list):
+                return 0.01
+            valid, _ = verify_solution(entry["metadata"]["puzzle"], danswer)
+        except Exception:
+            return 0.01
+        return 1.0 if valid else 0.01
+
+
+register_dataset("jugs", JugsDataset, JugsConfig)
diff --git a/reasoning_gym/algorithmic/letter_jumble.py b/reasoning_gym/algorithmic/letter_jumble.py
index b659f6d5..86cce2b3 100644
--- a/reasoning_gym/algorithmic/letter_jumble.py
+++ b/reasoning_gym/algorithmic/letter_jumble.py
@@ -3,7 +3,7 @@
 import re
 from dataclasses import dataclass
 from random import Random
-from typing import Dict, Optional
+from typing import Any, Optional
 
 from reasoning_gym.data import read_data_file
 
@@ -123,14 +123,14 @@ class LetterJumbleDataset(ProceduralDataset):
             },
         }
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Determine if the solution provided solves this task.
 
         The function awards 1.0 for a correct answer.
 
         Args:
             answer (Optional[str]): The user's answer.
-            entry (Dict[str, any]): The original dataset entry containing the correct answer.
+            entry (dict[str, Any]): The original dataset entry containing the correct answer.
 
         Returns:
             float: The computed score between 0.0 and 1.0.
diff --git a/reasoning_gym/algorithmic/number_filtering.py b/reasoning_gym/algorithmic/number_filtering.py
index 64d8bc02..f122f04c 100644
--- a/reasoning_gym/algorithmic/number_filtering.py
+++ b/reasoning_gym/algorithmic/number_filtering.py
@@ -2,7 +2,7 @@
 
 from dataclasses import dataclass
 from random import Random
-from typing import List, Optional, Tuple
+from typing import Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -39,7 +39,7 @@ class NumberFilteringDataset(ProceduralDataset):
         """Format a number with specified decimal places"""
         return f"{num:.{decimals}f}"
 
-    def _generate_numbers(self, rng: Random) -> Tuple[List[float], List[str]]:
+    def _generate_numbers(self, rng: Random) -> tuple[list[float], list[str]]:
         """Generate list of numbers and their string representations"""
         count = rng.randint(self.config.min_numbers, self.config.max_numbers)
         numbers = []
diff --git a/reasoning_gym/algorithmic/number_sorting.py b/reasoning_gym/algorithmic/number_sorting.py
index f906d230..c7170347 100644
--- a/reasoning_gym/algorithmic/number_sorting.py
+++ b/reasoning_gym/algorithmic/number_sorting.py
@@ -2,7 +2,7 @@
 
 from dataclasses import dataclass
 from random import Random
-from typing import List, Optional, Tuple
+from typing import Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -46,7 +46,7 @@ Please follow the instruction below:
         # Reparse to ensure exact decimal representation
         return f"{float(formatted):.{decimals}f}"
 
-    def _generate_numbers(self, rng: Random) -> Tuple[List[float], List[str]]:
+    def _generate_numbers(self, rng: Random) -> tuple[list[float], list[str]]:
         """Generate list of numbers and their string representations"""
         count = rng.randint(self.config.min_numbers, self.config.max_numbers)
         decimals = rng.randint(self.config.min_decimals, self.config.max_decimals)
diff --git a/reasoning_gym/algorithmic/palindrome_generation.py b/reasoning_gym/algorithmic/palindrome_generation.py
index ce2db322..2eca2693 100644
--- a/reasoning_gym/algorithmic/palindrome_generation.py
+++ b/reasoning_gym/algorithmic/palindrome_generation.py
@@ -90,7 +90,7 @@ class PalindromeDataset(ProceduralDataset):
         """Return the palindrome string from the letter set."""
         return "".join(letters)
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Determine if the solution provided is a valid palindrome.
         The answer is expected to be a single string
 
diff --git a/reasoning_gym/algorithmic/palindrome_partitioning.py b/reasoning_gym/algorithmic/palindrome_partitioning.py
index 6a2dbe60..e0d41870 100644
--- a/reasoning_gym/algorithmic/palindrome_partitioning.py
+++ b/reasoning_gym/algorithmic/palindrome_partitioning.py
@@ -8,7 +8,7 @@ import json
 import string
 from dataclasses import dataclass
 from random import Random
-from typing import Dict, Optional
+from typing import Any, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -93,7 +93,7 @@ class PalindromePartitioningDataset(ProceduralDataset):
         _partition(0)
         return self._sort_list(res)
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Score a single Palindrome Partitioning question"""
         if answer is not None:
             try:
diff --git a/reasoning_gym/algorithmic/pool_matrix.py b/reasoning_gym/algorithmic/pool_matrix.py
index dda7ed2d..4d14c9cf 100644
--- a/reasoning_gym/algorithmic/pool_matrix.py
+++ b/reasoning_gym/algorithmic/pool_matrix.py
@@ -1,9 +1,8 @@
 """Perform average / max pooling on a matrix"""
 
-from copy import deepcopy
 from dataclasses import dataclass
 from random import Random
-from typing import Dict, Optional
+from typing import Any, Optional
 
 import numpy as np
 
@@ -95,7 +94,7 @@ class PoolMatrixDataset(ProceduralDataset):
             ]
         )
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Score the answer based on the metadata"""
 
         reward = 0.0
diff --git a/reasoning_gym/algorithmic/ransom_note.py b/reasoning_gym/algorithmic/ransom_note.py
index 633cb62c..2a1826a0 100644
--- a/reasoning_gym/algorithmic/ransom_note.py
+++ b/reasoning_gym/algorithmic/ransom_note.py
@@ -7,7 +7,7 @@ https://leetcode.com/problems/ransom-note/description/
 from collections import defaultdict
 from dataclasses import dataclass
 from random import Random
-from typing import Dict, Optional
+from typing import Any, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -95,14 +95,14 @@ class RansomNoteDataset(ProceduralDataset):
             "metadata": {"ransom_note": ransom_note, "magazine": magazine, "solution": answer, "solvable": solvable},
         }
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Determine if the solution provided solves this task.
 
         The function awards 1.0 for a correct answer.
 
         Args:
             answer (Optional[str]): The user's answer.
-            entry (Dict[str, any]): The original dataset entry containing the correct answer.
+            entry (dict[str, Any]): The original dataset entry containing the correct answer.
 
         Returns:
             float: The computed score between 0.0 and 1.0.
diff --git a/reasoning_gym/algorithmic/rotten_oranges.py b/reasoning_gym/algorithmic/rotten_oranges.py
new file mode 100644
index 00000000..ca7f8817
--- /dev/null
+++ b/reasoning_gym/algorithmic/rotten_oranges.py
@@ -0,0 +1,132 @@
+"""Find how many steps it takes for all oranges in a grid to rot.
+
+A popular Leetcode problem:
+https://leetcode.com/problems/rotting-oranges/description/
+"""
+
+from collections import deque
+from dataclasses import dataclass
+from random import Random
+from typing import Dict, Optional
+
+from ..factory import ProceduralDataset, register_dataset
+
+QUESTION_TEMPLATE = """You are given an n x n grid where each cell can have one of three values:
+- 0 representing an empty cell
+- 1 representing a fresh orange
+- 2 representing a rotten orange
+
+Every minute, any fresh orange that is 4-directionally adjacent to a rotten orange becomes rotten.
+
+Your task is to determine the minimum number of minutes that must elapse until no cell has a fresh orange.
+If this is impossible, return -1.
+
+Example:
+- Input: Determine the minimum number of minutes that must elapse until no cell in the grid below has a fresh orange:
+    2 1 1
+    1 1 0
+    0 1 1
+- Output: 4
+
+Now, determine the minimum number of minutes that must elapse until no cell in the grid below has a fresh orange:
+{matrix}
+"""
+
+
+@dataclass
+class RottenOrangesConfig:
+    """Settings for generating Rotten Oranges grids"""
+
+    min_n: int = 10  # Smallest grid side length
+    max_n: int = 30  # Largest grid side length
+    p_oranges: float = 0.85  # Probability that a cell holds an orange
+    p_rotten: float = 0.1  # Probability that an orange starts out rotten
+
+    size: int = 500  # Virtual dataset size
+    seed: Optional[int] = None
+
+    def validate(self):
+        """Assert that every setting lies in its allowed range"""
+        assert self.min_n >= 1, "min_n must be at least 1"
+        assert self.max_n >= self.min_n, "min_n must be less than or equal to max_n"
+        assert 0 < self.p_oranges and self.p_oranges <= 1, "p_oranges must be between 0 and 1"
+        assert 0 < self.p_rotten and self.p_rotten <= 1, "p_rotten must be between 0 and 1"
+
+
+class RottenOrangesDataset(ProceduralDataset):
+    """Produces Rotten Oranges exercises whose grid size and orange densities come from the config"""
+
+    def __init__(self, config: RottenOrangesConfig):
+        super().__init__(config=config, seed=config.seed, size=config.size)
+
+    def _matrix_to_str(self, matrix: list[list[int]]) -> str:
+        """Render the matrix as text: one row per line, cells separated by single spaces"""
+        rendered_rows = []
+        for row in matrix:
+            rendered_rows.append(" ".join(map(str, row)))
+        return "\n".join(rendered_rows)
+
+    def _get_initial_matrix(self, rng: Random) -> list[list[int]]:
+        """Build a random n x n grid, with n drawn from [min_n, max_n].
+
+        Cells are 0 (empty), 1 (fresh orange) or 2 (rotten orange).
+        """
+        n = rng.randint(self.config.min_n, self.config.max_n)
+        matrix = []
+        for _ in range(n):
+            row = []
+            for _ in range(n):
+                cell = 0
+                # The first draw decides whether the cell holds an orange at all ...
+                if rng.random() < self.config.p_oranges:
+                    # ... and only then does a second draw decide whether it is rotten.
+                    cell = 2 if rng.random() < self.config.p_rotten else 1
+                row.append(cell)
+            matrix.append(row)
+        return matrix
+
+    def _get_answer(self, matrix: list[list[int]]) -> int:
+        """Simulate the rotting process with a multi-source breadth-first search.
+
+        Returns the number of minutes until no fresh orange is left, or -1 when
+        at least one fresh orange can never be reached by a rotten one.
+        """
+        n_rows, n_cols = len(matrix), len(matrix[0])
+        cells = [(r, c) for r in range(n_rows) for c in range(n_cols)]
+
+        # Every initially rotten orange is a search source at minute 0.
+        sources = [(r, c) for r, c in cells if matrix[r][c] == 2]
+        fresh_total = sum(1 for r, c in cells if matrix[r][c] == 1)
+
+        seen = set(sources)
+        frontier = deque((r, c, 0) for r, c in sources)
+        newly_rotten, elapsed = 0, 0
+        while frontier:
+            r, c, minute = frontier.popleft()
+            # Cells leave the queue in non-decreasing minute order, so the last one gives the total.
+            elapsed = minute
+            for nr, nc in ((r + 1, c), (r - 1, c), (r, c + 1), (r, c - 1)):
+                if not (0 <= nr < n_rows and 0 <= nc < n_cols):
+                    continue
+                if (nr, nc) in seen or matrix[nr][nc] != 1:
+                    continue
+                seen.add((nr, nc))
+                newly_rotten += 1
+                frontier.append((nr, nc, minute + 1))
+
+        # Any fresh orange the search never reached stays fresh forever.
+        return elapsed if newly_rotten == fresh_total else -1
+
+    def __getitem__(self, idx: int) -> dict:
+        """Build the idx-th Rotten Oranges question from a Random seeded with seed + idx"""
+        rng = Random(self.seed + idx)
+        grid = self._get_initial_matrix(rng)
+        minutes = self._get_answer(grid)
+        return {
+            "question": QUESTION_TEMPLATE.format(matrix=self._matrix_to_str(grid)),
+            "answer": str(minutes),
+            "metadata": {"matrix": grid, "solution": minutes},
+        }
+
+
+register_dataset("rotten_oranges", RottenOrangesDataset, RottenOrangesConfig)
diff --git a/reasoning_gym/algorithmic/sentence_reordering.py b/reasoning_gym/algorithmic/sentence_reordering.py
index f1303f09..b96fe8a4 100644
--- a/reasoning_gym/algorithmic/sentence_reordering.py
+++ b/reasoning_gym/algorithmic/sentence_reordering.py
@@ -92,7 +92,7 @@ class SentenceReorderingDataset(ProceduralDataset):
             "metadata": {"word_count": word_count},
         }
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         reward = 0.0
         expected_answer = entry["answer"]
         if answer is not None:
diff --git a/reasoning_gym/algorithmic/spell_backward.py b/reasoning_gym/algorithmic/spell_backward.py
index 60af94b6..9a978372 100644
--- a/reasoning_gym/algorithmic/spell_backward.py
+++ b/reasoning_gym/algorithmic/spell_backward.py
@@ -49,7 +49,7 @@ class SpellBackwardDataset(ProceduralDataset):
             "metadata": {"word": word, "word_len": len(word)},
         }
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         reward = 0.0
         expected_answer = entry["answer"]
         if answer is not None:
diff --git a/reasoning_gym/algorithmic/spiral_matrix.py b/reasoning_gym/algorithmic/spiral_matrix.py
index fe65cc87..17aff844 100644
--- a/reasoning_gym/algorithmic/spiral_matrix.py
+++ b/reasoning_gym/algorithmic/spiral_matrix.py
@@ -6,7 +6,7 @@ https://leetcode.com/problems/spiral-matrix/description/
 
 from dataclasses import dataclass
 from random import Random
-from typing import Dict, Optional
+from typing import Any, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -116,7 +116,7 @@ class SpiralMatrixDataset(ProceduralDataset):
             "metadata": {"matrix": matrix, "solution": answer},
         }
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Overwrite this method in derived classes if a single oracle answer is not available."""
         oracle_answer = entry["answer"].strip()
 
diff --git a/reasoning_gym/algorithmic/string_insertion.py b/reasoning_gym/algorithmic/string_insertion.py
index 77ea075f..d09b8a92 100644
--- a/reasoning_gym/algorithmic/string_insertion.py
+++ b/reasoning_gym/algorithmic/string_insertion.py
@@ -5,7 +5,7 @@ https://github.com/yongchao98/CodeSteer-v1.0/blob/main/create_dataset/create_dat
 
 from dataclasses import dataclass
 from random import Random
-from typing import Dict, Optional
+from typing import Any, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -80,7 +80,7 @@ class StringInsertionDataset(ProceduralDataset):
                 i += 1
         return "".join(output)
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Overwrite this method in derived classes if a single oracle answer is not available."""
         oracle_answer = entry["answer"]
         if answer is not None:
diff --git a/reasoning_gym/algorithmic/word_ladder.py b/reasoning_gym/algorithmic/word_ladder.py
index 928a4c97..18670d4b 100644
--- a/reasoning_gym/algorithmic/word_ladder.py
+++ b/reasoning_gym/algorithmic/word_ladder.py
@@ -3,7 +3,7 @@
 from collections import deque
 from dataclasses import dataclass
 from random import Random
-from typing import Dict, List, Optional, Set, Tuple
+from typing import Any, Optional
 
 from ..data import get_data_file_path
 from ..factory import ProceduralDataset, register_dataset
@@ -82,7 +82,7 @@ class WordLadderDataset(ProceduralDataset):
         super().__init__(config=config, seed=config.seed, size=config.size)
 
     @classmethod
-    def _load_words_from_csv(cls, min_length: int = 3, max_length: int = 5) -> Dict[int, Set[str]]:
+    def _load_words_from_csv(cls, min_length: int = 3, max_length: int = 5) -> dict[int, set[str]]:
         """Load words from CSV file organized by length"""
         # Validate length range before processing
         assert 3 <= min_length <= max_length <= 5, "Word length must be between 3 and 5 inclusive"
@@ -117,7 +117,7 @@ class WordLadderDataset(ProceduralDataset):
 
         return word_sets
 
-    def _get_neighbors(self, word: str, word_set: Set[str]) -> Set[str]:
+    def _get_neighbors(self, word: str, word_set: set[str]) -> set[str]:
         """Get neighbors from either precomputed graph or by computing on demand"""
         # Try precomputed graph first
         if len(word) in self.word_graphs and word in self.word_graphs[len(word)]:
@@ -132,7 +132,7 @@ class WordLadderDataset(ProceduralDataset):
                     neighbors.add(neighbor)
         return neighbors
 
-    def _build_word_graph(self, word_length: int) -> Dict[str, Set[str]]:
+    def _build_word_graph(self, word_length: int) -> dict[str, set[str]]:
         """Build graph of word connections for given length, using caching"""
         # Return cached graph if it exists
         if word_length in self.word_graphs:
@@ -156,7 +156,7 @@ class WordLadderDataset(ProceduralDataset):
         self.word_graphs[word_length] = graph
         return self.word_graphs[word_length]
 
-    def _find_path(self, start: str, end: str, word_set: Set[str]) -> Optional[List[str]]:
+    def _find_path(self, start: str, end: str, word_set: set[str]) -> Optional[list[str]]:
         """Simplified path finding using BFS for shortest paths"""
         # Early exit if words are direct neighbors
         if end in self._get_neighbors(start, word_set):
@@ -181,7 +181,7 @@ class WordLadderDataset(ProceduralDataset):
 
         return None
 
-    def _generate_word_pair(self, rng: Random, length: int) -> Tuple[str, str, List[str]]:
+    def _generate_word_pair(self, rng: Random, length: int) -> tuple[str, str, list[str]]:
         """Simplified word pair generation"""
         word_set = self.word_sets[length]
         words_list = sorted(word_set)
@@ -220,7 +220,7 @@ class WordLadderDataset(ProceduralDataset):
             "metadata": {"start_word": start, "end_word": end, "word_length": length, "chain_length": len(path)},
         }
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         if answer is None:
             return 0
 
diff --git a/reasoning_gym/algorithmic/word_sorting.py b/reasoning_gym/algorithmic/word_sorting.py
index 34951c0c..61c8b61d 100644
--- a/reasoning_gym/algorithmic/word_sorting.py
+++ b/reasoning_gym/algorithmic/word_sorting.py
@@ -4,7 +4,7 @@ import re
 from dataclasses import dataclass
 from enum import StrEnum
 from random import Random
-from typing import Dict, List, Optional, Tuple
+from typing import Any, Optional
 
 from ..data import read_data_file
 from ..factory import ProceduralDataset, register_dataset
@@ -84,7 +84,7 @@ class WordSortingDataset(ProceduralDataset):
             return "".join(c.upper() if rng.choice([True, False]) else c.lower() for c in word)
         return word  # ORIGINAL case
 
-    def _generate_words(self, rng: Random) -> Tuple[List[str], List[str]]:
+    def _generate_words(self, rng: Random) -> tuple[list[str], list[str]]:
         """Generate list of words and their transformed versions"""
         count = rng.randint(self.config.min_words, self.config.max_words)
 
@@ -122,7 +122,7 @@ class WordSortingDataset(ProceduralDataset):
             },
         }
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         oracle_answer = entry["metadata"]["sorted_words"]
         if answer is not None and len(answer) > 0:
             parsed_answer = [word.strip() for word in re.split(r",\s*", answer)]
diff --git a/reasoning_gym/arc/arc_1d_tasks.py b/reasoning_gym/arc/arc_1d_tasks.py
index 61151b34..d74d75ed 100644
--- a/reasoning_gym/arc/arc_1d_tasks.py
+++ b/reasoning_gym/arc/arc_1d_tasks.py
@@ -1,13 +1,13 @@
 from random import Random
-from typing import Dict, List, Optional
+from typing import Optional
 
 
-def gen_field(size: int, color: int = 0) -> List[int]:
+def gen_field(size: int, color: int = 0) -> list[int]:
     """Generate a field of given size filled with specified color (default 0)."""
     return [color] * size
 
 
-def write_block(pos: int, block: List[int], field: List[int]) -> List[int]:
+def write_block(pos: int, block: list[int], field: list[int]) -> list[int]:
     """Write a block into a field at given position."""
     result = field.copy()
     for i, color in enumerate(block):
@@ -15,7 +15,7 @@ def write_block(pos: int, block: List[int], field: List[int]) -> List[int]:
     return result
 
 
-def task_move_n_pix(rng: Random, size: int, move_pix: int, solid: bool) -> Optional[Dict[str, List[int]]]:
+def task_move_n_pix(rng: Random, size: int, move_pix: int, solid: bool) -> Optional[dict[str, list[int]]]:
     """Generate a task where a block is moved to the right by move_pix pixels."""
     if size <= move_pix + 1:
         return None
@@ -35,7 +35,7 @@ def task_move_n_pix(rng: Random, size: int, move_pix: int, solid: bool) -> Optio
     return {"input": question, "output": answer}
 
 
-def task_move_n_pix_wrapped(rng: Random, size: int, move_pix: int, solid: bool) -> Optional[Dict[str, List[int]]]:
+def task_move_n_pix_wrapped(rng: Random, size: int, move_pix: int, solid: bool) -> Optional[dict[str, list[int]]]:
     """Generate a task where a block is moved to the right by move_pix pixels with wrapping."""
     block_size = rng.randint(1, size)
     block_pos = rng.randint(0, size)
@@ -56,7 +56,7 @@ def task_move_n_pix_wrapped(rng: Random, size: int, move_pix: int, solid: bool)
     return {"input": question, "output": answer}
 
 
-def task_gravity(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_gravity(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where all non-zero elements are attracted to the left."""
     density = 0.5
     question = [rng.randint(1, 9) if rng.random() < density else 0 for _ in range(size)]
@@ -67,7 +67,7 @@ def task_gravity(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
     return {"input": question, "output": answer}
 
 
-def task_gravity_counting(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_gravity_counting(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where non-zero elements are counted and represented as a sequence of 1s."""
     density = 0.5
     question = [rng.randint(1, 9) if rng.random() < density else 0 for _ in range(size)]
@@ -78,7 +78,7 @@ def task_gravity_counting(rng: Random, size: int) -> Optional[Dict[str, List[int
     return {"input": question, "output": answer}
 
 
-def task_gravity_antigravity(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_gravity_antigravity(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where color 1 moves right and color 2 moves left."""
     density = 0.5
     question = [rng.randint(1, 2) if rng.random() < density else 0 for _ in range(size)]
@@ -90,7 +90,7 @@ def task_gravity_antigravity(rng: Random, size: int) -> Optional[Dict[str, List[
     return {"input": question, "output": answer}
 
 
-def task_block_touch_dot(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_block_touch_dot(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where a block moves to touch (but not cover) a dot."""
     dot_color = 1
     block_color = rng.randint(2, 9)
@@ -129,7 +129,7 @@ def task_block_touch_dot(rng: Random, size: int) -> Optional[Dict[str, List[int]
     return {"input": question, "output": answer}
 
 
-def task_block_touch_dot_n_pix(rng: Random, size: int, move_pix: int) -> Optional[Dict[str, List[int]]]:
+def task_block_touch_dot_n_pix(rng: Random, size: int, move_pix: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where a block moves move_pix pixels toward a dot."""
     dot_color = 2
     block_color = rng.randint(3, 9)
@@ -172,7 +172,7 @@ def task_block_touch_dot_n_pix(rng: Random, size: int, move_pix: int) -> Optiona
     return {"input": question, "output": answer}
 
 
-def task_block_scale_to_dot(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_block_scale_to_dot(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where a block scales to touch a dot (keeping one end fixed)."""
     dot_color = 2
     block_color = rng.randint(3, 9)
@@ -213,7 +213,7 @@ def task_block_scale_to_dot(rng: Random, size: int) -> Optional[Dict[str, List[i
     return {"input": question, "output": answer}
 
 
-def task_two_points_and_fill(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_two_points_and_fill(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where space between two points of same color is filled with that color."""
     color = rng.randint(1, 9)
 
@@ -235,7 +235,7 @@ def task_two_points_and_fill(rng: Random, size: int) -> Optional[Dict[str, List[
     return {"input": question, "output": answer}
 
 
-def task_reflect_block_with_border_pixel(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_reflect_block_with_border_pixel(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where a block with a border pixel is reflected."""
     block_size = rng.randint(2, size)
     if block_size > size:
@@ -262,7 +262,7 @@ def task_reflect_block_with_border_pixel(rng: Random, size: int) -> Optional[Dic
     return {"input": question, "output": answer}
 
 
-def task_reflect_block_with_border_pixel_random(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_reflect_block_with_border_pixel_random(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where a random-colored block with a border pixel is reflected."""
     block_size = rng.randint(2, size)
     if block_size > size:
@@ -290,7 +290,7 @@ def task_reflect_block_with_border_pixel_random(rng: Random, size: int) -> Optio
     return {"input": question, "output": answer}
 
 
-def task_reflect_block_around_dot(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_reflect_block_around_dot(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where a block is reflected around a dot."""
     dot_color = 2
 
@@ -328,7 +328,7 @@ def task_reflect_block_around_dot(rng: Random, size: int) -> Optional[Dict[str,
     return {"input": question, "output": answer}
 
 
-def task_block_and_noise_remove(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_block_and_noise_remove(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where noise around a block needs to be removed."""
     block_size = rng.randint(2, size)
     if block_size > size:
@@ -379,7 +379,7 @@ def task_block_and_noise_remove(rng: Random, size: int) -> Optional[Dict[str, Li
     return {"input": question, "output": answer}
 
 
-def task_block_and_noise_remove_inside(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_block_and_noise_remove_inside(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where noise inside a block needs to be removed."""
     if size <= 6:
         return None
@@ -419,7 +419,7 @@ def task_block_and_noise_remove_inside(rng: Random, size: int) -> Optional[Dict[
     return {"input": question, "output": answer}
 
 
-def task_copy_block_to_dots(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_copy_block_to_dots(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where a block pattern is copied to dot positions."""
     block_size = 3 if rng.random() < 0.5 else 5
     if block_size >= size:
@@ -456,7 +456,7 @@ def task_copy_block_to_dots(rng: Random, size: int) -> Optional[Dict[str, List[i
     return {"input": question, "output": answer}
 
 
-def task_copy_block_to_dots_colors(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_copy_block_to_dots_colors(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where a block pattern is copied to dot positions with matching colors."""
     block_size = 3 if rng.random() < 0.5 else 5
     if block_size >= size:
@@ -497,7 +497,7 @@ def task_copy_block_to_dots_colors(rng: Random, size: int) -> Optional[Dict[str,
     return {"input": question, "output": answer}
 
 
-def task_paint_biggest_block(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_paint_biggest_block(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where the largest block is painted a different color."""
     target_color = 1
     initial_color = rng.randint(2, 9)
@@ -535,7 +535,7 @@ def task_paint_biggest_block(rng: Random, size: int) -> Optional[Dict[str, List[
     return {"input": question, "output": answer}
 
 
-def task_sort_blocks_by_size(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_sort_blocks_by_size(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where blocks are sorted by size with 1 pixel gaps."""
     color = rng.randint(1, 9)
     blocks = []
@@ -579,7 +579,7 @@ def task_sort_blocks_by_size(rng: Random, size: int) -> Optional[Dict[str, List[
     return {"input": question, "output": answer}
 
 
-def task_sort_complete_sequence(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_sort_complete_sequence(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where a complete sequence of block sizes is sorted."""
     # Calculate max possible block size given total array size
     max_size = 1
@@ -617,7 +617,7 @@ def task_sort_complete_sequence(rng: Random, size: int) -> Optional[Dict[str, Li
     return {"input": question, "output": answer}
 
 
-def task_recolor_blocks_by_size(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_recolor_blocks_by_size(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where two blocks are recolored based on their size."""
     # Generate two different random sizes
     size1 = rng.randint(2, 8)
@@ -656,7 +656,7 @@ def task_recolor_blocks_by_size(rng: Random, size: int) -> Optional[Dict[str, Li
     return {"input": question, "output": answer}
 
 
-def task_gravity_one_step(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_gravity_one_step(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where non-zero elements move one step left if possible."""
     question = [rng.randint(1, 9) if rng.random() < 0.5 else 0 for _ in range(size)]
     answer = question.copy()
@@ -670,7 +670,7 @@ def task_gravity_one_step(rng: Random, size: int) -> Optional[Dict[str, List[int
     return {"input": question, "output": answer}
 
 
-def task_move_block_by_own_size(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_move_block_by_own_size(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where a block moves right by its own size."""
     block_size = rng.randint(1, size // 2)  # Ensure space for movement
     pos = rng.randint(0, size - block_size * 2)  # Space for block and movement
@@ -685,7 +685,7 @@ def task_move_block_by_own_size(rng: Random, size: int) -> Optional[Dict[str, Li
     return {"input": question, "output": answer}
 
 
-def task_change_to_five(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_change_to_five(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where all non-zero colors change to 5."""
     density = 0.5
     question = [rng.randint(1, 9) if rng.random() < density else 0 for _ in range(size)]
@@ -694,7 +694,7 @@ def task_change_to_five(rng: Random, size: int) -> Optional[Dict[str, List[int]]
     return {"input": question, "output": answer}
 
 
-def task_recolor_blocks_from_palette(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_recolor_blocks_from_palette(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where blocks are recolored using a color palette."""
     # Generate blocks of same size
     block_size = rng.randint(2, 4)
@@ -750,7 +750,7 @@ def task_recolor_blocks_from_palette(rng: Random, size: int) -> Optional[Dict[st
     return {"input": question, "output": answer}
 
 
-def task_duplicate_block_from_seeds(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_duplicate_block_from_seeds(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where a block is duplicated from seed pixels."""
     block_size = rng.randint(2, 4)
     if block_size + 1 >= size:
@@ -812,7 +812,7 @@ def task_duplicate_block_from_seeds(rng: Random, size: int) -> Optional[Dict[str
     return {"input": question, "output": answer}
 
 
-def task_fill_from_pixel(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_fill_from_pixel(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where a pixel fills in one direction until hitting another pixel."""
     block_size = rng.randint(3, 6)
     if block_size >= size - 2:
@@ -856,7 +856,7 @@ def task_fill_from_pixel(rng: Random, size: int) -> Optional[Dict[str, List[int]
     return {"input": question, "output": answer}
 
 
-def task_mark_size_two_blocks(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_mark_size_two_blocks(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where size-2 blocks are marked with surrounding pixels."""
     blocks = []
     pos = 0
@@ -908,7 +908,7 @@ def task_mark_size_two_blocks(rng: Random, size: int) -> Optional[Dict[str, List
     return {"input": question, "output": answer}
 
 
-def task_fill_until_collision(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_fill_until_collision(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where pixels fill empty space until collision."""
     # At least 4 positions for meaningful puzzle
     if size < 4:
@@ -975,7 +975,7 @@ def task_fill_until_collision(rng: Random, size: int) -> Optional[Dict[str, List
     return {"input": question, "output": answer}
 
 
-def task_repeat_pattern_full(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_repeat_pattern_full(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where a pattern is repeated to fill the space."""
     # Generate initial pattern
     pattern_size = rng.randint(2, 5)
@@ -1007,7 +1007,7 @@ def task_repeat_pattern_full(rng: Random, size: int) -> Optional[Dict[str, List[
     return {"input": question, "output": answer}
 
 
-def task_gravity_weighted_colors(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_gravity_weighted_colors(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where color 2 is heavier than color 1 in gravity."""
     # Generate random field with only colors 1 and 2
     question = [rng.randint(1, 2) if rng.random() < 0.5 else 0 for _ in range(size)]
@@ -1030,7 +1030,7 @@ def task_gravity_weighted_colors(rng: Random, size: int) -> Optional[Dict[str, L
     return {"input": question, "output": answer}
 
 
-def task_color_left_half_blocks(rng: Random, size: int) -> Optional[Dict[str, List[int]]]:
+def task_color_left_half_blocks(rng: Random, size: int) -> Optional[dict[str, list[int]]]:
     """Generate a task where left half of blocks are colored differently."""
     pos = 0
     question = gen_field(size)
@@ -1063,21 +1063,21 @@ def task_color_left_half_blocks(rng: Random, size: int) -> Optional[Dict[str, Li
     return {"input": question, "output": answer}
 
 
-def task_mirror(task_result: Optional[Dict[str, List[int]]]) -> Optional[Dict[str, List[int]]]:
+def task_mirror(task_result: Optional[dict[str, list[int]]]) -> Optional[dict[str, list[int]]]:
     """Mirror the input and output arrays of a task result."""
     if task_result is None:
         return None
     return {"input": list(reversed(task_result["input"])), "output": list(reversed(task_result["output"]))}
 
 
-def task_inverse(task_result: Optional[Dict[str, List[int]]]) -> Optional[Dict[str, List[int]]]:
+def task_inverse(task_result: Optional[dict[str, list[int]]]) -> Optional[dict[str, list[int]]]:
     """Swap the input and output arrays of a task result."""
     if task_result is None:
         return None
     return {"input": task_result["output"], "output": task_result["input"]}
 
 
-def task_identity(task_result: Optional[Dict[str, List[int]]]) -> Optional[Dict[str, List[int]]]:
+def task_identity(task_result: Optional[dict[str, list[int]]]) -> Optional[dict[str, list[int]]]:
     """Return the task result unchanged."""
     return task_result
 
diff --git a/reasoning_gym/arc/board_format.py b/reasoning_gym/arc/board_format.py
index 554ba299..b0a40636 100644
--- a/reasoning_gym/arc/board_format.py
+++ b/reasoning_gym/arc/board_format.py
@@ -1,5 +1,4 @@
 from dataclasses import dataclass, field
-from typing import List, Tuple
 
 ARC_PROMPT_TEMPLATE = """Find the common rule that maps an input grid to an output grid, given the examples below.
 
@@ -21,7 +20,7 @@ class BoardFormattingOptions:
 
 
 def format_board(
-    board: List[List[int]], formatting_options: BoardFormattingOptions, with_board_shape: bool = False
+    board: list[list[int]], formatting_options: BoardFormattingOptions, with_board_shape: bool = False
 ) -> str:
     """
     Format a board as a string
@@ -65,7 +64,7 @@ def format_board(
 
 def format_board_pair(
     index: int,
-    pair: dict[str, List[List[int]]],
+    pair: dict[str, list[list[int]]],
     formatting_options: BoardFormattingOptions,
 ) -> str:
     """
@@ -82,7 +81,7 @@ def format_board_pair(
     return f"Example {index}:\n\nInput:\n{input_element}\nOutput:\n{output_element}\n\n"
 
 
-def parse_board(formatted_str: str, formatting_options: BoardFormattingOptions) -> Tuple[Tuple[int, ...], ...]:
+def parse_board(formatted_str: str, formatting_options: BoardFormattingOptions) -> tuple[tuple[int, ...], ...]:
     """
     Convert a formatted board string back to a tuple grid using formatting options
     """
diff --git a/reasoning_gym/arc/rearc.py b/reasoning_gym/arc/rearc.py
index ac362f9a..35712705 100644
--- a/reasoning_gym/arc/rearc.py
+++ b/reasoning_gym/arc/rearc.py
@@ -95,7 +95,7 @@ class ReArcDataset(ProceduralDataset):
             },
         }
 
-    def score_answer(self, answer: str, entry: Dict[str, Any]) -> float:
+    def score_answer(self, answer: str, entry: dict[str, Any]) -> float:
         reward = 0.0
         metadata = entry["metadata"]
         if answer is not None:
diff --git a/reasoning_gym/arc/rearc_utils/utils.py b/reasoning_gym/arc/rearc_utils/utils.py
index a6fe0ff4..456c50e4 100644
--- a/reasoning_gym/arc/rearc_utils/utils.py
+++ b/reasoning_gym/arc/rearc_utils/utils.py
@@ -1,5 +1,5 @@
 import random
-from typing import Any, List, Tuple
+from typing import Any
 
 from .dsl import *
 
@@ -40,7 +40,7 @@ def get_pso_difficulty(example: dict) -> float:
     return (pix_pct + col_pct + obj_dens) / 3
 
 
-def unifint(rng: random.Random, diff_lb: float, diff_ub: float, bounds: Tuple[int, int]) -> int:
+def unifint(rng: random.Random, diff_lb: float, diff_ub: float, bounds: tuple[int, int]) -> int:
     """
     rng
     diff_lb: lower bound for difficulty, must be in range [0, diff_ub]
@@ -83,7 +83,7 @@ def strip_prefix(string: str, prefix: str) -> str:
     return string[len(prefix) :]
 
 
-def format_grid(grid: List[List[int]]) -> Grid:
+def format_grid(grid: list[list[int]]) -> Grid:
     """
     grid type casting
     """
diff --git a/reasoning_gym/arithmetic/__init__.py b/reasoning_gym/arithmetic/__init__.py
index 89ac5cf5..5a5ae48b 100644
--- a/reasoning_gym/arithmetic/__init__.py
+++ b/reasoning_gym/arithmetic/__init__.py
@@ -3,6 +3,7 @@ Arithmetic tasks for training reasoning capabilities:
 """
 
 from .basic_arithmetic import BasicArithmeticDataset, BasicArithmeticDatasetConfig
+from .bitwise_arithmetic import BitwiseArithmeticConfig, BitwiseArithmeticDataset
 from .calendar_arithmetic import CalendarArithmeticConfig, CalendarArithmeticDataset
 from .chain_sum import ChainSumConfig, ChainSumDataset
 from .count_bits import CountBitsConfig, CountBitsDataset
@@ -55,4 +56,6 @@ __all__ = [
     "DecimalArithmeticDataset",
     "DecimalChainSumConfig",
     "DecimalChainSumDataset",
+    "BitwiseArithmeticConfig",
+    "BitwiseArithmeticDataset",
 ]
diff --git a/reasoning_gym/arithmetic/basic_arithmetic.py b/reasoning_gym/arithmetic/basic_arithmetic.py
index e8a1cb94..0c3ee345 100644
--- a/reasoning_gym/arithmetic/basic_arithmetic.py
+++ b/reasoning_gym/arithmetic/basic_arithmetic.py
@@ -64,9 +64,6 @@ class BasicArithmeticDataset(ProceduralDataset):
 
     def __init__(self, config: BasicArithmeticDatasetConfig):
         super().__init__(config=config, seed=config.seed, size=config.size)
-        self.added_instruction = (
-            " Ensure to report the answer as an integer. Do not add commas to the integer answers reported."
-        )
 
     def __getitem__(self, idx: int) -> dict[str, Any]:
         """Generate a single arithmetic task
@@ -91,7 +88,7 @@ class BasicArithmeticDataset(ProceduralDataset):
         else:
             expression, result = self._generate_simple_task(rng, num_terms, num_digits)
 
-        question = self._format_question(rng, expression) + self.added_instruction
+        question = self._format_question(rng, expression)
 
         return {
             "question": question,
diff --git a/reasoning_gym/arithmetic/bitwise_arithmetic.py b/reasoning_gym/arithmetic/bitwise_arithmetic.py
new file mode 100644
index 00000000..5c76b962
--- /dev/null
+++ b/reasoning_gym/arithmetic/bitwise_arithmetic.py
@@ -0,0 +1,177 @@
+from dataclasses import dataclass
+from random import Random
+from typing import Any, Optional
+
+from ..factory import ProceduralDataset, register_dataset
+
+
+@dataclass
+class BitwiseArithmeticConfig:
+    """Configuration for Bitwise arithmetic dataset generation"""
+
+    difficulty: int = 2  # Controls expression complexity: 1=simple expressions, 2=nested expressions, 3+=deeper nesting
+    seed: Optional[int] = None
+    size: int = 500
+
+    def validate(self) -> None:
+        """Validate configuration parameters"""
+        assert 0 < self.difficulty, "difficulty must be gt 0"
+        assert 10 >= self.difficulty, "difficulty must be lte 10"
+
+
+def generate_expression(rng: Random, max_depth: int) -> str:
+    """
+    Recursively generate a random arithmetic expression that includes
+    standard arithmetic (+, -, *) and bitwise shifting (<<, >>) operators.
+    All numbers are represented in hexadecimal format as multi-byte values.
+
+    Parameters:
+        rng (Random): Random number generator instance
+        max_depth (int): Maximum depth of nested expressions.
+
+    Returns:
+        str: A string representing the generated expression.
+    """
+    # Base case: return a random multi-byte number in hex (0x100 to 0xFFFF).
+    if max_depth <= 0:
+        return hex(rng.randint(0x100, 0xFFFF))
+
+    # Occasionally return a simple hex number even if max_depth > 0.
+    if rng.random() < 0.01:
+        return hex(rng.randint(0x100, 0xFFFF))
+
+    # Choose a random operator.
+    operators = ["+", "-", "*", "<<", ">>"]
+    op = rng.choice(operators)
+
+    # Generate left and right subexpressions.
+    left_expr = generate_expression(rng, max_depth - 1)
+    right_expr = generate_expression(rng, max_depth - 1)
+
+    # For shifts, discard the generated right operand and use a small literal (0x0-0x3) instead.
+    if op in ["<<", ">>"]:
+        right_expr = hex(rng.randint(0, 3))
+
+    return f"({left_expr} {op} {right_expr})"
+
+
+def generate_problem(rng: Random, difficulty: int = 1) -> tuple[str, str]:
+    """
+    Generate a random arithmetic problem involving multi-byte hexadecimal numbers.
+
+    The 'difficulty' parameter controls the complexity:
+      - difficulty=1: Simple expressions like (0x123 + 0x456)
+      - difficulty=2: Nested expressions like ((0x123 + 0x456) << 0x1)
+      - difficulty=3: Deeper nesting like (((0x123 + 0x456) << 0x1) - ((0x789 * 0xabc) >> 0x2))
+      Higher values continue to increase nesting depth and expression complexity.
+
+    Parameters:
+        rng (Random): Random number generator instance
+        difficulty (int): The difficulty level (1 = simplest; higher values = more complex).
+
+    Returns:
+        tuple: (problem_str, correct_answer)
+          - problem_str (str): The generated arithmetic expression (with hex numbers).
+          - correct_answer (str): The evaluated result, formatted as a hex string.
+    """
+    max_depth = max(1, difficulty)
+    problem_str = generate_expression(rng, max_depth)
+    correct_value = eval(problem_str)
+    correct_answer = hex(correct_value)
+
+    return problem_str, correct_answer
+
+
+def verify_solution(problem, user_solution):
+    """
+    Verify if the provided solution is correct for the given problem.
+
+    Parameters:
+        problem (str): The arithmetic expression (with hex numbers).
+        user_solution (str or int): The user's answer, either as a hex string (e.g., "0xa")
+            or an integer.
+
+    Returns:
+        bool: True if the user's answer matches the evaluated result, else False.
+    """
+    try:
+        correct_value = eval(problem)
+        # Use base=0 for automatic base detection: 0x->hex, 0b->binary, 0o->octal, no prefix->decimal
+        user_value = int(str(user_solution), 0)
+    except Exception:
+        return False
+
+    return correct_value == user_value
+
+
+class BitwiseArithmeticDataset(ProceduralDataset):
+    """Dataset that generates tasks testing understanding of bitwise arithmetic operations.
+
+    Generates expressions combining:
+    - Standard arithmetic operators (+, -, *)
+    - Bitwise shift operators (<<, >>)
+    - Multi-byte hexadecimal numbers (e.g. 0x100 to 0xFFFF)
+
+    The difficulty parameter controls expression complexity:
+    - Level 1: Simple expressions like (0x123 + 0x456)
+    - Level 2: Nested expressions with shifts like ((0x123 + 0x456) << 0x1)
+    - Level 3+: Deeper nesting like (((0x123 + 0x456) << 0x1) - ((0x789 * 0xabc) >> 0x2))
+
+    Each task provides:
+    - A question asking to evaluate an expression
+    - The correct answer in hexadecimal format
+    - Metadata including the raw expression
+
+    The dataset verifies answers by re-evaluating the stored expression and parsing the
+    submitted answer with int(..., 0), so prefixed hex ("0x1f") and plain decimal ("31") both work.
+    """
+
+    def __init__(self, config: BitwiseArithmeticConfig) -> None:
+        super().__init__(config=config, seed=config.seed, size=config.size)
+
+    def __getitem__(self, idx: int) -> dict[str, Any]:
+        """
+        Generate a single arithmetic task.
+
+        Returns:
+            dict: Contains:
+              - 'question': The formatted arithmetic expression as a string.
+              - 'answer': The computed hexadecimal result.
+              - 'metadata': Additional metadata, including just the problem without prompt.
+        """
+        # Create a deterministic RNG from base seed and index.
+        rng: Random = Random(self.seed + idx if self.seed is not None else None)
+
+        problem, answer = generate_problem(
+            rng,
+            self.config.difficulty,
+        )
+        problem_str = (
+            f"Please solve this problem. Assume there is arbitrary bit depth and that there are signed integers. Reply only with the final hexidecimal value.\n"
+            + problem
+        )
+
+        return {"question": problem_str, "answer": answer, "metadata": {"problem": problem}}
+
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
+        """
+        Compares the user's answer with the correct answer.
+
+        Returns:
+            float: 1.0 if the user's answer is correct; otherwise, 0.01 unless no answer is provided, in which case 0.
+        """
+        if answer is None:
+            return 0.0
+
+        try:
+            solved = verify_solution(entry["metadata"]["problem"], answer)
+            if solved:
+                return 1.0
+        except Exception:
+            return 0.01
+
+        return 0.01
+
+
+# Register the dataset with the factory.
+register_dataset("bitwise_arithmetic", BitwiseArithmeticDataset, BitwiseArithmeticConfig)
diff --git a/reasoning_gym/arithmetic/calendar_arithmetic.py b/reasoning_gym/arithmetic/calendar_arithmetic.py
index 3a052590..9b307001 100644
--- a/reasoning_gym/arithmetic/calendar_arithmetic.py
+++ b/reasoning_gym/arithmetic/calendar_arithmetic.py
@@ -4,7 +4,7 @@ import random
 from dataclasses import dataclass
 from datetime import date, timedelta
 from enum import Enum, StrEnum, auto
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -51,7 +51,7 @@ class CalendarTask(StrEnum):
 @dataclass
 class CalendarArithmeticConfig:
     year: int = 2022
-    tasks: Optional[List[str]] = None
+    tasks: Optional[list[str]] = None
     offset_upper_bound: int = 100
     leap_year_range: int = 200
     seed: Optional[int] = 42
@@ -131,7 +131,7 @@ class CalendarArithmeticDataset(ProceduralDataset):
             "metadata": metadata,
         }
 
-    def _weekday_offset(self, rng: random.Random) -> Tuple[str, str, dict]:
+    def _weekday_offset(self, rng: random.Random) -> tuple[str, str, dict]:
         """
         Task: Given a starting date and a day offset (which may be positive or negative),
         ask what day of the week it will be.
@@ -170,7 +170,7 @@ class CalendarArithmeticDataset(ProceduralDataset):
         }
         return question, target_weekday, metadata
 
-    def _weekday_of_date(self, rng: random.Random) -> Tuple[str, str, dict]:
+    def _weekday_of_date(self, rng: random.Random) -> tuple[str, str, dict]:
         """
         task: Ask what day of the week a given date was.
         example:
@@ -193,7 +193,7 @@ class CalendarArithmeticDataset(ProceduralDataset):
         }
         return question, answer_weekday, metadata
 
-    def _weekday_of_date_from_first_day(self, rng: random.Random) -> Tuple[str, str, dict]:
+    def _weekday_of_date_from_first_day(self, rng: random.Random) -> tuple[str, str, dict]:
         """
         task: Given an hypothetical weekday for January 1, ask what weekday a later date in the year falls on.
         example:
@@ -235,7 +235,7 @@ class CalendarArithmeticDataset(ProceduralDataset):
         }
         return question, answer_weekday, metadata
 
-    def _recurring_event_day(self, rng: random.Random) -> Tuple[str, str, dict]:
+    def _recurring_event_day(self, rng: random.Random) -> tuple[str, str, dict]:
         """
         task: For a recurring event defined by an ordinal weekday pattern in a month,
         ask on which day of the month the event occurs.
@@ -294,7 +294,7 @@ class CalendarArithmeticDataset(ProceduralDataset):
         }
         return question, str(event_day), metadata
 
-    def _count_days(self, rng: random.Random) -> Tuple[str, str, dict]:
+    def _count_days(self, rng: random.Random) -> tuple[str, str, dict]:
         """
         task: Ask how many times a given weekday occurs in a specified range.
         example:
@@ -334,7 +334,7 @@ class CalendarArithmeticDataset(ProceduralDataset):
         }
         return question, str(count), metadata
 
-    def _count_business_days(self, rng: random.Random) -> Tuple[str, str, dict]:
+    def _count_business_days(self, rng: random.Random) -> tuple[str, str, dict]:
         """
         task: Count the number of business days (Monday-Friday) between two dates.
         example:
@@ -385,7 +385,7 @@ class CalendarArithmeticDataset(ProceduralDataset):
         }
         return question, str(count), metadata
 
-    def _is_leap_year(self, rng: random.Random) -> Tuple[str, str, dict]:
+    def _is_leap_year(self, rng: random.Random) -> tuple[str, str, dict]:
         """
         task: Given a year, determine whether it is a leap year.
         example:
@@ -426,7 +426,7 @@ class CalendarArithmeticDataset(ProceduralDataset):
         random_days = rng.randint(0, delta)
         return start_date + timedelta(days=random_days)
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         # we suppose the answer is the last occurence of the expected answer type
         if answer is None:
             return 0.0
diff --git a/reasoning_gym/arithmetic/chain_sum.py b/reasoning_gym/arithmetic/chain_sum.py
index c90a4abb..2072983c 100644
--- a/reasoning_gym/arithmetic/chain_sum.py
+++ b/reasoning_gym/arithmetic/chain_sum.py
@@ -1,6 +1,8 @@
 import random
 from dataclasses import dataclass
-from typing import Optional
+from typing import Any, Optional
+
+from reasoning_gym import utils
 
 from ..coaching import AttributeType, BaseCurriculum, RangeAttributeDefinition
 from ..factory import ProceduralDataset, register_dataset
@@ -108,6 +110,9 @@ class ChainSumDataset(ProceduralDataset):
         expression = " ".join(expression_parts)
         return expression, result
 
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
+        return utils.compute_decimal_reward(answer, oracle_answer=entry["answer"])
+
 
 class ChainSumCurriculum(BaseCurriculum):
     def __init__(self):
diff --git a/reasoning_gym/arithmetic/decimal_arithmetic.py b/reasoning_gym/arithmetic/decimal_arithmetic.py
index 881c004a..9d311bbf 100644
--- a/reasoning_gym/arithmetic/decimal_arithmetic.py
+++ b/reasoning_gym/arithmetic/decimal_arithmetic.py
@@ -2,7 +2,7 @@ import ast
 from dataclasses import dataclass
 from decimal import ROUND_HALF_UP, Decimal, getcontext
 from random import Random
-from typing import Any, Dict, List, Optional
+from typing import Any, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -25,7 +25,7 @@ class DecimalArithmeticConfig:
         ), "precision must be 2 or more higher than max_num_decimal_places"
 
 
-def build_grouped_expression(operands: List[str], operators: List[str], rng: Random) -> str:
+def build_grouped_expression(operands: list[str], operators: list[str], rng: Random) -> str:
     """
     Recursively build an arithmetic expression string from operands and operators,
     inserting parentheses at random.
@@ -53,7 +53,7 @@ def generate_arithmetic_problem(
     min_num_decimal_places: int,
     max_num_decimal_places: int,
     terms: int = 2,
-    operations: Optional[List[str]] = None,
+    operations: Optional[list[str]] = None,
 ) -> str:
     """
     Generates a simple arithmetic problem with decimal numbers (as a string) formatted
@@ -72,8 +72,8 @@ def generate_arithmetic_problem(
     if operations is None:
         operations = ["+", "-", "*", "/"]
 
-    operands: List[str] = []
-    operators: List[str] = []
+    operands: list[str] = []
+    operators: list[str] = []
 
     for i in range(terms):
         # Choose a random number of decimal places for this term.
@@ -149,7 +149,7 @@ class DecimalArithmeticDataset(ProceduralDataset):
     def __init__(self, config: DecimalArithmeticConfig) -> None:
         super().__init__(config=config, seed=config.seed, size=config.size)
 
-    def __getitem__(self, idx: int) -> Dict[str, Any]:
+    def __getitem__(self, idx: int) -> dict[str, Any]:
         """
         Generate a single arithmetic task.
 
@@ -180,7 +180,7 @@ class DecimalArithmeticDataset(ProceduralDataset):
 
         return {"question": problem_str, "answer": answer, "metadata": {}}
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """
         Compares the user's answer (converted to Decimal) with the correct answer.
         Instead of requiring exact equality, we allow an error up to one unit in the
diff --git a/reasoning_gym/arithmetic/decimal_chain_sum.py b/reasoning_gym/arithmetic/decimal_chain_sum.py
index da920c9d..8e20ed59 100644
--- a/reasoning_gym/arithmetic/decimal_chain_sum.py
+++ b/reasoning_gym/arithmetic/decimal_chain_sum.py
@@ -133,7 +133,7 @@ class DecimalChainSumDataset(ProceduralDataset):
         result = result.quantize(Decimal(f"0.{'0' * max(decimal_places)}"))
         return expression, result
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Score the answer by comparing decimal values instead of strings.
         Args:
             answer: The answer to score
diff --git a/reasoning_gym/arithmetic/dice.py b/reasoning_gym/arithmetic/dice.py
index f4ad97e9..0dcf3e44 100644
--- a/reasoning_gym/arithmetic/dice.py
+++ b/reasoning_gym/arithmetic/dice.py
@@ -2,7 +2,7 @@ from dataclasses import dataclass
 from functools import reduce
 from math import gcd
 from random import Random
-from typing import Dict, Optional
+from typing import Any, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -125,14 +125,14 @@ class DiceDataset(ProceduralDataset):
             "metadata": {},
         }
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Determine if the solution provided solves the Dice task.
 
         The function awards 1.0 for a correct answer.
 
         Args:
             answer (Optional[str]): The user's answer.
-            entry (Dict[str, any]): The original dataset entry containing the correct answer.
+            entry (dict[str, Any]): The original dataset entry containing the correct answer.
 
         Returns:
             float: The computed score between 0.0 and 1.0.
diff --git a/reasoning_gym/arithmetic/fraction_simplification.py b/reasoning_gym/arithmetic/fraction_simplification.py
index bd80fd70..d0cc2cb8 100644
--- a/reasoning_gym/arithmetic/fraction_simplification.py
+++ b/reasoning_gym/arithmetic/fraction_simplification.py
@@ -4,7 +4,7 @@ import re
 from dataclasses import dataclass
 from math import gcd
 from random import Random
-from typing import Any, Dict, Optional, Sequence, Tuple
+from typing import Any, Optional, Sequence
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -42,7 +42,7 @@ class FractionSimplificationDataset(ProceduralDataset):
     def __init__(self, config: FractionSimplificationConfig):
         super().__init__(config=config, seed=config.seed, size=config.size)
 
-    def _generate_fraction(self, rng: Random) -> Tuple[int, int, int, int]:
+    def _generate_fraction(self, rng: Random) -> tuple[int, int, int, int]:
         """Generate a random fraction and its simplified form.
         Returns (numerator, denominator, simplified_num, simplified_den)"""
         # Try to generate valid fractions until we get one that meets our criteria
@@ -134,7 +134,7 @@ class FractionSimplificationDataset(ProceduralDataset):
         except:
             return None
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]):
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]):
         reward = 0.0
         metadata = entry["metadata"]
         try:
diff --git a/reasoning_gym/arithmetic/gcd.py b/reasoning_gym/arithmetic/gcd.py
index 0c25797f..a764b2bd 100644
--- a/reasoning_gym/arithmetic/gcd.py
+++ b/reasoning_gym/arithmetic/gcd.py
@@ -4,7 +4,7 @@ from dataclasses import dataclass
 from functools import reduce
 from math import gcd
 from random import Random
-from typing import List, Optional, Tuple
+from typing import Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -34,7 +34,7 @@ class GCDDataset(ProceduralDataset):
     def __init__(self, config: GCDConfig):
         super().__init__(config=config, seed=config.seed, size=config.size)
 
-    def _generate_numbers(self, rng: Random) -> Tuple[List[int], int]:
+    def _generate_numbers(self, rng: Random) -> tuple[list[int], int]:
         """Generate a list of random positive integers and their GCD.
         Will try up to 3 times to find numbers with GCD > 1."""
 
diff --git a/reasoning_gym/arithmetic/gsm_symbolic/generators_00_49.py b/reasoning_gym/arithmetic/gsm_symbolic/generators_00_49.py
index fb8d170b..eea9429b 100644
--- a/reasoning_gym/arithmetic/gsm_symbolic/generators_00_49.py
+++ b/reasoning_gym/arithmetic/gsm_symbolic/generators_00_49.py
@@ -6,11 +6,11 @@ from typing import Any, Dict
 from reasoning_gym.utils import format_number, is_integer
 
 
-def generate_0(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_0(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, food: str, peel_rate: int, batch_size: int, time_per_batch: int, total_amount: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
 
         peel_time = total_amount // peel_rate
         num_batches = total_amount // batch_size
@@ -44,7 +44,7 @@ def generate_0(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names_female = ["Emily", "Sarah", "Emma", "Sophia", "Olivia", "Ava", "Isabella", "Mia"]
         foods = ["shrimp", "onion", "carrot", "mushroom", "clam"]
 
@@ -76,11 +76,11 @@ def generate_0(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_1(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_1(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, family: str, blocks: int, animals: int, rings: int, total: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         bouncy_balls = total - (blocks + animals + rings)
 
         question = f"When {name} watches her {family}, she gets out a variety of toys for him. The bag of building blocks has {blocks} blocks in it. The bin of stuffed animals has {animals} stuffed animals inside. The tower of stacking rings has {rings} multicolored rings on it. {name} recently bought a tube of bouncy balls, bringing her total number of toys for her {family} up to {total}. How many bouncy balls came in the tube?"
@@ -103,7 +103,7 @@ def generate_1(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names_female = ["Sophie", "Emma", "Olivia", "Ava", "Isabella", "Sophia", "Mia"]
         family_members = ["nephew", "cousin", "brother"]
 
@@ -132,11 +132,11 @@ def generate_1(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_2(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_2(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         teacher: str, total: int, p1: int, p2: int, group1: str, group2: str, group3: str, event: str
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         group1_count = int(total * p1 / 100)
         remaining = total - group1_count
         group23_count = int(remaining * p2 / 100)
@@ -171,7 +171,7 @@ def generate_2(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         teachers = ["Ms. Johnson", "Mr. Smith", "Dr. Lee", "Mrs. Garcia"]
         sports = ["soccer players", "basketball players", "volleyball players", "swimmers"]
         activities = ["dancers", "choir members", "debate team members", "robotics club members"]
@@ -220,7 +220,7 @@ def generate_2(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_3(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_3(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str,
@@ -229,7 +229,7 @@ def generate_3(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         letters_per_week: int,
         pages_per_letter: int,
         minutes_per_page: int,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         current_pals = initial_pals - lost_pals
         letters_received = current_pals * letters_per_week
         pages_to_write = letters_received * pages_per_letter
@@ -263,7 +263,7 @@ def generate_3(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["Mike", "John", "David", "James", "Robert", "William", "Richard"]
         name = rng.choice(names)
 
@@ -312,11 +312,11 @@ def generate_3(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_4(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_4(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, items: str, food: str, location: str, container: str, num_jars: int, per_jar: int, per_pan: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         total_items = num_jars * per_jar
         num_pans = total_items // per_pan
 
@@ -342,7 +342,7 @@ def generate_4(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names_female = ["Mary", "Sarah", "Emma", "Elizabeth", "Catherine"]
         items = ["sprinkles", "frosting", "icing", "chocolate chips"]
         foods = ["cupcake", "cookie", "brownie", "muffin"]
@@ -383,7 +383,7 @@ def generate_4(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_5(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_5(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name1: str,
@@ -396,7 +396,7 @@ def generate_5(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         s1: int,
         s2: int,
         goal: int,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         signatures_collected = s1 + s2
         signatures_needed = goal - signatures_collected
 
@@ -425,7 +425,7 @@ def generate_5(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names_female = ["Emma", "Olivia", "Ava", "Isabella", "Sophia", "Mia", "Charlotte", "Carol", "Jennifer"]
         cities = ["Los Angeles", "New York", "Chicago", "Houston", "Phoenix"]
         celebrity_types = ["movie stars", "athletes", "musicians", "politicians", "authors"]
@@ -463,9 +463,9 @@ def generate_5(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_6(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_6(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
-    def generate_from_variables(n_girls: int, place: str, multiplier: int) -> Dict[str, Any]:
+    def generate_from_variables(n_girls: int, place: str, multiplier: int) -> dict[str, Any]:
         n_boys = n_girls * multiplier
         total_kids = n_girls + n_boys
 
@@ -487,7 +487,7 @@ def generate_6(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         places = ["park", "yard", "field", "playground", "garden"]
         multipliers = [2, 3, 4]  # twice, triple, quadruple
 
@@ -515,11 +515,11 @@ def generate_6(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_7(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_7(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, plants_received: int, plants_per_ledge: int, num_ledges: int, plants_to_give: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
 
         initial_plants = plants_per_ledge * num_ledges
         total_plants = initial_plants + plants_received
@@ -548,7 +548,7 @@ def generate_7(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["Mary", "Emma", "Sophia", "Isabella", "Olivia", "Ava", "Mia"]
 
         name = rng.choice(names)
@@ -578,11 +578,11 @@ def generate_7(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_8(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_8(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, drink: str, sugar_ratio: int, water_ratio: int, total_items: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         total_ratio = sugar_ratio + water_ratio
         sugar_amount = (sugar_ratio * total_items) // total_ratio
 
@@ -605,7 +605,7 @@ def generate_8(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names_female = ["Emma", "Olivia", "Ava", "Isabella", "Sophia", "Mia", "Charlotte"]
         drinks = ["coffee", "tea"]
 
@@ -636,7 +636,7 @@ def generate_8(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_9(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_9(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str,
@@ -649,7 +649,7 @@ def generate_9(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         item1: str,
         item2: str,
         currency: str,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
 
         initial_amount = num_bills * bill_value
         spent_items1 = num_items1 * price1
@@ -688,7 +688,7 @@ def generate_9(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["Craig", "John", "Michael", "David", "James", "Robert", "William"]
         items1 = ["toy car", "action figure", "coloring book", "puzzle", "board game"]
         items2 = ["sticker", "candy bar", "trading card", "pencil", "eraser"]
@@ -745,11 +745,11 @@ def generate_9(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_10(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_10(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name1: str, name2: str, age1: int, years: int, relation_type: str, mult: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         future_age = age1 * mult
         current_age = future_age - years
 
@@ -774,7 +774,7 @@ def generate_10(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names_male = ["James", "John", "Robert", "Michael", "William", "David", "Richard"]
         names_female = ["Mary", "Patricia", "Jennifer", "Linda", "Elizabeth", "Barbara", "Susan"]
         relation_types = ["sister", "cousin"]
@@ -809,9 +809,9 @@ def generate_10(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_11(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_11(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
-    def generate_from_variables(name: str, food1: str, food2: str, mult: int, n: int, m: int, k: int) -> Dict[str, Any]:
+    def generate_from_variables(name: str, food1: str, food2: str, mult: int, n: int, m: int, k: int) -> dict[str, Any]:
         # Initial amounts
         initial_food2 = n  # Initial amount of food2
         initial_food1 = n * mult  # Initial amount of food1 (mult times food2)
@@ -872,7 +872,7 @@ def generate_11(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["Allan", "John", "Michael", "David", "James", "Robert", "William"]
         foods = ["corn", "apple", "banana", "orange", "pear", "grape", "fig", "persimmon", "plum", "kiwi"]
 
@@ -900,11 +900,11 @@ def generate_11(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_12(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_12(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, game1: str, game2: str, period: str, time1: int, time2: int, num1: int, num2: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         total_time1 = time1 * num1
         total_time2 = time2 * num2
         total_time = total_time1 + total_time2
@@ -937,7 +937,7 @@ def generate_12(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["Emma", "Olivia", "Ava", "Isabella", "Sophia", "Mia", "Charlotte"]
         games = ["word puzzle", "jigsaw puzzle", "chess puzzle", "riddle", "brain teaser"]
         periods = ["weekend", "vacation", "holiday", "day off", "free time"]
@@ -971,11 +971,11 @@ def generate_12(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_13(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_13(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         park_name: str, unit: str, length1: int, length2: int, speed1: int, speed2: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         time1 = length1 // speed1
         time2 = length2 // speed2
         time_diff = time1 - time2
@@ -1001,7 +1001,7 @@ def generate_13(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         parks = ["Splash World", "Aqua Adventure", "Water Wonderland", "Neptunes Kingdom"]
         units = ["yards", "meters"]
 
@@ -1050,11 +1050,11 @@ def generate_13(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_14(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_14(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, day1: str, day2: str, day3: str, time1: int, time2: int, mult: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         combined_time = time1 + time2
         target_time = combined_time * mult
 
@@ -1080,7 +1080,7 @@ def generate_14(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["Peter", "John", "Michael", "David", "James", "Robert", "William"]
         weekdays = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
         multipliers = [2, 3, 4]
@@ -1122,7 +1122,7 @@ def generate_14(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_15(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_15(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str,
@@ -1137,7 +1137,7 @@ def generate_15(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         price3: int,
         price4: int,
         discount: int,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
 
         shorts_price = price1 + price2
         shoes_price = price3 // 2
@@ -1169,7 +1169,7 @@ def generate_15(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names_female = ["Emma", "Olivia", "Ava", "Isabella", "Sophia", "Mia", "Charlotte", "Amelia"]
         sports = ["swimming", "cycling", "basketball", "soccer", "volleyball"]
         items1 = ["t-shirt", "jersey", "sports bra"]
@@ -1212,11 +1212,11 @@ def generate_15(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_16(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_16(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name1: str, name2: str, name3: str, platform: str, mult1: int, mult2: int, n: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         base_friends = n // mult1  # Dorothy's friends
         charlie_friends = n  # Charlie's friends
         james_friends = base_friends * mult2  # James's friends
@@ -1243,7 +1243,7 @@ def generate_16(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["Charlie", "Dorothy", "James", "Sarah", "Michael", "Emily", "David"]
         platforms = ["Instagram", "Twitter", "LinkedIn", "TikTok", "Snapchat"]
 
@@ -1276,11 +1276,11 @@ def generate_16(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_17(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_17(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         facility: str, total: int, item: str, frac: Fraction, event: str, daily: int, period: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         initial_occupied = int(total * frac)
         initial_empty = total - initial_occupied
         weekly_admitted = daily * 7
@@ -1310,7 +1310,7 @@ def generate_17(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         facilities = ["hospital", "clinic", "medical center", "care facility"]
         items = ["bed", "room", "ward"]
         events = ["flu season", "natural disaster", "major accident", "pandemic"]
@@ -1365,9 +1365,9 @@ def generate_17(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_18(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_18(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
-    def generate_from_variables(name1: str, name2: str, game: str, n1: int, n2: int, frac: float) -> Dict[str, Any]:
+    def generate_from_variables(name1: str, name2: str, game: str, n1: int, n2: int, frac: float) -> dict[str, Any]:
         score2 = int(frac * n1 + n2)
         total = n1 + score2
 
@@ -1392,7 +1392,7 @@ def generate_18(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = [
             "James",
             "John",
@@ -1445,11 +1445,11 @@ def generate_18(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_19(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_19(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, pan: str, initial_kernels: int, time_interval: int, multiplier_2: int, multiplier_3: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         second_interval = multiplier_2 * initial_kernels
         third_interval = multiplier_3 * initial_kernels
         fourth_interval = third_interval // 2
@@ -1480,7 +1480,7 @@ def generate_19(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["Garrett", "James", "Michael", "David", "John", "Robert", "William"]
         pans = ["pan", "pot", "skillet"]
 
@@ -1514,11 +1514,11 @@ def generate_19(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_20(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_20(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, obj: str, surface: str, capacity: int, total: int, num_trays: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         max_capacity = capacity * num_trays
         leftover = total - max_capacity
 
@@ -1543,7 +1543,7 @@ def generate_20(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["James", "John", "Robert", "Michael", "William", "David", "Richard"]
         objects = ["olive", "almond", "cookie", "cracker", "banana"]
         surfaces = ["plate", "table", "bowl", "tray", "basket"]
@@ -1575,11 +1575,11 @@ def generate_20(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_21(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_21(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, length: int, unit_length: str, plant_width: int, space: float, owned: int, currency: str, cost: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         total_plants = int(length / space)
         plants_to_buy = total_plants - owned
         total_cost = plants_to_buy * cost
@@ -1630,7 +1630,7 @@ def generate_21(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names_female = ["Emma", "Olivia", "Ava", "Isabella", "Sophia", "Mia", "Charlotte"]
         currencies = ["$", "£", "€"]
         units = ["feet", "meters"]
@@ -1673,11 +1673,11 @@ def generate_21(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_22(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_22(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, property_type: str, budget: int, price: int, brokerage_fee: int, transfer_fee: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         brokerage_amount = int(price * brokerage_fee / 100)
         transfer_amount = int(price * transfer_fee / 100)
         total_price = price + brokerage_amount + transfer_amount
@@ -1705,7 +1705,7 @@ def generate_22(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["Mrs. Smith", "Ms. Johnson", "Dr. Patel", "Mrs. Lee"]
         property_types = ["house", "apartment", "condo", "townhouse"]
 
@@ -1741,11 +1741,11 @@ def generate_22(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_23(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_23(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, task: str, profession: str, hours: int, work_type: str, rate: int, fee: int, currency: str
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         lost_income = hours * rate
         savings = lost_income - fee
 
@@ -1771,7 +1771,7 @@ def generate_23(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names_female = ["Emma", "Sophia", "Isabella", "Olivia", "Ava", "Mia", "Emily"]
         tasks = ["her taxes", "her financial planning", "her business accounting"]
         professions = ["accountant", "financial advisor", "tax consultant", "bookkeeper"]
@@ -1810,11 +1810,11 @@ def generate_23(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_24(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_24(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         comet_name: str, name: str, relative: str, orbit_period: int, relative_age: int, multiple: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         second_viewing_age = relative_age * multiple
         first_viewing_age = second_viewing_age - orbit_period
 
@@ -1839,7 +1839,7 @@ def generate_24(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         comets = ["Halley", "Hale-Bopp", "Hyakutake", "Encke"]
         names = ["William", "James", "John", "Robert", "Michael", "David"]
         relatives = ["dad", "father", "uncle", "grandfather"]
@@ -1884,11 +1884,11 @@ def generate_24(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_25(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_25(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         food: str, calories: int, size: int, servings: int, total_target: int, consumed: int, unit: str
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
 
         calories_left = total_target - consumed
         serving_fraction = Fraction(calories_left, calories)
@@ -1924,7 +1924,7 @@ def generate_25(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         foods = ["popcorn", "breads", "cookies"]
         units = ["grams", "ounces", "oz"]
 
@@ -1975,9 +1975,9 @@ def generate_25(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_26(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_26(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
-    def generate_from_variables(n: int, ball_type: str, color: str, frac_1: float, frac_2: float) -> Dict[str, Any]:
+    def generate_from_variables(n: int, ball_type: str, color: str, frac_1: float, frac_2: float) -> dict[str, Any]:
         first_calc = int(n * frac_1)
         final_calc = int(first_calc * frac_2)
 
@@ -2001,7 +2001,7 @@ def generate_26(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         ball_types = ["golf", "tennis"]
         colors = ["blue", "red", "green", "yellow", "white"]
         fractions = [0.5, 0.25, 0.75]
@@ -2032,7 +2032,7 @@ def generate_26(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_27(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_27(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str,
@@ -2043,7 +2043,7 @@ def generate_27(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         freq: int,
         rate: float,
         currency: str,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
 
         first_two = n_first * apartments_each
         third_complex = int(first_two * percent_bigger / 100)
@@ -2082,7 +2082,7 @@ def generate_27(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["John", "Michael", "David", "James", "Robert", "William"]
         currencies = ["$", "£", "€"]
 
@@ -2118,7 +2118,7 @@ def generate_27(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_28(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_28(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str,
@@ -2130,7 +2130,7 @@ def generate_28(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         extra_price: float,
         currency: str,
         unit: str,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         price_increase = price * percent / 100
         new_price = price + price_increase
         weekly_usage = usage * 7
@@ -2161,7 +2161,7 @@ def generate_28(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names_male = ["John", "Michael", "David", "James", "Robert", "William", "Richard", "Thomas"]
         items = ["tea", "sugar", "flour", "rice"]
         currencies_sym = ["$", "£", "€"]
@@ -2199,9 +2199,9 @@ def generate_28(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_29(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_29(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
-    def generate_from_variables(name1: str, name2: str, n1: int, n2: int, k1: int, k2: int) -> Dict[str, Any]:
+    def generate_from_variables(name1: str, name2: str, n1: int, n2: int, k1: int, k2: int) -> dict[str, Any]:
         total_puppies = n1 + n2
         spotted_puppies = k1 + k2
         percentage = int(100 * spotted_puppies / total_puppies)
@@ -2232,7 +2232,7 @@ def generate_29(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = [
             "Jennifer",
             "Michael",
@@ -2307,11 +2307,11 @@ def generate_29(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_30(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_30(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         group: str, n: int, n_1: int, n_2: int, hobby1: str, hobby2: str, hobby3: str, hobby4: str
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         n_4 = 2 * n_2  # number that like hobby4 (music)
         n_3 = n - (n_1 + n_2 + n_4)  # number that like hobby3 (video games)
 
@@ -2338,7 +2338,7 @@ def generate_30(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         groups = ["group", "class"]
         hobbies = ["read", "paint", "hike", "dance", "bake", "play video games", "play music"]
         sports = ["basketball", "soccer", "tennis", "baseball", "volleyball"]
@@ -2374,11 +2374,11 @@ def generate_30(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_31(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_31(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, fruit: str, total: int, n1: int, n2: int, n3: int, sibling1: str, sibling2: str
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         slice2 = n1 + n2
         slice3 = slice2 + n3
         total_eaten = n1 + slice2 + slice3
@@ -2405,7 +2405,7 @@ def generate_31(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names_female = ["Emma", "Olivia", "Ava", "Isabella", "Sophia", "Mia", "Charlotte", "Doxa"]
         fruits = ["orange", "pear", "peach", "mango", "kiwi", "apple"]
         siblings = ["brother", "sister", "cousin", "friend"]
@@ -2454,11 +2454,11 @@ def generate_31(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_32(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_32(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, periods: int, extra_classes: int, mins_per_class: int, days: int, weekend_fraction: float
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         total_classes = periods + extra_classes
         daily_mins = total_classes * mins_per_class
         weekly_mins = daily_mins * days
@@ -2497,7 +2497,7 @@ def generate_32(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["John", "James", "William", "Michael", "David", "Richard", "Thomas"]
         fractions = ["1/16", "1/8", "1/4", "1/2"]
 
@@ -2540,9 +2540,9 @@ def generate_32(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_33(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_33(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
-    def generate_from_variables(name1: str, name2: str, mult: int, n: int) -> Dict[str, Any]:
+    def generate_from_variables(name1: str, name2: str, mult: int, n: int) -> dict[str, Any]:
         n_mult = n * mult
         daily_total = n + n_mult
         weekly_total = daily_total * 7
@@ -2572,7 +2572,7 @@ def generate_33(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names_female = ["Julie", "Sarah", "Emma", "Sophia", "Olivia", "Isabella", "Mia", "Charlotte"]
         multi_times = [2, 3, 4]
 
@@ -2617,9 +2617,9 @@ def generate_33(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_34(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_34(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
-    def generate_from_variables(event: str, item: str, family: str, n: int, m: int, total: int) -> Dict[str, Any]:
+    def generate_from_variables(event: str, item: str, family: str, n: int, m: int, total: int) -> dict[str, Any]:
         twins_total = 2 * n
         remaining = total - twins_total
         friends_found = remaining - m
@@ -2645,7 +2645,7 @@ def generate_34(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         events = ["Halloween candy hunt", "Treasure hunt", "Scavenger hunt", "Charity fundraiser"]
         items = ["eggs", "treats", "toys", "coins", "tokens", "balls", "candies", "goodies"]
         families = ["Johnson", "Williams", "Mirzakhani", "Lopez", "Garcia", "Lee"]
@@ -2679,11 +2679,11 @@ def generate_34(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_35(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_35(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         job: str, building: str, room: str, num_rooms: int, num_days: int, time_per_room: int, hours_per_day: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
 
         # Calculate values ensuring integer percentage
         rooms_per_day = num_rooms // num_days  # Integer division for rooms per day
@@ -2722,7 +2722,7 @@ def generate_35(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         jobs = ["janitor", "cleaner", "maintenance worker"]
         buildings = ["office building", "hospital", "university"]
         rooms = ["room", "floor"]
@@ -2774,9 +2774,9 @@ def generate_35(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_36(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_36(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
-    def generate_from_variables(n: int, p1: int, r1: int, name: str, s1: str, s2: str, s3: str) -> Dict[str, Any]:
+    def generate_from_variables(n: int, p1: int, r1: int, name: str, s1: str, s2: str, s3: str) -> dict[str, Any]:
         easy_questions = int(n * (p1 / 100))
         other_questions = int(n * (1 - p1 / 100))
         easy_correct = int(easy_questions * (r1 / 100))
@@ -2814,7 +2814,7 @@ def generate_36(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         subjects = [
             "history",
             "geography",
@@ -2861,11 +2861,11 @@ def generate_36(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_37(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_37(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         worker: str, base: int, unit: str, tool1: str, tool2: str, tool3: str, mult1: int, mult2: int, n: int, days: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         iron_amount = base * mult1
         steel_amount = int(iron_amount * (1 + mult2 / 100))
         daily_total = steel_amount * n
@@ -2903,7 +2903,7 @@ def generate_37(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         workers = ["miner", "goblin", "gnome", "troll"]
         tools1 = ["bare hands", "basic shovel", "wooden pickaxe"]
         units = ["pounds", "kgs"]
@@ -2962,7 +2962,7 @@ def generate_37(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_38(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_38(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str,
@@ -2978,7 +2978,7 @@ def generate_38(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         n3: int,
         n4: int,
         n5: int,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
 
         skeins_per_child = n1 + n2 + n3 + n4 + n5
         total_skeins = count * skeins_per_child
@@ -3003,7 +3003,7 @@ def generate_38(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names_female = ["Martha", "Mary", "Elizabeth", "Susan", "Margaret", "Patricia"]
         clothing_items = ["sweater", "shawl", "hat", "cardigan", "poncho", "vest", "beanie", "tunic"]
         accessories = ["mittens", "booties", "socks", "leg warmers", "gloves"]
@@ -3037,11 +3037,11 @@ def generate_38(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_39(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_39(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         total: int, grade: str, school_name: str, num_girls: int, day: str, absent_girls: int, absent_boys: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         num_boys = total - num_girls
         remaining_boys = num_boys - absent_boys
 
@@ -3067,7 +3067,7 @@ def generate_39(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         school_names = ["Maple Grove", "Sunny Hill", "Oak Ridge", "Pine Valley"]
         grades = ["first", "second", "third", "fourth", "fifth"]
         days = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]
@@ -3112,9 +3112,9 @@ def generate_39(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_40(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_40(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
-    def generate_from_variables(item: str, n1: int, c1: str, c2: str, c3: str, p: int) -> Dict[str, Any]:
+    def generate_from_variables(item: str, n1: int, c1: str, c2: str, c3: str, p: int) -> dict[str, Any]:
         more_cards = int(p / 100 * n1)
         n2 = n1 + more_cards
         n3 = n1 + n2
@@ -3147,7 +3147,7 @@ def generate_40(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         items = ["magician", "artist", "chef", "scientist", "athlete"]
         colors = ["red", "blue", "green", "yellow", "purple", "orange"]
 
@@ -3186,11 +3186,11 @@ def generate_40(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_41(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_41(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, event: str, organization: str, fraction: str, current: int, total: int, currency: str
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         fraction = convert_fraction_word(fraction)
         fraction_val = Fraction(fraction)
         org_amount = int(total * fraction_val)
@@ -3230,7 +3230,7 @@ def generate_41(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         }
         return FRACTION_WORDS.get(fraction_str.lower(), fraction_str)
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["John", "Michael", "David", "James", "William", "Robert", "Joseph"]
         events = ["field trip", "sports tournament", "conference", "music festival", "science fair"]
         organizations = ["school", "community center", "local charity", "youth club", "parent association"]
@@ -3287,7 +3287,7 @@ def generate_41(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_42(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_42(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         title: str,
@@ -3299,7 +3299,7 @@ def generate_42(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         fee2_name: str,
         fee2_percent: int,
         loan: int,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
 
         fee1_amount = price * fee1_percent // 100
         fee2_amount = price * fee2_percent // 100
@@ -3337,7 +3337,7 @@ def generate_42(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         titles = ["Mr.", "Prof.", "Dr."]
         names = ["Smith", "Johnson", "Williams", "Brown", "Jones", "Garcia", "Miller", "Davis", "Rodriguez", "Martinez"]
         properties = ["house", "apartment", "condo", "villa", "cottage"]
@@ -3399,9 +3399,9 @@ def generate_42(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_43(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_43(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
-    def generate_from_variables(person1: str, item: str, n: int, relation: str, k: int) -> Dict[str, Any]:
+    def generate_from_variables(person1: str, item: str, n: int, relation: str, k: int) -> dict[str, Any]:
         other_amount = n - k
         total = n + other_amount
 
@@ -3425,7 +3425,7 @@ def generate_43(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         people = ["student", "boy", "child", "kid"]
         items = ["marble", "sticker", "toy", "book", "pencil"]
         relations = ["sister", "brother", "friend", "cousin"]
@@ -3453,7 +3453,7 @@ def generate_43(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_44(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_44(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         store: str,
@@ -3467,7 +3467,7 @@ def generate_44(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         p2: int,
         p3: int,
         currency: str,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         total1 = n1 * p1
         total2 = n2 * p2
         total3 = n3 * p3
@@ -3493,7 +3493,7 @@ def generate_44(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         stores = ["store", "shop", "market", "warehouse"]
         colors = ["red", "blue", "green", "yellow", "purple", "orange", "pink"]
         currencies = ["$", "€", "£"]
@@ -3526,11 +3526,11 @@ def generate_44(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_45(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_45(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, event: str, food: str, obj: str, package_husband: int, used_items: int, total_remaining: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
 
         total_items = total_remaining + used_items
         package_size = total_items - package_husband
@@ -3557,7 +3557,7 @@ def generate_45(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["Emma", "Olivia", "Sophia", "Isabella", "Ava", "Mia", "Charlotte"]
         events = ["lunch party", "birthday party", "potluck party", "baby shower", "game night"]
         foods = [
@@ -3609,11 +3609,11 @@ def generate_45(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_46(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_46(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         fruit1: str, fruit2: str, n1: int, n2: int, frac1: float, frac2: float, spill: int, total: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         n1_after_spill = n1 - spill
         water_fruit1 = n1_after_spill * frac1
         water_fruit2 = n2 * frac2
@@ -3642,7 +3642,7 @@ def generate_46(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         fruits = ["apple", "grape", "mango", "peach", "lemon"]
         fractions = {
             "two-thirds": 2 / 3,
@@ -3684,11 +3684,11 @@ def generate_46(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_47(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_47(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, n1: int, c1: float, n2: int, c2: float, c3: int, obj1: str, obj2: str, currency: str
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         cost1 = n1 * c1
         cost2 = n2 * c2
         total_cost = cost1 + cost2 + c3
@@ -3720,7 +3720,7 @@ def generate_47(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["John", "Michael", "David", "James", "William", "Robert", "Thomas"]
         items = ["notebooks", "pencils", "erasers", "crayons", "colored pencils", "markers", "rulers", "folders"]
         currencies = ["$", "€", "£"]
@@ -3756,11 +3756,11 @@ def generate_47(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_48(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_48(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         item1: str, item2: str, shop: str, currency: str, price1: int, price2: int, n1: int, n2: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         total1 = n1 * price1
         total2 = n2 * price2
         diff = total1 - total2
@@ -3793,7 +3793,7 @@ def generate_48(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         items1 = ["bread", "sourdough"]
         items2 = ["bagel", "muffin", "croissant", "biscuit"]
         shops = ["bakery", "cafe", "store", "market"]
@@ -3830,11 +3830,11 @@ def generate_48(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_49(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_49(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, item1: str, item2: str, price1: int, price2: int, total: float, n1: int, percent: int, currency: str
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         spent = total * (100 - percent) / 100  # Amount spent
         cost_item1 = n1 * price1 / 100  # Cost of item1
         spent_item2 = spent - cost_item1  # Amount spent on item2
@@ -3869,7 +3869,7 @@ def generate_49(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["George", "James", "John", "Robert", "Michael", "William"]
         items = ["pretzels", "popcorn", "gum", "cookies", "crackers"]
         currencies = ["$", "£", "€"]
diff --git a/reasoning_gym/arithmetic/gsm_symbolic/generators_50_99.py b/reasoning_gym/arithmetic/gsm_symbolic/generators_50_99.py
index d700c2f5..960e023a 100644
--- a/reasoning_gym/arithmetic/gsm_symbolic/generators_50_99.py
+++ b/reasoning_gym/arithmetic/gsm_symbolic/generators_50_99.py
@@ -5,9 +5,9 @@ from typing import Any, Dict
 from reasoning_gym.utils import format_number, is_integer
 
 
-def generate_50(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_50(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
-    def generate_from_variables(name: str, pieces1: int, pieces2: int) -> Dict[str, Any]:
+    def generate_from_variables(name: str, pieces1: int, pieces2: int) -> dict[str, Any]:
         half_pieces1 = pieces1 // 2
         total_pieces = half_pieces1 + pieces2
 
@@ -29,7 +29,7 @@ def generate_50(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["Teddy", "Tommy", "Billy", "Jimmy", "Bobby", "Danny"]
         name = rng.choice(names)
 
@@ -53,7 +53,7 @@ def generate_50(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_51(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_51(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str,
@@ -67,7 +67,7 @@ def generate_51(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         tokens: int,
         cost1: int,
         cost2: int,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
 
         cost_per_ride = cost2 * times
         cost_per_person = tokens + cost1 + cost_per_ride
@@ -107,7 +107,7 @@ def generate_51(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names_female = ["Emma", "Olivia", "Sophia", "Isabella", "Mia", "Charlotte"]
         parents = ["mom", "dad", "aunt", "uncle"]
         activities1 = ["mini-golf", "bowling", "laser tag"]
@@ -151,9 +151,9 @@ def generate_51(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_52(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_52(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
-    def generate_from_variables(name: str, alphabets: tuple, n1: str, frac: str) -> Dict[str, Any]:
+    def generate_from_variables(name: str, alphabets: tuple, n1: str, frac: str) -> dict[str, Any]:
         alphabet_name, alphabet_count = alphabets
 
         # Calculate intermediate values
@@ -189,7 +189,7 @@ def generate_52(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names_female = ["Emma", "Sophia", "Olivia", "Ava", "Isabella", "Mia", "Charlotte", "Amelia"]
         alphabets = [
             ("alphabet", 26),
@@ -229,9 +229,9 @@ def generate_52(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_53(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_53(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
-    def generate_from_variables(name: str, sides: int, target: int, property: str) -> Dict[str, Any]:
+    def generate_from_variables(name: str, sides: int, target: int, property: str) -> dict[str, Any]:
         numbers_above = sides - target
         prob_above = (numbers_above / sides) * 100
         prob_two_in_row = 25  # probability of two even/odd in a row is always 25%
@@ -257,7 +257,7 @@ def generate_53(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["James", "John", "Robert", "Michael", "William", "David", "Richard", "Joseph"]
         properties = ["even", "odd"]
 
@@ -290,7 +290,7 @@ def generate_53(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_54(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_54(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name1: str,
@@ -301,7 +301,7 @@ def generate_54(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         location1: str,
         location2: str,
         location3: str,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
 
         time_after_library = total_time - library_time
         remaining_time = time_after_library - station_time
@@ -328,7 +328,7 @@ def generate_54(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["John", "Jack", "James", "William", "Michael", "David", "Joseph"]
         locations = ["cinema", "mall", "library", "park", "gym", "bank", "school"]
 
@@ -366,11 +366,11 @@ def generate_54(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_55(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_55(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, place: str, fruit: str, location: str, insect1: str, insect2: str, n: int, frac: str
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         num_insect1 = int(n * 0.5)  # half as many bugs as ants
         total_insects = n + num_insect1
 
@@ -396,7 +396,7 @@ def generate_55(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["Dax", "Alex", "Sam", "Jordan", "Taylor", "Morgan", "Riley"]
         places = ["orchard", "backyard", "greenhouse", "allotment"]
         fruits = ["strawberries", "cherries", "blueberries", "raspberries"]
@@ -430,11 +430,11 @@ def generate_55(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_56(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_56(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         family: str, item: str, total: int, n1: int, n2: int, flavor1: str, flavor2: str, flavor3: str
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         n3 = total - (n1 + n2)
 
         question = f"The {family} family is busy making {item}s. So far, they've made {total} {item}s. They have {n1} {flavor1} {item}s, {n2} {flavor2} {item}s, and some {flavor3} {item}s. How many {flavor3} {item}s have they made?"
@@ -459,7 +459,7 @@ def generate_56(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         families = ["Smith", "Johnson", "Williams", "Brown", "Jones"]
         items = ["cupcake", "muffin", "brownie", "biscuit"]
         flavors = ["vanilla", "strawberry", "blueberry", "lemon", "peanut butter"]
@@ -492,11 +492,11 @@ def generate_56(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_57(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_57(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         n1: int, sport1: str, sport2: str, sport3: str, n2: int, n3: int, multiplier: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         n_volleyball = n1 * multiplier
         n_soccer = n2 + n3
         total = n1 + n_volleyball + n_soccer
@@ -521,7 +521,7 @@ def generate_57(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         sports = ["basketball", "badminton", "table tennis", "football", "volleyball"]
         sport1, sport2, sport3 = rng.sample(sports, 3)
 
@@ -567,11 +567,11 @@ def generate_57(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_58(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_58(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, container: str, liquid: str, volume: int, unit: str, num_containers: int, calories: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         total_volume = volume * num_containers
         total_calories = total_volume * calories
 
@@ -597,7 +597,7 @@ def generate_58(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["John", "Mike", "James", "David", "Robert", "William"]
         containers = ["cup", "bottle", "carton"]
         liquids = ["juice", "soda", "sparkling water", "tea", "lemonade"]
@@ -628,11 +628,11 @@ def generate_58(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_59(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_59(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         time_per_interval: int, distance_per_interval: int, total_distance: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         intervals = total_distance // distance_per_interval
         total_time = intervals * time_per_interval
 
@@ -653,7 +653,7 @@ def generate_59(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         # Start with total_distance limit and work backwards
         max_total_distance = 100
 
@@ -698,11 +698,11 @@ def generate_59(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_60(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_60(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, unit: str, total_dist: int, beach_dist: int, sidewalk_dist: int, speed_mult: int, beach_time: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
 
         beach_rate = Fraction(beach_dist, beach_time)
         sidewalk_rate = beach_rate * speed_mult
@@ -730,7 +730,7 @@ def generate_60(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["Emma", "Sophia", "Isabella", "Olivia", "Ava", "Mia", "Emily"]
         units = ["mile", "kilometer", "block"]
 
@@ -783,7 +783,7 @@ def generate_60(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_61(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_61(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str,
@@ -804,7 +804,7 @@ def generate_61(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         p2: float,
         p3: float,
         discount: float,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
 
         # Calculate costs
         item1_cost = n1 * p1 + n2 * (1 - discount) * p1 + k * p1  # Cost of item1 with discount applied
@@ -845,7 +845,7 @@ def generate_61(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names_female = ["Emma", "Olivia", "Ava", "Isabella", "Sophia", "Mia", "Charlotte"]
         locations = ["beach", "boardwalk", "pier", "coast"]
         shops = ["souvenir store", "gift shop", "beach shop", "seaside store"]
@@ -939,11 +939,11 @@ def generate_61(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_62(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_62(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         item: str, num_slices: int, name1: str, name2: str, slices_per_day: int, multiplier: int, unit: str
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
 
         second_person_slices = slices_per_day * multiplier
         total_daily_slices = slices_per_day + second_person_slices
@@ -971,7 +971,7 @@ def generate_62(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         items = ["pizza", "cake", "pie", "lasagna"]
         units = ["pieces", "portions", "servings"]
         names = ["Emma", "Liam", "Olivia", "Noah", "Ava", "Elijah", "Charlotte", "James"]
@@ -1004,9 +1004,9 @@ def generate_62(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_63(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_63(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
-    def generate_from_variables(name: str, hours: int, days: int, rate: int, bonus: int, month: str) -> Dict[str, Any]:
+    def generate_from_variables(name: str, hours: int, days: int, rate: int, bonus: int, month: str) -> dict[str, Any]:
         daily_pay = hours * rate
         monthly_days = days * 4
         monthly_base = daily_pay * monthly_days
@@ -1041,7 +1041,7 @@ def generate_63(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["James", "John", "Robert", "Michael", "William", "David", "Richard", "Joseph"]
         months = [
             "January",
@@ -1086,9 +1086,9 @@ def generate_63(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_64(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_64(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
-    def generate_from_variables(name: str, n1: int, d1: int, n2: int, d2: int) -> Dict[str, Any]:
+    def generate_from_variables(name: str, n1: int, d1: int, n2: int, d2: int) -> dict[str, Any]:
         first_period = n1 * d1
         second_period = n2 * d2
         total_eggs = first_period + second_period
@@ -1120,7 +1120,7 @@ def generate_64(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["Chester", "James", "John", "Robert", "Michael", "William", "David"]
         name = rng.choice(names)
 
@@ -1154,11 +1154,11 @@ def generate_64(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_65(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_65(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, fish: str, day: str, w1: int, w2: int, w3: int, n: int, unit: str, cur: str, price: float
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         total = int((w1 + w2) * price + (n - 2) * w3 * price)
 
         question = f"{name} caught {n} {fish}s last {day}, the first {fish} he caught weighs {w1} {unit}s, the second {fish} he caught weighs {w2} {unit}s, and the last {fish} he caught weighs {w3} {unit}s. If a {unit} of {fish} costs {cur}{price:.2f}, how much will he earn after selling all the {fish}s to the market?"
@@ -1189,7 +1189,7 @@ def generate_65(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["John", "Michael", "David", "James", "Robert", "William", "Richard"]
         fish = ["salmon", "cod", "trout", "steelhead"]
         days = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]
@@ -1228,7 +1228,7 @@ def generate_65(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_66(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_66(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str,
@@ -1240,7 +1240,7 @@ def generate_66(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         min2: int,
         total_hours: int,
         num_wed_episodes: int,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
 
         mon, tue, wed, thu, fri = weekdays
 
@@ -1274,7 +1274,7 @@ def generate_66(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["James", "John", "Robert", "Michael", "William", "David", "Richard", "Joseph"]
         weekdays = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]
 
@@ -1312,11 +1312,11 @@ def generate_66(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_67(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_67(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, fruit: str, n1: int, n2: int, d1: str, d2: str, d3: str, mult: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         first_two_days = n1 + n2
         friday_amount = mult * n1
         total = first_two_days + friday_amount
@@ -1344,7 +1344,7 @@ def generate_67(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["John", "James", "William", "Michael", "David", "Robert", "Thomas"]
         fruits = ["banana", "apple", "orange", "pear", "peach", "plum"]
         weekdays = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]
@@ -1376,9 +1376,9 @@ def generate_67(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_68(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_68(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
-    def generate_from_variables(n0: int, r: int, d: int, disease: str) -> Dict[str, Any]:
+    def generate_from_variables(n0: int, r: int, d: int, disease: str) -> dict[str, Any]:
         # Calculate infected people after each day
         day1_new = n0 * r
         day1_total = n0 + day1_new
@@ -1408,7 +1408,7 @@ def generate_68(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             "variables": {"initial_infected": n0, "infection_rate": r, "days": d, "disease_type": disease},
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         diseases = ["virus", "bacteria", "parasite", "infection"]
         disease = rng.choice(diseases)
         d = 3  # Fixed at 3 days
@@ -1451,9 +1451,9 @@ def generate_68(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_69(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_69(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
-    def generate_from_variables(name: str, document: str, total_pages: int, fraction: str) -> Dict[str, Any]:
+    def generate_from_variables(name: str, document: str, total_pages: int, fraction: str) -> dict[str, Any]:
         frac_num = eval(fraction)
         pages_done = int(total_pages * frac_num)
         pages_remaining = total_pages - pages_done
@@ -1477,7 +1477,7 @@ def generate_69(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names_female = ["Emma", "Olivia", "Ava", "Isabella", "Sophia", "Mia", "Charlotte", "Amelia", "Harper", "Evelyn"]
         documents = ["essay", "report", "thesis", "dissertation", "assignment"]
         fractions = ["1/2", "1/3", "1/4", "2/3", "3/4"]
@@ -1507,11 +1507,11 @@ def generate_69(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_70(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_70(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, objects: str, n: int, obstacle: str, frac: float, k: int, fake_num: int, fake_object: str
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
 
         dropped = int(n * frac)
         remaining = n - dropped
@@ -1548,7 +1548,7 @@ def generate_70(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["James", "John", "Robert", "Michael", "William", "David", "Richard", "Joseph"]
         objects = ["marbles", "coins", "buttons", "beads", "pebbles"]
         obstacles = ["rock", "stick", "toy", "root"]
@@ -1609,7 +1609,7 @@ def generate_70(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_71(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_71(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str,
@@ -1624,7 +1624,7 @@ def generate_71(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         p1: int,
         p2: int,
         p3: int,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         cost1 = n1 * p1
         cost2 = n2 * p2
         cost3 = n3 * p3
@@ -1658,7 +1658,7 @@ def generate_71(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names_female = ["Emma", "Olivia", "Ava", "Isabella", "Sophia", "Charlotte", "Mia", "Amelia"]
         shops = ["bakery", "patisserie", "confectionery", "cafe"]
         items = ["pastries", "baked goods", "desserts", "treats"]
@@ -1697,11 +1697,11 @@ def generate_71(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_72(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_72(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         structure: str, n1: int, color1: str, color2: str, color3: str, obj: str, mult: int, total: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         n2 = n1 * mult
         n3 = total - n1 - n2
 
@@ -1728,7 +1728,7 @@ def generate_72(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         structures = ["building", "pyramid", "stack", "tower"]
         objects = ["brick", "cube", "tile", "block"]
         colors = ["green", "purple", "orange", "pink", "white", "black"]
@@ -1761,7 +1761,7 @@ def generate_72(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_73(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_73(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str,
@@ -1776,7 +1776,7 @@ def generate_73(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         cn: int,
         cm: int,
         currency: str,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
 
         gingerbread_sunday = n1 + n2
         total_gingerbread = n1 + gingerbread_sunday
@@ -1814,7 +1814,7 @@ def generate_73(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["John", "Michael", "David", "James", "William", "Robert"]
         fruits = ["apple", "cherry", "blueberry", "peach"]
         foods = ["cookie", "brownie", "muffin", "cupcake"]
@@ -1850,11 +1850,11 @@ def generate_73(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_74(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_74(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, big_fish: str, length: int, num_remoras: int, remora_length: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         total_remora_length_inches = num_remoras * remora_length
         total_remora_length_feet = total_remora_length_inches / 12
         percentage = int((total_remora_length_feet / length) * 100)
@@ -1880,7 +1880,7 @@ def generate_74(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["Benny", "Tommy", "Jimmy", "Billy", "Johnny", "Bobby"]
         big_fish = ["dolphin", "whale", "shark"]
 
@@ -1949,11 +1949,11 @@ def generate_74(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_75(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_75(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name1: str, name2: str, color1: str, color2: str, n1: int, n2: int, frac1: float, mult1: float
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
 
         n1_result = int(n1 * frac1)
         n2_result = int(n2 * mult1)
@@ -1987,7 +1987,7 @@ def generate_75(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["Ben", "James", "John", "Michael", "William", "David", "Richard", "Joseph"]
         colors = ["blue", "red", "green", "yellow", "purple", "orange"]
 
@@ -2016,9 +2016,9 @@ def generate_75(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_76(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_76(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
-    def generate_from_variables(n: int, p1: int, p2: int, company: str, frac: float) -> Dict[str, Any]:
+    def generate_from_variables(n: int, p1: int, p2: int, company: str, frac: float) -> dict[str, Any]:
         interviews = int(n * (p1 / 100))
         offers = int(interviews * (p2 / 100))
         accepts = int(offers * frac)
@@ -2049,7 +2049,7 @@ def generate_76(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         companies = ["Microsoft", "Apple", "Amazon", "Facebook", "Netflix", "Tesla", "Google"]
         fractions = {"a third": 1 / 3, "half": 1 / 2, "a quarter": 1 / 4, "two thirds": 2 / 3}
 
@@ -2099,11 +2099,11 @@ def generate_76(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_77(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_77(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         event: str, m: int, w: int, t: str, frac: float, m_left: int, group1: str, group2: str
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         total = m + w
         left_count = int(total * frac)
         stayed = total - left_count
@@ -2137,7 +2137,7 @@ def generate_77(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         events = ["party", "meeting", "conference", "gathering", "celebration"]
         groups = ["teachers", "doctors", "engineers", "nurses", "artists", "lawyers"]
         times = ["an hour", "two hours", "half an hour", "45 minutes"]
@@ -2205,9 +2205,9 @@ def generate_77(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_78(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_78(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
-    def generate_from_variables(name1: str, name2: str, age_diff: int, age1: int) -> Dict[str, Any]:
+    def generate_from_variables(name1: str, name2: str, age_diff: int, age1: int) -> dict[str, Any]:
         age2 = age1 + age_diff
         avg_age = (age1 + age2) // 2
 
@@ -2235,7 +2235,7 @@ def generate_78(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["Emma", "Olivia", "Ava", "Isabella", "Sophia", "Mia", "Charlotte", "Amelia"]
         name1, name2 = rng.sample(names, 2)
 
@@ -2262,7 +2262,7 @@ def generate_78(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_79(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_79(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str,
@@ -2273,7 +2273,7 @@ def generate_79(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         currency: str,
         first_hour_cost: int,
         multiplier: int,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         total_hours = end_time - start_time
         paid_hours = total_hours - free_hours
         other_hours = paid_hours - 1
@@ -2314,7 +2314,7 @@ def generate_79(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["James", "John", "Robert", "Michael", "William", "David", "Richard"]
         vehicles = ["limousine", "party bus", "boat", "luxury car"]
         currencies = ["$", "€", "£"]
@@ -2357,11 +2357,11 @@ def generate_79(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_80(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_80(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, color1: str, color2: str, n1: int, n2: int, n3: int, n4: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         blue_spools = n1 + n2
         total_spools = n1 + n2 + n3 + n4
         percent_blue = int(100 * blue_spools / total_spools)
@@ -2389,7 +2389,7 @@ def generate_80(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["Candy", "Sarah", "Emma", "Olivia", "Isabella", "Sophia", "Mia", "Charlotte"]
         colors = ["blue", "red", "green", "yellow", "purple", "orange"]
 
@@ -2424,11 +2424,11 @@ def generate_80(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_81(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_81(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         occupation: str, weeks_per_month: int, days_per_week: int, pay_per_day: int, currency: str
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         days_per_month = days_per_week * weeks_per_month
         monthly_pay = days_per_month * pay_per_day
         yearly_pay = monthly_pay * 12
@@ -2453,7 +2453,7 @@ def generate_81(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         occupations = ["plumber", "electrician", "painter", "carpenter", "landscaper"]
         currencies = ["$", "£", "€"]
 
@@ -2480,9 +2480,9 @@ def generate_81(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_82(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_82(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
-    def generate_from_variables(name: str, num_emails: int, no_response_percent: int, workdays: int) -> Dict[str, Any]:
+    def generate_from_variables(name: str, num_emails: int, no_response_percent: int, workdays: int) -> dict[str, Any]:
         no_response = num_emails * no_response_percent // 100
         responds_to = num_emails - no_response
         total_responses = responds_to * workdays
@@ -2512,7 +2512,7 @@ def generate_82(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["James", "John", "Robert", "Michael", "William", "David", "Richard", "Joseph"]
         name = rng.choice(names)
 
@@ -2542,9 +2542,9 @@ def generate_82(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_83(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_83(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
-    def generate_from_variables(name1: str, name2: str, total: int, diff: int, unit: str) -> Dict[str, Any]:
+    def generate_from_variables(name1: str, name2: str, total: int, diff: int, unit: str) -> dict[str, Any]:
         amount1 = (total - diff) // 2  # Sam's amount
         amount2 = amount1 + diff  # Harry's amount
 
@@ -2568,7 +2568,7 @@ def generate_83(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["Sam", "Harry", "Tom", "John", "Mike", "Dave", "Steve", "Bob"]
         units = ["feet", "yards", "meters"]
 
@@ -2607,11 +2607,11 @@ def generate_83(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_84(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_84(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, miles: str, time_cold: int, extra_time: int, multiplier: float, distance: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         time_warm = extra_time + multiplier * time_cold
         time_cold_total = distance * time_cold
         time_warm_total = distance * time_warm
@@ -2646,7 +2646,7 @@ def generate_84(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["Ray", "Jim", "Bob", "Tom", "Mike", "John", "Steve"]
         units = ["mile", "kilometer"]
 
@@ -2695,11 +2695,11 @@ def generate_84(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_85(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_85(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, room_type: str, area: int, length: int, unit1: str, unit2: str
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         conversion = 3 if unit1 == "feet" and unit2 == "yards" else 1
         length_converted = length * conversion
         width = area // length_converted
@@ -2730,7 +2730,7 @@ def generate_85(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         def calculate_max_width(difficulty: float, length: int, conversion: int) -> int:
             # Cap the maximum width to avoid numbers getting too large
             theoretical_max = int(100 * difficulty)
@@ -2784,7 +2784,7 @@ def generate_85(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_86(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_86(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         animals: str,
@@ -2802,7 +2802,7 @@ def generate_86(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         w3: int,
         w4: int,
         total: int,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         # Calculate weights
         sugar_weight = n4 * w4
         carrot_weight = n3 * w3
@@ -2831,7 +2831,7 @@ def generate_86(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         animals = rng.choice(["horses", "cows", "sheep", "pigs", "alpacas"])
         unit = rng.choice(["pound", "kilogram"])
         feed_options = ["hay", "corn", "oats", "apples", "wheat"]
@@ -2876,7 +2876,7 @@ def generate_86(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_87(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_87(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str,
@@ -2888,7 +2888,7 @@ def generate_87(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         num_medium: int,
         num_small: int,
         total_amount: int,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
 
         large_used = num_large * weight_large
         medium_used = num_medium * weight_medium
@@ -2926,7 +2926,7 @@ def generate_87(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["Peter", "John", "Michael", "David", "James", "Robert", "William"]
         units = ["ounce", "gram", "milliliter"]
 
@@ -2963,9 +2963,9 @@ def generate_87(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_88(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_88(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
-    def generate_from_variables(school: str, venue: str, total: int, graduates: int, faculty: int) -> Dict[str, Any]:
+    def generate_from_variables(school: str, venue: str, total: int, graduates: int, faculty: int) -> dict[str, Any]:
         remaining_seats = total - (graduates + faculty)
         tickets_per_graduate = remaining_seats // graduates
 
@@ -2989,7 +2989,7 @@ def generate_88(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         schools = ["Oakwood High School", "Riverside Academy", "Sunnyside High", "Greenville High School"]
         venues = ["Auditorium", "Gymnasium", "Sports Arena", "Convention Center"]
 
@@ -3019,7 +3019,7 @@ def generate_88(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_89(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_89(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name1: str,
@@ -3032,7 +3032,7 @@ def generate_89(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         multiplier: float,
         less_amount: int,
         fraction: float,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         total_eggs = num_dozen * 12
         found_second = found_first * multiplier
         found_third = found_second - less_amount
@@ -3072,7 +3072,7 @@ def generate_89(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = [
             "Emma",
             "Liam",
@@ -3135,11 +3135,11 @@ def generate_89(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_90(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_90(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         device: str, currency: str, rate1: float, rate2: float, threshold: int, total_mins: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         first_period = threshold
         second_period = total_mins - threshold
 
@@ -3168,7 +3168,7 @@ def generate_90(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         devices = ["payphone", "phone booth", "hotel room phone"]
         currencies = ["$", "£", "€"]
 
@@ -3211,11 +3211,11 @@ def generate_90(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_91(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_91(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, fruit: str, area: str, field_size: int, density: int, months: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         fruits_per_harvest = field_size * density
         harvests_per_year = 12 // months
         total_fruits = fruits_per_harvest * harvests_per_year
@@ -3241,7 +3241,7 @@ def generate_91(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["John", "Michael", "David", "James", "Robert", "William", "Richard"]
         fruits = ["pineapple", "mango", "banana", "papaya", "coconut"]
         areas = ["hectare", "square yard", "square meter"]
@@ -3270,7 +3270,7 @@ def generate_91(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_92(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_92(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str,
@@ -3287,7 +3287,7 @@ def generate_92(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         num3: int,
         unit: str,
         currency: str,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
 
         round_p1 = round(price1)
         round_p2 = round(price2)
@@ -3323,7 +3323,7 @@ def generate_92(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["John", "Mike", "David", "James", "Robert", "William", "Richard"]
         products = ["vegetable", "flower", "herb", "plant"]
         locations = ["local fair", "community market", "street bazaar", "town square"]
@@ -3365,7 +3365,7 @@ def generate_92(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_93(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_93(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name1: str,
@@ -3382,7 +3382,7 @@ def generate_93(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         frac1: float,
         mult1: int,
         frac2: float,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
 
         kills_arthur = int(n1 * frac1)
         kills_walter = int(kills_arthur * mult1)
@@ -3418,7 +3418,7 @@ def generate_93(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names_male = ["Arthur", "Bruce", "Charles", "David", "Edward", "Frederick", "George", "Henry"]
         creatures = ["ogres", "trolls", "goblins", "orcs", "giants"]
         weapons1 = ["sword", "mace", "battle axe", "war hammer"]
@@ -3469,11 +3469,11 @@ def generate_93(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_94(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_94(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, num_shares: int, price_per_share: int, increase_pct: int, decrease_pct: int
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         initial_value = num_shares * price_per_share
         first_increase = initial_value * increase_pct / 100
         value_after_increase = initial_value + first_increase
@@ -3507,7 +3507,7 @@ def generate_94(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["Maria", "Sarah", "Emma", "Isabella", "Sophia", "Mia", "Charlotte"]
         name = rng.choice(names)
 
@@ -3542,11 +3542,11 @@ def generate_94(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_95(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_95(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name1: str, name2: str, relation: str, food: str, n1: int, n2: int, n3: int, time_unit: str, time_period: str
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         daily_total = n1 + n2 + n3
         total = daily_total * (7 if time_period == "week" else 30)
 
@@ -3573,7 +3573,7 @@ def generate_95(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         name1_options = ["A father", "A grandfather", "An uncle"]
         name2_options = ["his wife", "his partner", "his spouse"]
         relation_options = ["daughter", "son", "grandchild"]
@@ -3607,11 +3607,11 @@ def generate_95(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_96(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_96(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, food: str, animal1: str, animal2: str, n1: int, n2: int, k1: int, k2: int, unit: str
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         animal2_amount = 2 * n1 - n2  # Amount per sheep
         animal2_total = k2 * animal2_amount  # Total for sheep
         animal1_total = k1 * n1  # Total for goats
@@ -3647,7 +3647,7 @@ def generate_96(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["John", "Michael", "David", "James", "Robert", "William", "Richard"]
         foods = ["hay", "grain", "feed", "silage"]
         animals = ["goat", "cow", "horse", "donkey", "llama", "alpaca", "pig", "turkey", "duck"]
@@ -3685,11 +3685,11 @@ def generate_96(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_97(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_97(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, mult_run: int, frac_skip: float, skip_speed: int, total_time: int, frac_run: float, frac_walk: float
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         run_speed = skip_speed / frac_skip
         walk_speed = run_speed / mult_run
         walk_hours = total_time * frac_walk
@@ -3728,7 +3728,7 @@ def generate_97(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["Dana", "Emma", "Sarah", "Julia", "Sophie", "Maria"]
         name = rng.choice(names)
 
@@ -3781,7 +3781,7 @@ def generate_97(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_98(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_98(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str,
@@ -3794,7 +3794,7 @@ def generate_98(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
         weight_passenger: int,
         unit: str,
         force_percent: int,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
 
         total_passenger_weight = num_passengers * weight_passenger
         total_weight = weight_vehicle + weight_item + total_passenger_weight
@@ -3824,7 +3824,7 @@ def generate_98(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["John", "Michael", "David", "James", "Robert", "William", "Richard"]
         vehicles = ["car", "van", "truck", "SUV"]
         items = ["luggage", "groceries", "equipment", "furniture"]
@@ -3878,11 +3878,11 @@ def generate_98(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
     return generate_example(rng, difficulty)
 
 
-def generate_99(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+def generate_99(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
 
     def generate_from_variables(
         name: str, currency: str, initial_amount: float, quantity: int, item: str, store_type: str, unit_price: float
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         total_cost = quantity * unit_price
         remaining = initial_amount - total_cost
 
@@ -3908,7 +3908,7 @@ def generate_99(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
             },
         }
 
-    def generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:
+    def generate_example(rng: Random, difficulty: float = 1.0) -> dict[str, Any]:
         names = ["David", "John", "Michael", "James", "William", "Robert"]
         currencies = ["$", "€", "£"]
         items = ["screw", "nail", "washer", "nut", "anchor"]
diff --git a/reasoning_gym/arithmetic/lcm.py b/reasoning_gym/arithmetic/lcm.py
index 19402fd9..893ed316 100644
--- a/reasoning_gym/arithmetic/lcm.py
+++ b/reasoning_gym/arithmetic/lcm.py
@@ -4,7 +4,7 @@ from dataclasses import dataclass
 from functools import reduce
 from math import lcm
 from random import Random
-from typing import List, Optional, Tuple
+from typing import Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -34,11 +34,11 @@ class LCMDataset(ProceduralDataset):
     def __init__(self, config: LCMConfig):
         super().__init__(config=config, seed=config.seed, size=config.size)
 
-    def _generate_numbers(self, rng: Random) -> Tuple[List[int], int]:
+    def _generate_numbers(self, rng: Random) -> tuple[list[int], int]:
         """Generate a list of random positive integers and their LCM.
         Will try up to 3 times to find numbers with LCM < product."""
 
-        def calculate_product(nums: List[int]) -> int:
+        def calculate_product(nums: list[int]) -> int:
             return reduce(lambda x, y: x * y, nums)
 
         # Try up to 3 times to get LCM < product
diff --git a/reasoning_gym/arithmetic/leg_counting.py b/reasoning_gym/arithmetic/leg_counting.py
index e2278b1c..b8a15191 100644
--- a/reasoning_gym/arithmetic/leg_counting.py
+++ b/reasoning_gym/arithmetic/leg_counting.py
@@ -93,7 +93,7 @@ class LegCountingDataset(ProceduralDataset):
     def __init__(self, config: LegCountingConfig):
         super().__init__(config=config, seed=config.seed, size=config.size)
 
-    def _generate_animals(self, rng: Random) -> Dict[str, int]:
+    def _generate_animals(self, rng: Random) -> dict[str, int]:
         """Generate a random set of animals and their counts"""
         num_types = rng.randint(self.config.min_animals, self.config.max_animals)
         animals = {}
diff --git a/reasoning_gym/arithmetic/number_format.py b/reasoning_gym/arithmetic/number_format.py
index e03d2bdc..36e66c1d 100644
--- a/reasoning_gym/arithmetic/number_format.py
+++ b/reasoning_gym/arithmetic/number_format.py
@@ -2,7 +2,7 @@
 
 from dataclasses import dataclass
 from random import Random
-from typing import Dict, Optional
+from typing import Any, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -67,7 +67,7 @@ class NumberFormatDataset(ProceduralDataset):
                 output.append(f"{candidate:.15e}")
         return output
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Overwrite this method in derived classes if a single oracle answer is not available."""
         oracle_answer = entry["metadata"]["solution"]
         if answer is not None and len(answer) > 0:
diff --git a/reasoning_gym/arithmetic/power_function.py b/reasoning_gym/arithmetic/power_function.py
index 24391a12..a4fb93c7 100644
--- a/reasoning_gym/arithmetic/power_function.py
+++ b/reasoning_gym/arithmetic/power_function.py
@@ -3,7 +3,7 @@
 from dataclasses import dataclass
 from math import pow
 from random import Random
-from typing import Dict, Optional
+from typing import Any, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -46,7 +46,7 @@ class PowerFunctionDataset(ProceduralDataset):
     def __init__(self, config: PowerFunctionConfig):
         super().__init__(config=config, seed=config.seed, size=config.size)
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Overwrite this method in derived classes if a single oracle answer is not available."""
         oracle_answer = entry["answer"]
         if answer is not None:
diff --git a/reasoning_gym/arithmetic/prime_factorization.py b/reasoning_gym/arithmetic/prime_factorization.py
index 55ec14ad..f6b03845 100644
--- a/reasoning_gym/arithmetic/prime_factorization.py
+++ b/reasoning_gym/arithmetic/prime_factorization.py
@@ -3,7 +3,7 @@
 import math
 from dataclasses import dataclass
 from random import Random
-from typing import Dict, List, Optional
+from typing import Any, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -29,7 +29,7 @@ class PrimeFactorizationDataset(ProceduralDataset):
     def __init__(self, config: PrimeFactorizationConfig):
         super().__init__(config=config, seed=config.seed, size=config.size)
 
-    def _prime_factors(self, n: int) -> List[int]:
+    def _prime_factors(self, n: int) -> list[int]:
         """Compute prime factors of a number"""
         factors = []
         d = 2
@@ -44,11 +44,11 @@ class PrimeFactorizationDataset(ProceduralDataset):
                 break
         return factors
 
-    def _normalize_answer(self, answer: str) -> List[int]:
+    def _normalize_answer(self, answer: str) -> list[int]:
         """Parse and sort factors from a string"""
         return sorted([int(factor.strip()) for factor in answer.split("×")])
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         oracle_answer = entry["answer"]
         reward = 0.0
         if answer is not None:
diff --git a/reasoning_gym/arithmetic/products.py b/reasoning_gym/arithmetic/products.py
index fe777e2b..8401be91 100644
--- a/reasoning_gym/arithmetic/products.py
+++ b/reasoning_gym/arithmetic/products.py
@@ -1,6 +1,8 @@
 import random
 from dataclasses import dataclass
-from typing import Optional
+from typing import Any, Optional
+
+from reasoning_gym import utils
 
 from ..coaching import AttributeType, BaseCurriculum, RangeAttributeDefinition
 from ..factory import ProceduralDataset, register_dataset
@@ -100,6 +102,9 @@ class ProductsDataset(ProceduralDataset):
         expression = " ".join(expression_parts)
         return expression, result
 
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
+        return utils.compute_decimal_reward(answer, oracle_answer=entry["answer"])
+
 
 class ProductsCurriculum(BaseCurriculum):
     def __init__(self):
diff --git a/reasoning_gym/arithmetic/time_intervals.py b/reasoning_gym/arithmetic/time_intervals.py
index f4011ba7..58e30cba 100644
--- a/reasoning_gym/arithmetic/time_intervals.py
+++ b/reasoning_gym/arithmetic/time_intervals.py
@@ -1,7 +1,7 @@
 import random
 from dataclasses import dataclass, field
 from datetime import date, datetime, time, timedelta
-from typing import List, Optional
+from typing import Optional
 
 import pytz
 from dateutil import parser
@@ -19,7 +19,7 @@ class TimeIntervalsConfig:
     min_date: date = date(1900, 1, 1)
     max_date: date = date(3000, 1, 1)
     max_date_difference_days: int = 100
-    task_types: List[str] = field(
+    task_types: list[str] = field(
         default_factory=lambda: ["time", "time_seconds", "time_ms", "date", "datetime", "datetime_tz"]
     )
     seed: Optional[int] = None
diff --git a/reasoning_gym/coaching/base_curriculum.py b/reasoning_gym/coaching/base_curriculum.py
index b3e97672..20684f75 100644
--- a/reasoning_gym/coaching/base_curriculum.py
+++ b/reasoning_gym/coaching/base_curriculum.py
@@ -11,7 +11,7 @@ class BaseCurriculum:
         self._attributes: dict[str, AttributeDefinition] = {}
         self._current_levels: dict[str, int] = {}
 
-    def generate_configuration(self, defaults: Optional[dict[str, any]] = None) -> ConfigT:
+    def generate_configuration(self, defaults: Optional[dict[str, Any]] = None) -> ConfigT:
         config_args = defaults.copy() if defaults is not None else {}
         for attr in self._attributes.values():
             if isinstance(attr, RangeAttributeDefinition):
diff --git a/reasoning_gym/coaching/coach.py b/reasoning_gym/coaching/coach.py
index eeeab5d1..5142e666 100644
--- a/reasoning_gym/coaching/coach.py
+++ b/reasoning_gym/coaching/coach.py
@@ -6,7 +6,7 @@ from collections import OrderedDict
 from dataclasses import dataclass, field
 from pathlib import Path
 from statistics import mean, stdev
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Optional, Union
 
 from ..dataset import ProceduralDataset
 
@@ -15,7 +15,7 @@ from ..dataset import ProceduralDataset
 class ScoreStats:
     """Container for score statistics with mean, std, min, max"""
 
-    scores: OrderedDict[Tuple[Tuple[str, Any], ...], Tuple[int, float, float, float, float]]
+    scores: OrderedDict[tuple[tuple[str, Any], ...], tuple[int, float, float, float, float]]
 
     def __str__(self) -> str:
         """Create a formatted report of the statistics
@@ -41,7 +41,7 @@ class ScoreStats:
 class GroupedScores:
     """Container for grouped scores with total count"""
 
-    scores: OrderedDict[Tuple[Tuple[str, Any], ...], List[float]]
+    scores: OrderedDict[tuple[tuple[str, Any], ...], list[float]]
     total_scores: int
 
     def __str__(self) -> str:
@@ -114,11 +114,11 @@ class GroupedScores:
 class ScoreBoard:
     """Tracks scores and metadata for coaching sessions"""
 
-    scores: List[float] = field(default_factory=list)
-    metadata: List[Dict[str, Any]] = field(default_factory=list)
-    conversations: List[Optional[List[Dict]]] = field(default_factory=list)
+    scores: list[float] = field(default_factory=list)
+    metadata: list[dict[str, Any]] = field(default_factory=list)
+    conversations: list[Optional[list[dict]]] = field(default_factory=list)
 
-    def add_score(self, score: float, metadata: Dict[str, Any], conversation: Optional[List[Dict]] = None) -> None:
+    def add_score(self, score: float, metadata: dict[str, Any], conversation: Optional[list[dict]] = None) -> None:
         """Add a new score entry with associated metadata and optional conversation
 
         Args:
@@ -140,7 +140,7 @@ class ScoreBoard:
         """Return the number of stored scores"""
         return len(self.scores)
 
-    def _metadata_to_key(self, metadata: Dict[str, Any]) -> Tuple[Tuple[str, Any], ...]:
+    def _metadata_to_key(self, metadata: dict[str, Any]) -> tuple[tuple[str, Any], ...]:
         """Convert metadata dict to tuple of key-value pairs, sorted by key
 
         If source_dataset and source_index are present in metadata, they will be
@@ -222,7 +222,7 @@ class Coach(ProceduralDataset):
         return self.dataset[idx]
 
     def score_answer(
-        self, answer: Optional[str], entry: Dict[str, Any], conversation: Optional[List[Dict]] = None
+        self, answer: Optional[str], entry: dict[str, Any], conversation: Optional[list[dict]] = None
     ) -> float:
         """Score answer and track results
 
diff --git a/reasoning_gym/coaching/registry.py b/reasoning_gym/coaching/registry.py
index 5d7fdd1a..076ea1cd 100644
--- a/reasoning_gym/coaching/registry.py
+++ b/reasoning_gym/coaching/registry.py
@@ -1,6 +1,6 @@
 """Registry for managing active experiments."""
 
-from typing import Dict, List, Optional
+from typing import Optional
 
 from ..composite import CompositeConfig
 from .experiment import Experiment
@@ -25,7 +25,7 @@ class ExperimentRegistry:
         """Get an experiment by name."""
         return self._experiments.get(name)
 
-    def list_experiments(self) -> List[str]:
+    def list_experiments(self) -> list[str]:
         """List all registered experiment names."""
         return list(self._experiments.keys())
 
diff --git a/reasoning_gym/code/bf.py b/reasoning_gym/code/bf.py
index e7def05e..3a7d9b0d 100644
--- a/reasoning_gym/code/bf.py
+++ b/reasoning_gym/code/bf.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from random import Random
-from typing import Dict, Optional
+from typing import Any, Optional
 
 import bfi
 
@@ -108,14 +108,14 @@ int main() {{
         # bf = Minify.minify(bf) # Is this necessary?
         return bf
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Determine if the solution provided solves the BF task.
 
         The function awards 1.0 for a correct answer.
 
         Args:
             answer (Optional[str]): The user's answer.
-            entry (Dict[str, any]): The original dataset entry containing the correct answer.
+            entry (dict[str, Any]): The original dataset entry containing the correct answer.
 
         Returns:
             float: The computed score between 0.0 and 1.0.
diff --git a/reasoning_gym/cognition/__init__.py b/reasoning_gym/cognition/__init__.py
index 473fee97..d43c60f3 100644
--- a/reasoning_gym/cognition/__init__.py
+++ b/reasoning_gym/cognition/__init__.py
@@ -4,6 +4,7 @@ Cognition tasks for training reasoning capabilities.
 
 from .color_cube_rotation import ColorCubeRotationConfig, ColorCubeRotationDataset
 from .figlet_fonts import FigletFontConfig, FigletFontDataset
+from .needle_haystack import NeedleHaystackConfig, NeedleHaystackDataset
 from .number_sequences import NumberSequenceConfig, NumberSequenceDataset
 from .rectangle_count import RectangleCountConfig, RectangleCountDataset
 from .rubiks_cube import RubiksCubeConfig, RubiksCubeDataset
@@ -19,4 +20,6 @@ __all__ = [
     "RubiksCubeDataset",
     "RectangleCountConfig",
     "RectangleCountDataset",
+    "NeedleHaystackConfig",
+    "NeedleHaystackDataset",
 ]
diff --git a/reasoning_gym/cognition/color_cube_rotation.py b/reasoning_gym/cognition/color_cube_rotation.py
index 4b8dc30a..fe7feeea 100644
--- a/reasoning_gym/cognition/color_cube_rotation.py
+++ b/reasoning_gym/cognition/color_cube_rotation.py
@@ -1,7 +1,7 @@
 import random
 from dataclasses import dataclass
 from enum import StrEnum
-from typing import Dict, List, Optional, Tuple
+from typing import Any, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -38,7 +38,7 @@ class Side(StrEnum):
 class Cube:
     """Represents a cube with colored sides"""
 
-    colors: Dict[Side, Color]
+    colors: dict[Side, Color]
 
     def rotate_front_to_top(self) -> None:
         """Rotate cube so front face becomes top"""
@@ -162,7 +162,7 @@ class ColorCubeRotationDataset(ProceduralDataset):
             rotation_map[from_side]()
 
     def _generate_story(
-        self, initial_state: Dict[Side, Color], rotations: List[Side], target_side: Side, rng: random.Random
+        self, initial_state: dict[Side, Color], rotations: list[Side], target_side: Side, rng: random.Random
     ) -> str:
         """Generate story describing cube state and rotations"""
         # Describe initial state
@@ -189,7 +189,7 @@ class ColorCubeRotationDataset(ProceduralDataset):
 
         return "\n".join(story_parts)
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         reward = 0.0
         metadata = entry["metadata"]
         if answer is not None:
diff --git a/reasoning_gym/cognition/figlet_fonts.py b/reasoning_gym/cognition/figlet_fonts.py
index 7274a4ff..70f2e5e8 100644
--- a/reasoning_gym/cognition/figlet_fonts.py
+++ b/reasoning_gym/cognition/figlet_fonts.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from random import Random
-from typing import Dict, Optional
+from typing import Any, Optional
 
 import pyfiglet
 
@@ -119,7 +119,7 @@ class FigletFontDataset(ProceduralDataset):
             "metadata": {"font": chosen_font, "space_letters": self.config.space_letters},
         }
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Determine if the solution provided solves the figlet task.
 
         The function awards 1.0 for a correct answer and 0.1 points for each correct letter in the correct position,
@@ -127,7 +127,7 @@ class FigletFontDataset(ProceduralDataset):
 
         Args:
             answer (Optional[str]): The user's answer.
-            entry (Dict[str, any]): The original dataset entry containing the correct answer.
+            entry (dict[str, Any]): The original dataset entry containing the correct answer.
 
         Returns:
             float: The computed score between 0.0 and 1.0.
diff --git a/reasoning_gym/cognition/needle_data.py b/reasoning_gym/cognition/needle_data.py
new file mode 100644
index 00000000..5367314b
--- /dev/null
+++ b/reasoning_gym/cognition/needle_data.py
@@ -0,0 +1,3379 @@
+NAMES = [
+    "Aaran",
+    "Aaren",
+    "Aarez",
+    "Aarman",
+    "Aaron",
+    "Aaron-James",
+    "Aarron",
+    "Aaryan",
+    "Aaryn",
+    "Aayan",
+    "Aazaan",
+    "Abaan",
+    "Abbas",
+    "Abdallah",
+    "Abdalroof",
+    "Abdihakim",
+    "Abdirahman",
+    "Abdisalam",
+    "Abdul",
+    "Abdul-Aziz",
+    "Abdulbasir",
+    "Abdulkadir",
+    "Abdulkarem",
+    "Abdulkhader",
+    "Abdullah",
+    "Abdul-Majeed",
+    "Abdulmalik",
+    "Abdul-Rehman",
+    "Abdur",
+    "Abdurraheem",
+    "Abdur-Rahman",
+    "Abdur-Rehmaan",
+    "Abel",
+    "Abhinav",
+    "Abhisumant",
+    "Abid",
+    "Abir",
+    "Abraham",
+    "Abu",
+    "Abubakar",
+    "Ace",
+    "Adain",
+    "Adam",
+    "Adam-James",
+    "Addison",
+    "Addisson",
+    "Adegbola",
+    "Adegbolahan",
+    "Aden",
+    "Adenn",
+    "Adie",
+    "Adil",
+    "Aditya",
+    "Adnan",
+    "Adrian",
+    "Adrien",
+    "Aedan",
+    "Aedin",
+    "Aedyn",
+    "Aeron",
+    "Afonso",
+    "Ahmad",
+    "Ahmed",
+    "Ahmed-Aziz",
+    "Ahoua",
+    "Ahtasham",
+    "Aiadan",
+    "Aidan",
+    "Aiden",
+    "Aiden-Jack",
+    "Aiden-Vee",
+    "Aidian",
+    "Aidy",
+    "Ailin",
+    "Aiman",
+    "Ainsley",
+    "Ainslie",
+    "Airen",
+    "Airidas",
+    "Airlie",
+    "AJ",
+    "Ajay",
+    "A-Jay",
+    "Ajayraj",
+    "Akan",
+    "Akram",
+    "Al",
+    "Ala",
+    "Alan",
+    "Alanas",
+    "Alasdair",
+    "Alastair",
+    "Alber",
+    "Albert",
+    "Albie",
+    "Aldred",
+    "Alec",
+    "Aled",
+    "Aleem",
+    "Aleksandar",
+    "Aleksander",
+    "Aleksandr",
+    "Aleksandrs",
+    "Alekzander",
+    "Alessandro",
+    "Alessio",
+    "Alex",
+    "Alexander",
+    "Alexei",
+    "Alexx",
+    "Alexzander",
+    "Alf",
+    "Alfee",
+    "Alfie",
+    "Alfred",
+    "Alfy",
+    "Alhaji",
+    "Al-Hassan",
+    "Ali",
+    "Aliekber",
+    "Alieu",
+    "Alihaider",
+    "Alisdair",
+    "Alishan",
+    "Alistair",
+    "Alistar",
+    "Alister",
+    "Aliyaan",
+    "Allan",
+    "Allan-Laiton",
+    "Allen",
+    "Allesandro",
+    "Allister",
+    "Ally",
+    "Alphonse",
+    "Altyiab",
+    "Alum",
+    "Alvern",
+    "Alvin",
+    "Alyas",
+    "Amaan",
+    "Aman",
+    "Amani",
+    "Ambanimoh",
+    "Ameer",
+    "Amgad",
+    "Ami",
+    "Amin",
+    "Amir",
+    "Ammaar",
+    "Ammar",
+    "Ammer",
+    "Amolpreet",
+    "Amos",
+    "Amrinder",
+    "Amrit",
+    "Amro",
+    "Anay",
+    "Andrea",
+    "Andreas",
+    "Andrei",
+    "Andrejs",
+    "Andrew",
+    "Andy",
+    "Anees",
+    "Anesu",
+    "Angel",
+    "Angelo",
+    "Angus",
+    "Anir",
+    "Anis",
+    "Anish",
+    "Anmolpreet",
+    "Annan",
+    "Anndra",
+    "Anselm",
+    "Anthony",
+    "Anthony-John",
+    "Antoine",
+    "Anton",
+    "Antoni",
+    "Antonio",
+    "Antony",
+    "Antonyo",
+    "Anubhav",
+    "Aodhan",
+    "Aon",
+    "Aonghus",
+    "Apisai",
+    "Arafat",
+    "Aran",
+    "Arandeep",
+    "Arann",
+    "Aray",
+    "Arayan",
+    "Archibald",
+    "Archie",
+    "Arda",
+    "Ardal",
+    "Ardeshir",
+    "Areeb",
+    "Areez",
+    "Aref",
+    "Arfin",
+    "Argyle",
+    "Argyll",
+    "Ari",
+    "Aria",
+    "Arian",
+    "Arihant",
+    "Aristomenis",
+    "Aristotelis",
+    "Arjuna",
+    "Arlo",
+    "Armaan",
+    "Arman",
+    "Armen",
+    "Arnab",
+    "Arnav",
+    "Arnold",
+    "Aron",
+    "Aronas",
+    "Arran",
+    "Arrham",
+    "Arron",
+    "Arryn",
+    "Arsalan",
+    "Artem",
+    "Arthur",
+    "Artur",
+    "Arturo",
+    "Arun",
+    "Arunas",
+    "Arved",
+    "Arya",
+    "Aryan",
+    "Aryankhan",
+    "Aryian",
+    "Aryn",
+    "Asa",
+    "Asfhan",
+    "Ash",
+    "Ashlee-jay",
+    "Ashley",
+    "Ashton",
+    "Ashton-Lloyd",
+    "Ashtyn",
+    "Ashwin",
+    "Asif",
+    "Asim",
+    "Aslam",
+    "Asrar",
+    "Ata",
+    "Atal",
+    "Atapattu",
+    "Ateeq",
+    "Athol",
+    "Athon",
+    "Athos-Carlos",
+    "Atli",
+    "Atom",
+    "Attila",
+    "Aulay",
+    "Aun",
+    "Austen",
+    "Austin",
+    "Avani",
+    "Averon",
+    "Avi",
+    "Avinash",
+    "Avraham",
+    "Awais",
+    "Awwal",
+    "Axel",
+    "Ayaan",
+    "Ayan",
+    "Aydan",
+    "Ayden",
+    "Aydin",
+    "Aydon",
+    "Ayman",
+    "Ayomide",
+    "Ayren",
+    "Ayrton",
+    "Aytug",
+    "Ayub",
+    "Ayyub",
+    "Azaan",
+    "Azedine",
+    "Azeem",
+    "Azim",
+    "Aziz",
+    "Azlan",
+    "Azzam",
+    "Azzedine",
+    "Babatunmise",
+    "Babur",
+    "Bader",
+    "Badr",
+    "Badsha",
+    "Bailee",
+    "Bailey",
+    "Bailie",
+    "Bailley",
+    "Baillie",
+    "Baley",
+    "Balian",
+    "Banan",
+    "Barath",
+    "Barkley",
+    "Barney",
+    "Baron",
+    "Barrie",
+    "Barry",
+    "Bartlomiej",
+    "Bartosz",
+    "Basher",
+    "Basile",
+    "Baxter",
+    "Baye",
+    "Bayley",
+    "Beau",
+    "Beinn",
+    "Bekim",
+    "Believe",
+    "Ben",
+    "Bendeguz",
+    "Benedict",
+    "Benjamin",
+    "Benjamyn",
+    "Benji",
+    "Benn",
+    "Bennett",
+    "Benny",
+    "Benoit",
+    "Bentley",
+    "Berkay",
+    "Bernard",
+    "Bertie",
+    "Bevin",
+    "Bezalel",
+    "Bhaaldeen",
+    "Bharath",
+    "Bilal",
+    "Bill",
+    "Billy",
+    "Binod",
+    "Bjorn",
+    "Blaike",
+    "Blaine",
+    "Blair",
+    "Blaire",
+    "Blake",
+    "Blazej",
+    "Blazey",
+    "Blessing",
+    "Blue",
+    "Blyth",
+    "Bo",
+    "Boab",
+    "Bob",
+    "Bobby",
+    "Bobby-Lee",
+    "Bodhan",
+    "Boedyn",
+    "Bogdan",
+    "Bohbi",
+    "Bony",
+    "Bowen",
+    "Bowie",
+    "Boyd",
+    "Bracken",
+    "Brad",
+    "Bradan",
+    "Braden",
+    "Bradley",
+    "Bradlie",
+    "Bradly",
+    "Brady",
+    "Bradyn",
+    "Braeden",
+    "Braiden",
+    "Brajan",
+    "Brandan",
+    "Branden",
+    "Brandon",
+    "Brandonlee",
+    "Brandon-Lee",
+    "Brandyn",
+    "Brannan",
+    "Brayden",
+    "Braydon",
+    "Braydyn",
+    "Breandan",
+    "Brehme",
+    "Brendan",
+    "Brendon",
+    "Brendyn",
+    "Breogan",
+    "Bret",
+    "Brett",
+    "Briaddon",
+    "Brian",
+    "Brodi",
+    "Brodie",
+    "Brody",
+    "Brogan",
+    "Broghan",
+    "Brooke",
+    "Brooklin",
+    "Brooklyn",
+    "Bruce",
+    "Bruin",
+    "Bruno",
+    "Brunon",
+    "Bryan",
+    "Bryce",
+    "Bryden",
+    "Brydon",
+    "Brydon-Craig",
+    "Bryn",
+    "Brynmor",
+    "Bryson",
+    "Buddy",
+    "Bully",
+    "Burak",
+    "Burhan",
+    "Butali",
+    "Butchi",
+    "Byron",
+    "Cabhan",
+    "Cadan",
+    "Cade",
+    "Caden",
+    "Cadon",
+    "Cadyn",
+    "Caedan",
+    "Caedyn",
+    "Cael",
+    "Caelan",
+    "Caelen",
+    "Caethan",
+    "Cahl",
+    "Cahlum",
+    "Cai",
+    "Caidan",
+    "Caiden",
+    "Caiden-Paul",
+    "Caidyn",
+    "Caie",
+    "Cailaen",
+    "Cailean",
+    "Caileb-John",
+    "Cailin",
+    "Cain",
+    "Caine",
+    "Cairn",
+    "Cal",
+    "Calan",
+    "Calder",
+    "Cale",
+    "Calean",
+    "Caleb",
+    "Calen",
+    "Caley",
+    "Calib",
+    "Calin",
+    "Callahan",
+    "Callan",
+    "Callan-Adam",
+    "Calley",
+    "Callie",
+    "Callin",
+    "Callum",
+    "Callun",
+    "Callyn",
+    "Calum",
+    "Calum-James",
+    "Calvin",
+    "Cambell",
+    "Camerin",
+    "Cameron",
+    "Campbel",
+    "Campbell",
+    "Camron",
+    "Caolain",
+    "Caolan",
+    "Carl",
+    "Carlo",
+    "Carlos",
+    "Carrich",
+    "Carrick",
+    "Carson",
+    "Carter",
+    "Carwyn",
+    "Casey",
+    "Casper",
+    "Cassy",
+    "Cathal",
+    "Cator",
+    "Cavan",
+    "Cayden",
+    "Cayden-Robert",
+    "Cayden-Tiamo",
+    "Ceejay",
+    "Ceilan",
+    "Ceiran",
+    "Ceirin",
+    "Ceiron",
+    "Cejay",
+    "Celik",
+    "Cephas",
+    "Cesar",
+    "Cesare",
+    "Chad",
+    "Chaitanya",
+    "Chang-Ha",
+    "Charles",
+    "Charley",
+    "Charlie",
+    "Charly",
+    "Chase",
+    "Che",
+    "Chester",
+    "Chevy",
+    "Chi",
+    "Chibudom",
+    "Chidera",
+    "Chimsom",
+    "Chin",
+    "Chintu",
+    "Chiqal",
+    "Chiron",
+    "Chris",
+    "Chris-Daniel",
+    "Chrismedi",
+    "Christian",
+    "Christie",
+    "Christoph",
+    "Christopher",
+    "Christopher-Lee",
+    "Christy",
+    "Chu",
+    "Chukwuemeka",
+    "Cian",
+    "Ciann",
+    "Ciar",
+    "Ciaran",
+    "Ciarian",
+    "Cieran",
+    "Cillian",
+    "Cillin",
+    "Cinar",
+    "CJ",
+    "C-Jay",
+    "Clark",
+    "Clarke",
+    "Clayton",
+    "Clement",
+    "Clifford",
+    "Clyde",
+    "Cobain",
+    "Coban",
+    "Coben",
+    "Cobi",
+    "Cobie",
+    "Coby",
+    "Codey",
+    "Codi",
+    "Codie",
+    "Cody",
+    "Cody-Lee",
+    "Coel",
+    "Cohan",
+    "Cohen",
+    "Colby",
+    "Cole",
+    "Colin",
+    "Coll",
+    "Colm",
+    "Colt",
+    "Colton",
+    "Colum",
+    "Colvin",
+    "Comghan",
+    "Conal",
+    "Conall",
+    "Conan",
+    "Conar",
+    "Conghaile",
+    "Conlan",
+    "Conley",
+    "Conli",
+    "Conlin",
+    "Conlly",
+    "Conlon",
+    "Conlyn",
+    "Connal",
+    "Connall",
+    "Connan",
+    "Connar",
+    "Connel",
+    "Connell",
+    "Conner",
+    "Connolly",
+    "Connor",
+    "Connor-David",
+    "Conor",
+    "Conrad",
+    "Cooper",
+    "Copeland",
+    "Coray",
+    "Corben",
+    "Corbin",
+    "Corey",
+    "Corey-James",
+    "Corey-Jay",
+    "Cori",
+    "Corie",
+    "Corin",
+    "Cormac",
+    "Cormack",
+    "Cormak",
+    "Corran",
+    "Corrie",
+    "Cory",
+    "Cosmo",
+    "Coupar",
+    "Craig",
+    "Craig-James",
+    "Crawford",
+    "Creag",
+    "Crispin",
+    "Cristian",
+    "Crombie",
+    "Cruiz",
+    "Cruz",
+    "Cuillin",
+    "Cullen",
+    "Cullin",
+    "Curtis",
+    "Cyrus",
+    "Daanyaal",
+    "Daegan",
+    "Daegyu",
+    "Dafydd",
+    "Dagon",
+    "Dailey",
+    "Daimhin",
+    "Daithi",
+    "Dakota",
+    "Daksh",
+    "Dale",
+    "Dalong",
+    "Dalton",
+    "Damian",
+    "Damien",
+    "Damon",
+    "Dan",
+    "Danar",
+    "Dane",
+    "Danial",
+    "Daniel",
+    "Daniele",
+    "Daniel-James",
+    "Daniels",
+    "Daniil",
+    "Danish",
+    "Daniyal",
+    "Danniel",
+    "Danny",
+    "Dante",
+    "Danyal",
+    "Danyil",
+    "Danys",
+    "Daood",
+    "Dara",
+    "Darach",
+    "Daragh",
+    "Darcy",
+    "D'arcy",
+    "Dareh",
+    "Daren",
+    "Darien",
+    "Darius",
+    "Darl",
+    "Darn",
+    "Darrach",
+    "Darragh",
+    "Darrel",
+    "Darrell",
+    "Darren",
+    "Darrie",
+    "Darrius",
+    "Darroch",
+    "Darryl",
+    "Darryn",
+    "Darwyn",
+    "Daryl",
+    "Daryn",
+    "Daud",
+    "Daumantas",
+    "Davi",
+    "David",
+    "David-Jay",
+    "David-Lee",
+    "Davie",
+    "Davis",
+    "Davy",
+    "Dawid",
+    "Dawson",
+    "Dawud",
+    "Dayem",
+    "Daymian",
+    "Deacon",
+    "Deagan",
+    "Dean",
+    "Deano",
+    "Decklan",
+    "Declain",
+    "Declan",
+    "Declyan",
+    "Declyn",
+    "Dedeniseoluwa",
+    "Deecan",
+    "Deegan",
+    "Deelan",
+    "Deklain-Jaimes",
+    "Del",
+    "Demetrius",
+    "Denis",
+    "Deniss",
+    "Dennan",
+    "Dennin",
+    "Dennis",
+    "Denny",
+    "Dennys",
+    "Denon",
+    "Denton",
+    "Denver",
+    "Denzel",
+    "Deon",
+    "Derek",
+    "Derick",
+    "Derin",
+    "Dermot",
+    "Derren",
+    "Derrie",
+    "Derrin",
+    "Derron",
+    "Derry",
+    "Derryn",
+    "Deryn",
+    "Deshawn",
+    "Desmond",
+    "Dev",
+    "Devan",
+    "Devin",
+    "Devlin",
+    "Devlyn",
+    "Devon",
+    "Devrin",
+    "Devyn",
+    "Dex",
+    "Dexter",
+    "Dhani",
+    "Dharam",
+    "Dhavid",
+    "Dhyia",
+    "Diarmaid",
+    "Diarmid",
+    "Diarmuid",
+    "Didier",
+    "Diego",
+    "Diesel",
+    "Diesil",
+    "Digby",
+    "Dilan",
+    "Dilano",
+    "Dillan",
+    "Dillon",
+    "Dilraj",
+    "Dimitri",
+    "Dinaras",
+    "Dion",
+    "Dissanayake",
+    "Dmitri",
+    "Doire",
+    "Dolan",
+    "Domanic",
+    "Domenico",
+    "Domhnall",
+    "Dominic",
+    "Dominick",
+    "Dominik",
+    "Donald",
+    "Donnacha",
+    "Donnie",
+    "Dorian",
+    "Dougal",
+    "Douglas",
+    "Dougray",
+    "Drakeo",
+    "Dre",
+    "Dregan",
+    "Drew",
+    "Dugald",
+    "Duncan",
+    "Duriel",
+    "Dustin",
+    "Dylan",
+    "Dylan-Jack",
+    "Dylan-James",
+    "Dylan-John",
+    "Dylan-Patrick",
+    "Dylin",
+    "Dyllan",
+    "Dyllan-James",
+    "Dyllon",
+    "Eadie",
+    "Eagann",
+    "Eamon",
+    "Eamonn",
+    "Eason",
+    "Eassan",
+    "Easton",
+    "Ebow",
+    "Ed",
+    "Eddie",
+    "Eden",
+    "Ediomi",
+    "Edison",
+    "Eduardo",
+    "Eduards",
+    "Edward",
+    "Edwin",
+    "Edwyn",
+    "Eesa",
+    "Efan",
+    "Efe",
+    "Ege",
+    "Ehsan",
+    "Ehsen",
+    "Eiddon",
+    "Eidhan",
+    "Eihli",
+    "Eimantas",
+    "Eisa",
+    "Eli",
+    "Elias",
+    "Elijah",
+    "Eliot",
+    "Elisau",
+    "Eljay",
+    "Eljon",
+    "Elliot",
+    "Elliott",
+    "Ellis",
+    "Ellisandro",
+    "Elshan",
+    "Elvin",
+    "Elyan",
+    "Emanuel",
+    "Emerson",
+    "Emil",
+    "Emile",
+    "Emir",
+    "Emlyn",
+    "Emmanuel",
+    "Emmet",
+    "Eng",
+    "Eniola",
+    "Enis",
+    "Ennis",
+    "Enrico",
+    "Enrique",
+    "Enzo",
+    "Eoghain",
+    "Eoghan",
+    "Eoin",
+    "Eonan",
+    "Erdehan",
+    "Eren",
+    "Erencem",
+    "Eric",
+    "Ericlee",
+    "Erik",
+    "Eriz",
+    "Ernie-Jacks",
+    "Eroni",
+    "Eryk",
+    "Eshan",
+    "Essa",
+    "Esteban",
+    "Ethan",
+    "Etienne",
+    "Etinosa",
+    "Euan",
+    "Eugene",
+    "Evan",
+    "Evann",
+    "Ewan",
+    "Ewen",
+    "Ewing",
+    "Exodi",
+    "Ezekiel",
+    "Ezra",
+    "Fabian",
+    "Fahad",
+    "Faheem",
+    "Faisal",
+    "Faizaan",
+    "Famara",
+    "Fares",
+    "Farhaan",
+    "Farhan",
+    "Farren",
+    "Farzad",
+    "Fauzaan",
+    "Favour",
+    "Fawaz",
+    "Fawkes",
+    "Faysal",
+    "Fearghus",
+    "Feden",
+    "Felix",
+    "Fergal",
+    "Fergie",
+    "Fergus",
+    "Ferre",
+    "Fezaan",
+    "Fiachra",
+    "Fikret",
+    "Filip",
+    "Filippo",
+    "Finan",
+    "Findlay",
+    "Findlay-James",
+    "Findlie",
+    "Finlay",
+    "Finley",
+    "Finn",
+    "Finnan",
+    "Finnean",
+    "Finnen",
+    "Finnlay",
+    "Finnley",
+    "Fintan",
+    "Fionn",
+    "Firaaz",
+    "Fletcher",
+    "Flint",
+    "Florin",
+    "Flyn",
+    "Flynn",
+    "Fodeba",
+    "Folarinwa",
+    "Forbes",
+    "Forgan",
+    "Forrest",
+    "Fox",
+    "Francesco",
+    "Francis",
+    "Francisco",
+    "Franciszek",
+    "Franco",
+    "Frank",
+    "Frankie",
+    "Franklin",
+    "Franko",
+    "Fraser",
+    "Frazer",
+    "Fred",
+    "Freddie",
+    "Frederick",
+    "Fruin",
+    "Fyfe",
+    "Fyn",
+    "Fynlay",
+    "Fynn",
+    "Gabriel",
+    "Gallagher",
+    "Gareth",
+    "Garren",
+    "Garrett",
+    "Garry",
+    "Gary",
+    "Gavin",
+    "Gavin-Lee",
+    "Gene",
+    "Geoff",
+    "Geoffrey",
+    "Geomer",
+    "Geordan",
+    "Geordie",
+    "George",
+    "Georgia",
+    "Georgy",
+    "Gerard",
+    "Ghyll",
+    "Giacomo",
+    "Gian",
+    "Giancarlo",
+    "Gianluca",
+    "Gianmarco",
+    "Gideon",
+    "Gil",
+    "Gio",
+    "Girijan",
+    "Girius",
+    "Gjan",
+    "Glascott",
+    "Glen",
+    "Glenn",
+    "Gordon",
+    "Grady",
+    "Graeme",
+    "Graham",
+    "Grahame",
+    "Grant",
+    "Grayson",
+    "Greg",
+    "Gregor",
+    "Gregory",
+    "Greig",
+    "Griffin",
+    "Griffyn",
+    "Grzegorz",
+    "Guang",
+    "Guerin",
+    "Guillaume",
+    "Gurardass",
+    "Gurdeep",
+    "Gursees",
+    "Gurthar",
+    "Gurveer",
+    "Gurwinder",
+    "Gus",
+    "Gustav",
+    "Guthrie",
+    "Guy",
+    "Gytis",
+    "Habeeb",
+    "Hadji",
+    "Hadyn",
+    "Hagun",
+    "Haiden",
+    "Haider",
+    "Hamad",
+    "Hamid",
+    "Hamish",
+    "Hamza",
+    "Hamzah",
+    "Han",
+    "Hansen",
+    "Hao",
+    "Hareem",
+    "Hari",
+    "Harikrishna",
+    "Haris",
+    "Harish",
+    "Harjeevan",
+    "Harjyot",
+    "Harlee",
+    "Harleigh",
+    "Harley",
+    "Harman",
+    "Harnek",
+    "Harold",
+    "Haroon",
+    "Harper",
+    "Harri",
+    "Harrington",
+    "Harris",
+    "Harrison",
+    "Harry",
+    "Harvey",
+    "Harvie",
+    "Harvinder",
+    "Hasan",
+    "Haseeb",
+    "Hashem",
+    "Hashim",
+    "Hassan",
+    "Hassanali",
+    "Hately",
+    "Havila",
+    "Hayden",
+    "Haydn",
+    "Haydon",
+    "Haydyn",
+    "Hcen",
+    "Hector",
+    "Heddle",
+    "Heidar",
+    "Heini",
+    "Hendri",
+    "Henri",
+    "Henry",
+    "Herbert",
+    "Heyden",
+    "Hiro",
+    "Hirvaansh",
+    "Hishaam",
+    "Hogan",
+    "Honey",
+    "Hong",
+    "Hope",
+    "Hopkin",
+    "Hosea",
+    "Howard",
+    "Howie",
+    "Hristomir",
+    "Hubert",
+    "Hugh",
+    "Hugo",
+    "Humza",
+    "Hunter",
+    "Husnain",
+    "Hussain",
+    "Hussan",
+    "Hussnain",
+    "Hussnan",
+    "Hyden",
+    "I",
+    "Iagan",
+    "Iain",
+    "Ian",
+    "Ibraheem",
+    "Ibrahim",
+    "Idahosa",
+    "Idrees",
+    "Idris",
+    "Iestyn",
+    "Ieuan",
+    "Igor",
+    "Ihtisham",
+    "Ijay",
+    "Ikechukwu",
+    "Ikemsinachukwu",
+    "Ilyaas",
+    "Ilyas",
+    "Iman",
+    "Immanuel",
+    "Inan",
+    "Indy",
+    "Ines",
+    "Innes",
+    "Ioannis",
+    "Ireayomide",
+    "Ireoluwa",
+    "Irvin",
+    "Irvine",
+    "Isa",
+    "Isaa",
+    "Isaac",
+    "Isaiah",
+    "Isak",
+    "Isher",
+    "Ishwar",
+    "Isimeli",
+    "Isira",
+    "Ismaeel",
+    "Ismail",
+    "Israel",
+    "Issiaka",
+    "Ivan",
+    "Ivar",
+    "Izaak",
+    "J",
+    "Jaay",
+    "Jac",
+    "Jace",
+    "Jack",
+    "Jacki",
+    "Jackie",
+    "Jack-James",
+    "Jackson",
+    "Jacky",
+    "Jacob",
+    "Jacques",
+    "Jad",
+    "Jaden",
+    "Jadon",
+    "Jadyn",
+    "Jae",
+    "Jagat",
+    "Jago",
+    "Jaheim",
+    "Jahid",
+    "Jahy",
+    "Jai",
+    "Jaida",
+    "Jaiden",
+    "Jaidyn",
+    "Jaii",
+    "Jaime",
+    "Jai-Rajaram",
+    "Jaise",
+    "Jak",
+    "Jake",
+    "Jakey",
+    "Jakob",
+    "Jaksyn",
+    "Jakub",
+    "Jamaal",
+    "Jamal",
+    "Jameel",
+    "Jameil",
+    "James",
+    "James-Paul",
+    "Jamey",
+    "Jamie",
+    "Jan",
+    "Jaosha",
+    "Jardine",
+    "Jared",
+    "Jarell",
+    "Jarl",
+    "Jarno",
+    "Jarred",
+    "Jarvi",
+    "Jasey-Jay",
+    "Jasim",
+    "Jaskaran",
+    "Jason",
+    "Jasper",
+    "Jaxon",
+    "Jaxson",
+    "Jay",
+    "Jaydan",
+    "Jayden",
+    "Jayden-James",
+    "Jayden-Lee",
+    "Jayden-Paul",
+    "Jayden-Thomas",
+    "Jaydn",
+    "Jaydon",
+    "Jaydyn",
+    "Jayhan",
+    "Jay-Jay",
+    "Jayke",
+    "Jaymie",
+    "Jayse",
+    "Jayson",
+    "Jaz",
+    "Jazeb",
+    "Jazib",
+    "Jazz",
+    "Jean",
+    "Jean-Lewis",
+    "Jean-Pierre",
+    "Jebadiah",
+    "Jed",
+    "Jedd",
+    "Jedidiah",
+    "Jeemie",
+    "Jeevan",
+    "Jeffrey",
+    "Jensen",
+    "Jenson",
+    "Jensyn",
+    "Jeremy",
+    "Jerome",
+    "Jeronimo",
+    "Jerrick",
+    "Jerry",
+    "Jesse",
+    "Jesuseun",
+    "Jeswin",
+    "Jevan",
+    "Jeyun",
+    "Jez",
+    "Jia",
+    "Jian",
+    "Jiao",
+    "Jimmy",
+    "Jincheng",
+    "JJ",
+    "Joaquin",
+    "Joash",
+    "Jock",
+    "Jody",
+    "Joe",
+    "Joeddy",
+    "Joel",
+    "Joey",
+    "Joey-Jack",
+    "Johann",
+    "Johannes",
+    "Johansson",
+    "John",
+    "Johnathan",
+    "Johndean",
+    "Johnjay",
+    "John-Michael",
+    "Johnnie",
+    "Johnny",
+    "Johnpaul",
+    "John-Paul",
+    "John-Scott",
+    "Johnson",
+    "Jole",
+    "Jomuel",
+    "Jon",
+    "Jonah",
+    "Jonatan",
+    "Jonathan",
+    "Jonathon",
+    "Jonny",
+    "Jonothan",
+    "Jon-Paul",
+    "Jonson",
+    "Joojo",
+    "Jordan",
+    "Jordi",
+    "Jordon",
+    "Jordy",
+    "Jordyn",
+    "Jorge",
+    "Joris",
+    "Jorryn",
+    "Josan",
+    "Josef",
+    "Joseph",
+    "Josese",
+    "Josh",
+    "Joshiah",
+    "Joshua",
+    "Josiah",
+    "Joss",
+    "Jostelle",
+    "Joynul",
+    "Juan",
+    "Jubin",
+    "Judah",
+    "Jude",
+    "Jules",
+    "Julian",
+    "Julien",
+    "Jun",
+    "Junior",
+    "Jura",
+    "Justan",
+    "Justin",
+    "Justinas",
+    "Kaan",
+    "Kabeer",
+    "Kabir",
+    "Kacey",
+    "Kacper",
+    "Kade",
+    "Kaden",
+    "Kadin",
+    "Kadyn",
+    "Kaeden",
+    "Kael",
+    "Kaelan",
+    "Kaelin",
+    "Kaelum",
+    "Kai",
+    "Kaid",
+    "Kaidan",
+    "Kaiden",
+    "Kaidinn",
+    "Kaidyn",
+    "Kaileb",
+    "Kailin",
+    "Kain",
+    "Kaine",
+    "Kainin",
+    "Kainui",
+    "Kairn",
+    "Kaison",
+    "Kaiwen",
+    "Kajally",
+    "Kajetan",
+    "Kalani",
+    "Kale",
+    "Kaleb",
+    "Kaleem",
+    "Kal-el",
+    "Kalen",
+    "Kalin",
+    "Kallan",
+    "Kallin",
+    "Kalum",
+    "Kalvin",
+    "Kalvyn",
+    "Kameron",
+    "Kames",
+    "Kamil",
+    "Kamran",
+    "Kamron",
+    "Kane",
+    "Karam",
+    "Karamvir",
+    "Karandeep",
+    "Kareem",
+    "Karim",
+    "Karimas",
+    "Karl",
+    "Karol",
+    "Karson",
+    "Karsyn",
+    "Karthikeya",
+    "Kasey",
+    "Kash",
+    "Kashif",
+    "Kasim",
+    "Kasper",
+    "Kasra",
+    "Kavin",
+    "Kayam",
+    "Kaydan",
+    "Kayden",
+    "Kaydin",
+    "Kaydn",
+    "Kaydyn",
+    "Kaydyne",
+    "Kayleb",
+    "Kaylem",
+    "Kaylum",
+    "Kayne",
+    "Kaywan",
+    "Kealan",
+    "Kealon",
+    "Kean",
+    "Keane",
+    "Kearney",
+    "Keatin",
+    "Keaton",
+    "Keavan",
+    "Keayn",
+    "Kedrick",
+    "Keegan",
+    "Keelan",
+    "Keelin",
+    "Keeman",
+    "Keenan",
+    "Keenan-Lee",
+    "Keeton",
+    "Kehinde",
+    "Keigan",
+    "Keilan",
+    "Keir",
+    "Keiran",
+    "Keiren",
+    "Keiron",
+    "Keiryn",
+    "Keison",
+    "Keith",
+    "Keivlin",
+    "Kelam",
+    "Kelan",
+    "Kellan",
+    "Kellen",
+    "Kelso",
+    "Kelum",
+    "Kelvan",
+    "Kelvin",
+    "Ken",
+    "Kenan",
+    "Kendall",
+    "Kendyn",
+    "Kenlin",
+    "Kenneth",
+    "Kensey",
+    "Kenton",
+    "Kenyon",
+    "Kenzeigh",
+    "Kenzi",
+    "Kenzie",
+    "Kenzo",
+    "Kenzy",
+    "Keo",
+    "Ker",
+    "Kern",
+    "Kerr",
+    "Kevan",
+    "Kevin",
+    "Kevyn",
+    "Kez",
+    "Khai",
+    "Khalan",
+    "Khaleel",
+    "Khaya",
+    "Khevien",
+    "Khizar",
+    "Khizer",
+    "Kia",
+    "Kian",
+    "Kian-James",
+    "Kiaran",
+    "Kiarash",
+    "Kie",
+    "Kiefer",
+    "Kiegan",
+    "Kienan",
+    "Kier",
+    "Kieran",
+    "Kieran-Scott",
+    "Kieren",
+    "Kierin",
+    "Kiern",
+    "Kieron",
+    "Kieryn",
+    "Kile",
+    "Killian",
+    "Kimi",
+    "Kingston",
+    "Kinneil",
+    "Kinnon",
+    "Kinsey",
+    "Kiran",
+    "Kirk",
+    "Kirwin",
+    "Kit",
+    "Kiya",
+    "Kiyonari",
+    "Kjae",
+    "Klein",
+    "Klevis",
+    "Kobe",
+    "Kobi",
+    "Koby",
+    "Koddi",
+    "Koden",
+    "Kodi",
+    "Kodie",
+    "Kody",
+    "Kofi",
+    "Kogan",
+    "Kohen",
+    "Kole",
+    "Konan",
+    "Konar",
+    "Konnor",
+    "Konrad",
+    "Koray",
+    "Korben",
+    "Korbyn",
+    "Korey",
+    "Kori",
+    "Korrin",
+    "Kory",
+    "Koushik",
+    "Kris",
+    "Krish",
+    "Krishan",
+    "Kriss",
+    "Kristian",
+    "Kristin",
+    "Kristofer",
+    "Kristoffer",
+    "Kristopher",
+    "Kruz",
+    "Krzysiek",
+    "Krzysztof",
+    "Ksawery",
+    "Ksawier",
+    "Kuba",
+    "Kurt",
+    "Kurtis",
+    "Kurtis-Jae",
+    "Kyaan",
+    "Kyan",
+    "Kyde",
+    "Kyden",
+    "Kye",
+    "Kyel",
+    "Kyhran",
+    "Kyie",
+    "Kylan",
+    "Kylar",
+    "Kyle",
+    "Kyle-Derek",
+    "Kylian",
+    "Kym",
+    "Kynan",
+    "Kyral",
+    "Kyran",
+    "Kyren",
+    "Kyrillos",
+    "Kyro",
+    "Kyron",
+    "Kyrran",
+    "Lachlainn",
+    "Lachlan",
+    "Lachlann",
+    "Lael",
+    "Lagan",
+    "Laird",
+    "Laison",
+    "Lakshya",
+    "Lance",
+    "Lancelot",
+    "Landon",
+    "Lang",
+    "Lasse",
+    "Latif",
+    "Lauchlan",
+    "Lauchlin",
+    "Laughlan",
+    "Lauren",
+    "Laurence",
+    "Laurie",
+    "Lawlyn",
+    "Lawrence",
+    "Lawrie",
+    "Lawson",
+    "Layne",
+    "Layton",
+    "Lee",
+    "Leigh",
+    "Leigham",
+    "Leighton",
+    "Leilan",
+    "Leiten",
+    "Leithen",
+    "Leland",
+    "Lenin",
+    "Lennan",
+    "Lennen",
+    "Lennex",
+    "Lennon",
+    "Lennox",
+    "Lenny",
+    "Leno",
+    "Lenon",
+    "Lenyn",
+    "Leo",
+    "Leon",
+    "Leonard",
+    "Leonardas",
+    "Leonardo",
+    "Lepeng",
+    "Leroy",
+    "Leven",
+    "Levi",
+    "Levon",
+    "Levy",
+    "Lewie",
+    "Lewin",
+    "Lewis",
+    "Lex",
+    "Leydon",
+    "Leyland",
+    "Leylann",
+    "Leyton",
+    "Liall",
+    "Liam",
+    "Liam-Stephen",
+    "Limo",
+    "Lincoln",
+    "Lincoln-John",
+    "Lincon",
+    "Linden",
+    "Linton",
+    "Lionel",
+    "Lisandro",
+    "Litrell",
+    "Liyonela-Elam",
+    "LLeyton",
+    "Lliam",
+    "Lloyd",
+    "Lloyde",
+    "Loche",
+    "Lochlan",
+    "Lochlann",
+    "Lochlan-Oliver",
+    "Lock",
+    "Lockey",
+    "Logan",
+    "Logann",
+    "Logan-Rhys",
+    "Loghan",
+    "Lokesh",
+    "Loki",
+    "Lomond",
+    "Lorcan",
+    "Lorenz",
+    "Lorenzo",
+    "Lorne",
+    "Loudon",
+    "Loui",
+    "Louie",
+    "Louis",
+    "Loukas",
+    "Lovell",
+    "Luc",
+    "Luca",
+    "Lucais",
+    "Lucas",
+    "Lucca",
+    "Lucian",
+    "Luciano",
+    "Lucien",
+    "Lucus",
+    "Luic",
+    "Luis",
+    "Luk",
+    "Luka",
+    "Lukas",
+    "Lukasz",
+    "Luke",
+    "Lukmaan",
+    "Luqman",
+    "Lyall",
+    "Lyle",
+    "Lyndsay",
+    "Lysander",
+    "Maanav",
+    "Maaz",
+    "Mac",
+    "Macallum",
+    "Macaulay",
+    "Macauley",
+    "Macaully",
+    "Machlan",
+    "Maciej",
+    "Mack",
+    "Mackenzie",
+    "Mackenzy",
+    "Mackie",
+    "Macsen",
+    "Macy",
+    "Madaki",
+    "Maddison",
+    "Maddox",
+    "Madison",
+    "Madison-Jake",
+    "Madox",
+    "Mael",
+    "Magnus",
+    "Mahan",
+    "Mahdi",
+    "Mahmoud",
+    "Maias",
+    "Maison",
+    "Maisum",
+    "Maitlind",
+    "Majid",
+    "Makensie",
+    "Makenzie",
+    "Makin",
+    "Maksim",
+    "Maksymilian",
+    "Malachai",
+    "Malachi",
+    "Malachy",
+    "Malakai",
+    "Malakhy",
+    "Malcolm",
+    "Malik",
+    "Malikye",
+    "Malo",
+    "Ma'moon",
+    "Manas",
+    "Maneet",
+    "Manmohan",
+    "Manolo",
+    "Manson",
+    "Mantej",
+    "Manuel",
+    "Manus",
+    "Marc",
+    "Marc-Anthony",
+    "Marcel",
+    "Marcello",
+    "Marcin",
+    "Marco",
+    "Marcos",
+    "Marcous",
+    "Marcquis",
+    "Marcus",
+    "Mario",
+    "Marios",
+    "Marius",
+    "Mark",
+    "Marko",
+    "Markus",
+    "Marley",
+    "Marlin",
+    "Marlon",
+    "Maros",
+    "Marshall",
+    "Martin",
+    "Marty",
+    "Martyn",
+    "Marvellous",
+    "Marvin",
+    "Marwan",
+    "Maryk",
+    "Marzuq",
+    "Mashhood",
+    "Mason",
+    "Mason-Jay",
+    "Masood",
+    "Masson",
+    "Matas",
+    "Matej",
+    "Mateusz",
+    "Mathew",
+    "Mathias",
+    "Mathu",
+    "Mathuyan",
+    "Mati",
+    "Matt",
+    "Matteo",
+    "Matthew",
+    "Matthew-William",
+    "Matthias",
+    "Max",
+    "Maxim",
+    "Maximilian",
+    "Maximillian",
+    "Maximus",
+    "Maxwell",
+    "Maxx",
+    "Mayeul",
+    "Mayson",
+    "Mazin",
+    "Mcbride",
+    "McCaulley",
+    "McKade",
+    "McKauley",
+    "McKay",
+    "McKenzie",
+    "McLay",
+    "Meftah",
+    "Mehmet",
+    "Mehraz",
+    "Meko",
+    "Melville",
+    "Meshach",
+    "Meyzhward",
+    "Micah",
+    "Michael",
+    "Michael-Alexander",
+    "Michael-James",
+    "Michal",
+    "Michat",
+    "Micheal",
+    "Michee",
+    "Mickey",
+    "Miguel",
+    "Mika",
+    "Mikael",
+    "Mikee",
+    "Mikey",
+    "Mikhail",
+    "Mikolaj",
+    "Miles",
+    "Millar",
+    "Miller",
+    "Milo",
+    "Milos",
+    "Milosz",
+    "Mir",
+    "Mirza",
+    "Mitch",
+    "Mitchel",
+    "Mitchell",
+    "Moad",
+    "Moayd",
+    "Mobeen",
+    "Modoulamin",
+    "Modu",
+    "Mohamad",
+    "Mohamed",
+    "Mohammad",
+    "Mohammad-Bilal",
+    "Mohammed",
+    "Mohanad",
+    "Mohd",
+    "Momin",
+    "Momooreoluwa",
+    "Montague",
+    "Montgomery",
+    "Monty",
+    "Moore",
+    "Moosa",
+    "Moray",
+    "Morgan",
+    "Morgyn",
+    "Morris",
+    "Morton",
+    "Moshy",
+    "Motade",
+    "Moyes",
+    "Msughter",
+    "Mueez",
+    "Muhamadjavad",
+    "Muhammad",
+    "Muhammed",
+    "Muhsin",
+    "Muir",
+    "Munachi",
+    "Muneeb",
+    "Mungo",
+    "Munir",
+    "Munmair",
+    "Munro",
+    "Murdo",
+    "Murray",
+    "Murrough",
+    "Murry",
+    "Musa",
+    "Musse",
+    "Mustafa",
+    "Mustapha",
+    "Muzammil",
+    "Muzzammil",
+    "Mykie",
+    "Myles",
+    "Mylo",
+    "Nabeel",
+    "Nadeem",
+    "Nader",
+    "Nagib",
+    "Naif",
+    "Nairn",
+    "Narvic",
+    "Nash",
+    "Nasser",
+    "Nassir",
+    "Natan",
+    "Nate",
+    "Nathan",
+    "Nathanael",
+    "Nathanial",
+    "Nathaniel",
+    "Nathan-Rae",
+    "Nawfal",
+    "Nayan",
+    "Neco",
+    "Neil",
+    "Nelson",
+    "Neo",
+    "Neshawn",
+    "Nevan",
+    "Nevin",
+    "Ngonidzashe",
+    "Nial",
+    "Niall",
+    "Nicholas",
+    "Nick",
+    "Nickhill",
+    "Nicki",
+    "Nickson",
+    "Nicky",
+    "Nico",
+    "Nicodemus",
+    "Nicol",
+    "Nicolae",
+    "Nicolas",
+    "Nidhish",
+    "Nihaal",
+    "Nihal",
+    "Nikash",
+    "Nikhil",
+    "Niki",
+    "Nikita",
+    "Nikodem",
+    "Nikolai",
+    "Nikos",
+    "Nilav",
+    "Niraj",
+    "Niro",
+    "Niven",
+    "Noah",
+    "Noel",
+    "Nolan",
+    "Noor",
+    "Norman",
+    "Norrie",
+    "Nuada",
+    "Nyah",
+    "Oakley",
+    "Oban",
+    "Obieluem",
+    "Obosa",
+    "Odhran",
+    "Odin",
+    "Odynn",
+    "Ogheneochuko",
+    "Ogheneruno",
+    "Ohran",
+    "Oilibhear",
+    "Oisin",
+    "Ojima-Ojo",
+    "Okeoghene",
+    "Olaf",
+    "Ola-Oluwa",
+    "Olaoluwapolorimi",
+    "Ole",
+    "Olie",
+    "Oliver",
+    "Olivier",
+    "Oliwier",
+    "Ollie",
+    "Olurotimi",
+    "Oluwadamilare",
+    "Oluwadamiloju",
+    "Oluwafemi",
+    "Oluwafikunayomi",
+    "Oluwalayomi",
+    "Oluwatobiloba",
+    "Oluwatoni",
+    "Omar",
+    "Omri",
+    "Oran",
+    "Orin",
+    "Orlando",
+    "Orley",
+    "Orran",
+    "Orrick",
+    "Orrin",
+    "Orson",
+    "Oryn",
+    "Oscar",
+    "Osesenagha",
+    "Oskar",
+    "Ossian",
+    "Oswald",
+    "Otto",
+    "Owain",
+    "Owais",
+    "Owen",
+    "Owyn",
+    "Oz",
+    "Ozzy",
+    "Pablo",
+    "Pacey",
+    "Padraig",
+    "Paolo",
+    "Pardeepraj",
+    "Parkash",
+    "Parker",
+    "Pascoe",
+    "Pasquale",
+    "Patrick",
+    "Patrick-John",
+    "Patrikas",
+    "Patryk",
+    "Paul",
+    "Pavit",
+    "Pawel",
+    "Pawlo",
+    "Pearce",
+    "Pearse",
+    "Pearsen",
+    "Pedram",
+    "Pedro",
+    "Peirce",
+    "Peiyan",
+    "Pele",
+    "Peni",
+    "Peregrine",
+    "Peter",
+    "Phani",
+    "Philip",
+    "Philippos",
+    "Phinehas",
+    "Phoenix",
+    "Phoevos",
+    "Pierce",
+    "Pierre-Antoine",
+    "Pieter",
+    "Pietro",
+    "Piotr",
+    "Porter",
+    "Prabhjoit",
+    "Prabodhan",
+    "Praise",
+    "Pranav",
+    "Pravin",
+    "Precious",
+    "Prentice",
+    "Presley",
+    "Preston",
+    "Preston-Jay",
+    "Prinay",
+    "Prince",
+    "Prithvi",
+    "Promise",
+    "Puneetpaul",
+    "Pushkar",
+    "Qasim",
+    "Qirui",
+    "Quinlan",
+    "Quinn",
+    "Radmiras",
+    "Raees",
+    "Raegan",
+    "Rafael",
+    "Rafal",
+    "Rafferty",
+    "Rafi",
+    "Raheem",
+    "Rahil",
+    "Rahim",
+    "Rahman",
+    "Raith",
+    "Raithin",
+    "Raja",
+    "Rajab-Ali",
+    "Rajan",
+    "Ralfs",
+    "Ralph",
+    "Ramanas",
+    "Ramit",
+    "Ramone",
+    "Ramsay",
+    "Ramsey",
+    "Rana",
+    "Ranolph",
+    "Raphael",
+    "Rasmus",
+    "Rasul",
+    "Raul",
+    "Raunaq",
+    "Ravin",
+    "Ray",
+    "Rayaan",
+    "Rayan",
+    "Rayane",
+    "Rayden",
+    "Rayhan",
+    "Raymond",
+    "Rayne",
+    "Rayyan",
+    "Raza",
+    "Reace",
+    "Reagan",
+    "Reean",
+    "Reece",
+    "Reed",
+    "Reegan",
+    "Rees",
+    "Reese",
+    "Reeve",
+    "Regan",
+    "Regean",
+    "Reggie",
+    "Rehaan",
+    "Rehan",
+    "Reice",
+    "Reid",
+    "Reigan",
+    "Reilly",
+    "Reily",
+    "Reis",
+    "Reiss",
+    "Remigiusz",
+    "Remo",
+    "Remy",
+    "Ren",
+    "Renars",
+    "Reng",
+    "Rennie",
+    "Reno",
+    "Reo",
+    "Reuben",
+    "Rexford",
+    "Reynold",
+    "Rhein",
+    "Rheo",
+    "Rhett",
+    "Rheyden",
+    "Rhian",
+    "Rhoan",
+    "Rholmark",
+    "Rhoridh",
+    "Rhuairidh",
+    "Rhuan",
+    "Rhuaridh",
+    "Rhudi",
+    "Rhy",
+    "Rhyan",
+    "Rhyley",
+    "Rhyon",
+    "Rhys",
+    "Rhys-Bernard",
+    "Rhyse",
+    "Riach",
+    "Rian",
+    "Ricards",
+    "Riccardo",
+    "Ricco",
+    "Rice",
+    "Richard",
+    "Richey",
+    "Richie",
+    "Ricky",
+    "Rico",
+    "Ridley",
+    "Ridwan",
+    "Rihab",
+    "Rihan",
+    "Rihards",
+    "Rihonn",
+    "Rikki",
+    "Riley",
+    "Rio",
+    "Rioden",
+    "Rishi",
+    "Ritchie",
+    "Rivan",
+    "Riyadh",
+    "Riyaj",
+    "Roan",
+    "Roark",
+    "Roary",
+    "Rob",
+    "Robbi",
+    "Robbie",
+    "Robbie-lee",
+    "Robby",
+    "Robert",
+    "Robert-Gordon",
+    "Robertjohn",
+    "Robi",
+    "Robin",
+    "Rocco",
+    "Roddy",
+    "Roderick",
+    "Rodrigo",
+    "Roen",
+    "Rogan",
+    "Roger",
+    "Rohaan",
+    "Rohan",
+    "Rohin",
+    "Rohit",
+    "Rokas",
+    "Roman",
+    "Ronald",
+    "Ronan",
+    "Ronan-Benedict",
+    "Ronin",
+    "Ronnie",
+    "Rooke",
+    "Roray",
+    "Rori",
+    "Rorie",
+    "Rory",
+    "Roshan",
+    "Ross",
+    "Ross-Andrew",
+    "Rossi",
+    "Rowan",
+    "Rowen",
+    "Roy",
+    "Ruadhan",
+    "Ruaidhri",
+    "Ruairi",
+    "Ruairidh",
+    "Ruan",
+    "Ruaraidh",
+    "Ruari",
+    "Ruaridh",
+    "Ruben",
+    "Rubhan",
+    "Rubin",
+    "Rubyn",
+    "Rudi",
+    "Rudy",
+    "Rufus",
+    "Rui",
+    "Ruo",
+    "Rupert",
+    "Ruslan",
+    "Russel",
+    "Russell",
+    "Ryaan",
+    "Ryan",
+    "Ryan-Lee",
+    "Ryden",
+    "Ryder",
+    "Ryese",
+    "Ryhs",
+    "Rylan",
+    "Rylay",
+    "Rylee",
+    "Ryleigh",
+    "Ryley",
+    "Rylie",
+    "Ryo",
+    "Ryszard",
+    "Saad",
+    "Sabeen",
+    "Sachkirat",
+    "Saffi",
+    "Saghun",
+    "Sahaib",
+    "Sahbian",
+    "Sahil",
+    "Saif",
+    "Saifaddine",
+    "Saim",
+    "Sajid",
+    "Sajjad",
+    "Salahudin",
+    "Salman",
+    "Salter",
+    "Salvador",
+    "Sam",
+    "Saman",
+    "Samar",
+    "Samarjit",
+    "Samatar",
+    "Sambrid",
+    "Sameer",
+    "Sami",
+    "Samir",
+    "Sami-Ullah",
+    "Samual",
+    "Samuel",
+    "Samuela",
+    "Samy",
+    "Sanaullah",
+    "Sandro",
+    "Sandy",
+    "Sanfur",
+    "Sanjay",
+    "Santiago",
+    "Santino",
+    "Satveer",
+    "Saul",
+    "Saunders",
+    "Savin",
+    "Sayad",
+    "Sayeed",
+    "Sayf",
+    "Scot",
+    "Scott",
+    "Scott-Alexander",
+    "Seaan",
+    "Seamas",
+    "Seamus",
+    "Sean",
+    "Seane",
+    "Sean-James",
+    "Sean-Paul",
+    "Sean-Ray",
+    "Seb",
+    "Sebastian",
+    "Sebastien",
+    "Selasi",
+    "Seonaidh",
+    "Sephiroth",
+    "Sergei",
+    "Sergio",
+    "Seth",
+    "Sethu",
+    "Seumas",
+    "Shaarvin",
+    "Shadow",
+    "Shae",
+    "Shahmir",
+    "Shai",
+    "Shane",
+    "Shannon",
+    "Sharland",
+    "Sharoz",
+    "Shaughn",
+    "Shaun",
+    "Shaunpaul",
+    "Shaun-Paul",
+    "Shaun-Thomas",
+    "Shaurya",
+    "Shaw",
+    "Shawn",
+    "Shawnpaul",
+    "Shay",
+    "Shayaan",
+    "Shayan",
+    "Shaye",
+    "Shayne",
+    "Shazil",
+    "Shea",
+    "Sheafan",
+    "Sheigh",
+    "Shenuk",
+    "Sher",
+    "Shergo",
+    "Sheriff",
+    "Sherwyn",
+    "Shiloh",
+    "Shiraz",
+    "Shreeram",
+    "Shreyas",
+    "Shyam",
+    "Siddhant",
+    "Siddharth",
+    "Sidharth",
+    "Sidney",
+    "Siergiej",
+    "Silas",
+    "Simon",
+    "Sinai",
+    "Skye",
+    "Sofian",
+    "Sohaib",
+    "Sohail",
+    "Soham",
+    "Sohan",
+    "Sol",
+    "Solomon",
+    "Sonneey",
+    "Sonni",
+    "Sonny",
+    "Sorley",
+    "Soul",
+    "Spencer",
+    "Spondon",
+    "Stanislaw",
+    "Stanley",
+    "Stefan",
+    "Stefano",
+    "Stefin",
+    "Stephen",
+    "Stephenjunior",
+    "Steve",
+    "Steven",
+    "Steven-lee",
+    "Stevie",
+    "Stewart",
+    "Stewarty",
+    "Strachan",
+    "Struan",
+    "Stuart",
+    "Su",
+    "Subhaan",
+    "Sudais",
+    "Suheyb",
+    "Suilven",
+    "Sukhi",
+    "Sukhpal",
+    "Sukhvir",
+    "Sulayman",
+    "Sullivan",
+    "Sultan",
+    "Sung",
+    "Sunny",
+    "Suraj",
+    "Surien",
+    "Sweyn",
+    "Syed",
+    "Sylvain",
+    "Symon",
+    "Szymon",
+    "Tadd",
+    "Taddy",
+    "Tadhg",
+    "Taegan",
+    "Taegen",
+    "Tai",
+    "Tait",
+    "Taiwo",
+    "Talha",
+    "Taliesin",
+    "Talon",
+    "Talorcan",
+    "Tamar",
+    "Tamiem",
+    "Tammam",
+    "Tanay",
+    "Tane",
+    "Tanner",
+    "Tanvir",
+    "Tanzeel",
+    "Taonga",
+    "Tarik",
+    "Tariq-Jay",
+    "Tate",
+    "Taylan",
+    "Taylar",
+    "Tayler",
+    "Taylor",
+    "Taylor-Jay",
+    "Taylor-Lee",
+    "Tayo",
+    "Tayyab",
+    "Tayye",
+    "Tayyib",
+    "Teagan",
+    "Tee",
+    "Teejay",
+    "Tee-jay",
+    "Tegan",
+    "Teighen",
+    "Teiyib",
+    "Te-Jay",
+    "Temba",
+    "Teo",
+    "Teodor",
+    "Teos",
+    "Terry",
+    "Teydren",
+    "Theo",
+    "Theodore",
+    "Thiago",
+    "Thierry",
+    "Thom",
+    "Thomas",
+    "Thomas-Jay",
+    "Thomson",
+    "Thorben",
+    "Thorfinn",
+    "Thrinei",
+    "Thumbiko",
+    "Tiago",
+    "Tian",
+    "Tiarnan",
+    "Tibet",
+    "Tieran",
+    "Tiernan",
+    "Timothy",
+    "Timucin",
+    "Tiree",
+    "Tisloh",
+    "Titi",
+    "Titus",
+    "Tiylar",
+    "TJ",
+    "Tjay",
+    "T-Jay",
+    "Tobey",
+    "Tobi",
+    "Tobias",
+    "Tobie",
+    "Toby",
+    "Todd",
+    "Tokinaga",
+    "Toluwalase",
+    "Tom",
+    "Tomas",
+    "Tomasz",
+    "Tommi-Lee",
+    "Tommy",
+    "Tomson",
+    "Tony",
+    "Torin",
+    "Torquil",
+    "Torran",
+    "Torrin",
+    "Torsten",
+    "Trafford",
+    "Trai",
+    "Travis",
+    "Tre",
+    "Trent",
+    "Trey",
+    "Tristain",
+    "Tristan",
+    "Troy",
+    "Tubagus",
+    "Turki",
+    "Turner",
+    "Ty",
+    "Ty-Alexander",
+    "Tye",
+    "Tyelor",
+    "Tylar",
+    "Tyler",
+    "Tyler-James",
+    "Tyler-Jay",
+    "Tyllor",
+    "Tylor",
+    "Tymom",
+    "Tymon",
+    "Tymoteusz",
+    "Tyra",
+    "Tyree",
+    "Tyrnan",
+    "Tyrone",
+    "Tyson",
+    "Ubaid",
+    "Ubayd",
+    "Uchenna",
+    "Uilleam",
+    "Umair",
+    "Umar",
+    "Umer",
+    "Umut",
+    "Urban",
+    "Uri",
+    "Usman",
+    "Uzair",
+    "Uzayr",
+    "Valen",
+    "Valentin",
+    "Valentino",
+    "Valery",
+    "Valo",
+    "Vasyl",
+    "Vedantsinh",
+    "Veeran",
+    "Victor",
+    "Victory",
+    "Vinay",
+    "Vince",
+    "Vincent",
+    "Vincenzo",
+    "Vinh",
+    "Vinnie",
+    "Vithujan",
+    "Vladimir",
+    "Vladislav",
+    "Vrishin",
+    "Vuyolwethu",
+    "Wabuya",
+    "Wai",
+    "Walid",
+    "Wallace",
+    "Walter",
+    "Waqaas",
+    "Warkhas",
+    "Warren",
+    "Warrick",
+    "Wasif",
+    "Wayde",
+    "Wayne",
+    "Wei",
+    "Wen",
+    "Wesley",
+    "Wesley-Scott",
+    "Wiktor",
+    "Wilkie",
+    "Will",
+    "William",
+    "William-John",
+    "Willum",
+    "Wilson",
+    "Windsor",
+    "Wojciech",
+    "Woyenbrakemi",
+    "Wyatt",
+    "Wylie",
+    "Wynn",
+    "Xabier",
+    "Xander",
+    "Xavier",
+    "Xiao",
+    "Xida",
+    "Xin",
+    "Xue",
+    "Yadgor",
+    "Yago",
+    "Yahya",
+    "Yakup",
+    "Yang",
+    "Yanick",
+    "Yann",
+    "Yannick",
+    "Yaseen",
+    "Yasin",
+    "Yasir",
+    "Yassin",
+    "Yoji",
+    "Yong",
+    "Yoolgeun",
+    "Yorgos",
+    "Youcef",
+    "Yousif",
+    "Youssef",
+    "Yu",
+    "Yuanyu",
+    "Yuri",
+    "Yusef",
+    "Yusuf",
+    "Yves",
+    "Zaaine",
+    "Zaak",
+    "Zac",
+    "Zach",
+    "Zachariah",
+    "Zacharias",
+    "Zacharie",
+    "Zacharius",
+    "Zachariya",
+    "Zachary",
+    "Zachary-Marc",
+    "Zachery",
+    "Zack",
+    "Zackary",
+    "Zaid",
+    "Zain",
+    "Zaine",
+    "Zaineddine",
+    "Zainedin",
+    "Zak",
+    "Zakaria",
+    "Zakariya",
+    "Zakary",
+    "Zaki",
+    "Zakir",
+    "Zakk",
+    "Zamaar",
+    "Zander",
+    "Zane",
+    "Zarran",
+    "Zayd",
+    "Zayn",
+    "Zayne",
+    "Ze",
+    "Zechariah",
+    "Zeek",
+    "Zeeshan",
+    "Zeid",
+    "Zein",
+    "Zen",
+    "Zendel",
+    "Zenith",
+    "Zennon",
+    "Zeph",
+    "Zerah",
+    "Zhen",
+    "Zhi",
+    "Zhong",
+    "Zhuo",
+    "Zi",
+    "Zidane",
+    "Zijie",
+    "Zinedine",
+    "Zion",
+    "Zishan",
+    "Ziya",
+    "Ziyaan",
+    "Zohaib",
+    "Zohair",
+    "Zoubaeir",
+    "Zubair",
+    "Zubayr",
+    "Zuriel",
+]
+
+VERBS = [  # 123 distinct verb phrases; NOTE(review): validate()'s 168387000 bound presumably assumes this count
+    "loves",
+    "adores",
+    "cherishes",
+    "fancies",
+    "prefers",
+    "appreciates",
+    "enjoys",
+    "relishes",
+    "savors",
+    "likes",
+    "dotes on",
+    "treasures",
+    "admires",
+    "reveres",
+    "idolizes",
+    "worships",
+    "exalts",
+    "esteems",
+    "covets",
+    "craves",
+    "desires",
+    "hankers after",
+    "hungers for",
+    "dislikes",
+    "hates",
+    "despises",
+    "detests",
+    "abhors",
+    "loathes",
+    "eschews",
+    "rejects",
+    "scorns",
+    "spurns",
+    "disdains",
+    "shuns",
+    "approves of",
+    "disapproves of",
+    "extols",
+    "prizes",
+    "respects",
+    "values",
+    "embraces",
+    "endorses",
+    "welcomes",
+    "celebrates",
+    "applauds",
+    "commends",
+    "supports",
+    "favors",
+    "champions",
+    "basks in",
+    "delights in",
+    "thrives on",
+    "marvels at",
+    "is fascinated by",
+    "bears",
+    "tolerates",
+    "accepts",
+    "stomachs",
+    "abides",
+    "endures",
+    "puts up with",
+    "can’t stand",
+    "can’t bear",
+    "is fond of",
+    "is keen on",
+    "is partial to",
+    "is crazy about",
+    "is nuts about",
+    "is passionate about",
+    "is obsessed with",
+    "is addicted to",
+    "longs for",
+    "pines for",
+    "yearns for",
+    "lusts after",
+    "rejoices in",
+    "revels in",
+    "exults in",
+    "glories in",
+    "finds pleasure in",
+    "finds joy in",
+    "finds satisfaction in",
+    "finds fulfillment in",
+    "is devoted to",
+    "is committed to",
+    "venerates",
+    "lionizes",
+    "lauds",
+    "deifies",
+    "glorifies",
+    "sanctifies",
+    "belittles",
+    "disparages",
+    "derides",
+    "ridicules",
+    "mocks",
+    "sneers at",
+    "scoffs at",
+    "spits on",
+    "reviles",
+    "execrates",
+    "curses",
+    "blasts",
+    "damns",
+    "rails against",
+    "resents",
+    "begrudges",
+    "bemoans",
+    "laments",
+    "regrets",
+    "gripes about",
+    "complains about",
+    "is indifferent to",
+    "is apathetic about",
+    "is neutral toward",
+    "shrugs off",
+    "ignores",
+    "overlooks",
+    "neglects",
+    "dismisses",
+    "brushes off",
+    "waves away",
+]
+
+SUBJECTS = [  # 500 phrases: ten themed groups of 50 entries each
+    # Group 1: Colors (50 entries, each phrased "the color <name>")
+    "the color red",
+    "the color blue",
+    "the color green",
+    "the color yellow",
+    "the color purple",
+    "the color pink",
+    "the color orange",
+    "the color brown",
+    "the color black",
+    "the color white",
+    "the color gray",
+    "the color violet",
+    "the color indigo",
+    "the color turquoise",
+    "the color teal",
+    "the color magenta",
+    "the color maroon",
+    "the color gold",
+    "the color silver",
+    "the color bronze",
+    "the color amber",
+    "the color burgundy",
+    "the color chartreuse",
+    "the color crimson",
+    "the color cyan",
+    "the color fuchsia",
+    "the color lavender",
+    "the color lime",
+    "the color mint",
+    "the color navy",
+    "the color olive",
+    "the color peach",
+    "the color plum",
+    "the color salmon",
+    "the color sienna",
+    "the color tan",
+    "the color taupe",
+    "the color periwinkle",
+    "the color aquamarine",
+    "the color beige",
+    "the color coral",
+    "the color eggplant",
+    "the color ivory",
+    "the color khaki",
+    "the color lemon",
+    "the color lilac",
+    "the color mocha",
+    "the color mustard",
+    "the color ruby",
+    "the color sapphire",
+    # Group 2: Animals (50 entries, plural nouns)
+    "cats",
+    "dogs",
+    "elephants",
+    "lions",
+    "tigers",
+    "bears",
+    "wolves",
+    "foxes",
+    "horses",
+    "cows",
+    "pigs",
+    "sheep",
+    "goats",
+    "chickens",
+    "ducks",
+    "geese",
+    "deer",
+    "monkeys",
+    "giraffes",
+    "zebras",
+    "kangaroos",
+    "pandas",
+    "rabbits",
+    "squirrels",
+    "bats",
+    "whales",
+    "dolphins",
+    "sharks",
+    "octopuses",
+    "crabs",
+    "lobsters",
+    "snakes",
+    "alligators",
+    "crocodiles",
+    "parrots",
+    "eagles",
+    "falcons",
+    "peacocks",
+    "ostriches",
+    "camels",
+    "buffaloes",
+    "rhinos",
+    "hippopotamuses",
+    "ants",
+    "bees",
+    "butterflies",
+    "spiders",
+    "scorpions",
+    "turtles",
+    "penguins",
+    # Group 3: Sports (50 entries: "playing <sport>" phrases and activity names)
+    "playing soccer",
+    "playing basketball",
+    "playing baseball",
+    "playing football",
+    "playing tennis",
+    "playing volleyball",
+    "playing golf",
+    "playing hockey",
+    "playing cricket",
+    "playing rugby",
+    "playing table tennis",
+    "playing badminton",
+    "playing squash",
+    "playing lacrosse",
+    "skiing",
+    "snowboarding",
+    "surfing",
+    "skateboarding",
+    "cycling",
+    "running",
+    "swimming",
+    "boxing",
+    "wrestling",
+    "fencing",
+    "martial arts",
+    "archery",
+    "climbing",
+    "hiking",
+    "kayaking",
+    "canoeing",
+    "sailing",
+    "rowing",
+    "diving",
+    "scuba diving",
+    "ice skating",
+    "roller skating",
+    "mountain biking",
+    "trail running",
+    "triathlon",
+    "marathon running",
+    "parkour",
+    "ultimate frisbee",
+    "playing handball",
+    "playing darts",
+    "playing billiards",
+    "playing bowling",
+    "playing ping pong",  # NOTE(review): same sport as "playing table tennis" above
+    "playing ice hockey",
+    "playing water polo",
+    "playing field hockey",
+    # Group 4: Household Activities/Objects (50 entries; all are chores/activities, none is an object)
+    "washing the dishes",
+    "cleaning the windows",
+    "vacuuming the floor",
+    "mopping the floor",
+    "ironing clothes",
+    "making the bed",
+    "doing the laundry",
+    "dusting the furniture",
+    "taking out the trash",
+    "cooking dinner",
+    "baking bread",
+    "washing the car",
+    "gardening",
+    "raking the leaves",
+    "watering the plants",
+    "pruning the bushes",
+    "sweeping the porch",
+    "polishing the silver",
+    "organizing the closet",
+    "cleaning the bathroom",
+    "scrubbing the tub",
+    "cleaning the refrigerator",
+    "emptying the dishwasher",
+    "making coffee",
+    "setting the table",
+    "preparing breakfast",
+    "rearranging the furniture",
+    "ironing the curtains",
+    "folding the laundry",
+    "mending clothes",
+    "recycling",
+    "composting",
+    "decluttering",
+    "cleaning the gutters",
+    "cleaning the patio",
+    "cleaning the oven",
+    "organizing the pantry",
+    "cleaning the carpets",
+    "dusting the shelves",
+    "cleaning the ceiling fan",
+    "cleaning the microwave",
+    "scrubbing the floor",
+    "polishing the wood",
+    "cleaning the blinds",
+    "watering the garden",
+    "weeding the garden",
+    "cleaning the garage",
+    "sweeping the driveway",
+    "rinsing the vegetables",
+    "chopping vegetables",
+    # Group 5: Concepts (50 entries: fields of study followed by abstract nouns)
+    "philosophy",
+    "mathematics",
+    "history",
+    "science",
+    "literature",
+    "poetry",
+    "art",
+    "music",
+    "cinema",
+    "theatre",
+    "technology",
+    "astronomy",
+    "psychology",
+    "sociology",
+    "economics",
+    "politics",
+    "ecology",
+    "anthropology",
+    "ethics",
+    "religion",
+    "logic",
+    "metaphysics",
+    "biology",
+    "geography",
+    "linguistics",
+    "archaeology",
+    "literacy",
+    "creativity",
+    "imagination",
+    "innovation",
+    "curiosity",
+    "adventure",
+    "mystery",
+    "beauty",
+    "courage",
+    "compassion",
+    "empathy",
+    "integrity",
+    "kindness",
+    "patience",
+    "wisdom",
+    "resilience",
+    "determination",
+    "gratitude",
+    "humor",
+    "optimism",
+    "simplicity",
+    "elegance",
+    "serenity",
+    "balance",
+    # Group 6: Music/Arts (50 entries, mostly gerund phrases)
+    "playing guitar",
+    "playing piano",
+    "playing drums",
+    "playing violin",
+    "playing flute",
+    "playing saxophone",
+    "singing",
+    "dancing",
+    "painting",
+    "drawing",
+    "sculpting",
+    "playing the trumpet",
+    "playing the clarinet",
+    "playing the cello",
+    "playing the harp",
+    "playing the oboe",
+    "playing the accordion",
+    "playing the banjo",
+    "playing the mandolin",
+    "playing the ukulele",
+    "composing music",
+    "writing poetry",
+    "writing stories",
+    "writing novels",
+    "performing stand-up comedy",
+    "photography",
+    "filmmaking",
+    "acting",
+    "ballet dancing",
+    "hip-hop dancing",
+    "modern dancing",
+    "tap dancing",
+    "jazz dancing",
+    "playing percussion",
+    "singing opera",
+    "DJing",
+    "rapping",
+    "beatboxing",
+    "mixing music",
+    "recording music",
+    "listening to classical music",
+    "listening to rock music",
+    "listening to jazz",
+    "listening to blues",
+    "listening to electronic music",
+    "listening to folk music",
+    "performing magic",
+    "calligraphy",
+    "sketching",
+    "playing board games",
+    # Group 7: Vehicles/Travel (50 entries, plural nouns)
+    "cars",
+    "bicycles",
+    "motorcycles",
+    "trains",
+    "airplanes",
+    "boats",
+    "scooters",
+    "submarines",
+    "helicopters",
+    "trucks",
+    "vans",
+    "sailboats",
+    "kayaks",
+    "canoes",
+    "jet skis",
+    "electric scooters",
+    "skateboards",
+    "rollerblades",
+    "segways",
+    "hot air balloons",
+    "space shuttles",
+    "ferries",
+    "limousines",
+    "convertibles",
+    "minivans",
+    "motorhomes",
+    "tractors",
+    "bulldozers",
+    "forklifts",
+    "subways",
+    "trams",
+    "pedicabs",
+    "rickshaws",
+    "sailplanes",
+    "gliders",
+    "balloons",
+    "camping trailers",
+    "snowmobiles",
+    "all-terrain vehicles",
+    "roadsters",
+    "coupe cars",
+    "convertible cars",  # NOTE(review): overlaps with "convertibles" above
+    "electric cars",
+    "hybrid cars",
+    "sports cars",
+    "off-road vehicles",
+    "luxury sedans",
+    "minibikes",
+    "electric bikes",
+    "racing cars",
+    # Group 8: Food and Beverages (50 entries; mixes mass nouns, plurals and a few singular count nouns)
+    "pizza",
+    "sushi",
+    "pasta",
+    "burger",
+    "salad",
+    "steak",
+    "chocolate",
+    "ice cream",
+    "coffee",
+    "tea",
+    "wine",
+    "beer",
+    "cocktails",
+    "bread",
+    "cheese",
+    "fruit salad",
+    "vegetable soup",
+    "sandwiches",
+    "tacos",
+    "burritos",
+    "dumplings",
+    "noodles",
+    "curry",
+    "barbecue",
+    "hot dogs",
+    "soup",
+    "pastries",
+    "cupcakes",
+    "cookies",
+    "brownies",
+    "muffins",
+    "pancakes",
+    "waffles",
+    "omelettes",
+    "smoothies",
+    "salmon",
+    "grilled chicken",
+    "roast beef",
+    "french fries",
+    "popcorn",
+    "nachos",
+    "cheesecake",
+    "pie",
+    "candy",
+    "sandwich",  # NOTE(review): singular near-duplicate of "sandwiches" above
+    "stir-fry",
+    "lasagna",
+    "quiche",
+    "salsa",
+    "guacamole",
+    # Group 9: Hobbies/Leisure (50 entries)
+    "reading mystery novels",
+    "reading science fiction",
+    "reading fantasy novels",
+    "solving puzzles",
+    "playing chess",
+    "playing checkers",
+    "playing video games",
+    "building model airplanes",
+    "building model cars",
+    "collecting stamps",
+    "collecting coins",
+    "collecting postcards",
+    "collecting antiques",
+    "writing journals",
+    "blogging",
+    "vlogging",
+    "scrapbooking",
+    "knitting",
+    "crocheting",
+    "sewing",
+    "embroidery",
+    "painting miniatures",
+    "solving crossword puzzles",
+    "playing sudoku",
+    "bird watching",
+    "stargazing",
+    "astronomy hobby",
+    "fishing",
+    "camping",
+    "woodworking",
+    "metalworking",
+    "pottery making",
+    "baking cakes",
+    "brewing beer",
+    "winemaking",
+    "cheese making",
+    "travel blogging",
+    "geocaching",
+    "roller coaster riding",
+    "visiting museums",
+    "visiting art galleries",
+    "visiting historical sites",
+    "learning languages",
+    "dancing salsa",
+    "dancing tango",
+    "learning magic tricks",
+    "solving riddles",
+    "visiting theme parks",
+    "exploring caves",
+    "attending concerts",
+    # Group 10: Miscellaneous (50 entries)
+    "modern architecture",
+    "robotics",
+    "quantum physics",
+    "meditation",
+    "yoga",
+    "traveling",
+    "cooking experiments",
+    "urban exploration",
+    "digital art",
+    "virtual reality",
+    "augmented reality",
+    "3D printing",
+    "astronautics",
+    "cybersecurity",
+    "artificial intelligence",
+    "machine learning",
+    "data science",
+    "environmental conservation",
+    "sustainable living",
+    "minimalism",
+    "vintage cars",
+    "classic literature",
+    "indie films",
+    "experimental theater",
+    "improv theatre",
+    "modern dance",
+    "street art",
+    "graffiti art",
+    "podcasting",
+    "social media trends",
+    "entrepreneurship",
+    "investment strategies",
+    "stock market trends",
+    "cryptocurrency",
+    "travel photography",
+    "wildlife conservation",
+    "botany",
+    "zoology",
+    "geology",
+    "mythology",
+    "folklore",
+    "origami",
+    "historical documentaries",
+    "space exploration",
+    "bird photography",
+    "landscape photography",
+    "comic books",
+    "anime",
+    "manga",
+    "virtual concerts",
+]
diff --git a/reasoning_gym/cognition/needle_haystack.py b/reasoning_gym/cognition/needle_haystack.py
new file mode 100644
index 00000000..a8cd1cf4
--- /dev/null
+++ b/reasoning_gym/cognition/needle_haystack.py
@@ -0,0 +1,130 @@
+import re
+from dataclasses import dataclass
+from random import Random
+from typing import Any, Dict, List, Optional
+
+from ..factory import ProceduralDataset, register_dataset
+
+
+@dataclass
+class NeedleHaystackConfig:
+    """Settings for generating NeedleHaystack items"""
+
+    num_statements: int = 50  # statements generated for each item
+    seed: Optional[int] = None  # handed to ProceduralDataset by NeedleHaystackDataset
+    size: int = 500  # handed to ProceduralDataset by NeedleHaystackDataset
+
+    def validate(self) -> None:
+        """Assert 0 < num_statements < 168387000 (presumably the count of possible triplets - TODO confirm)"""
+        assert 0 < self.num_statements, "num_statements must be greater than 0"
+        assert 168_387_000 > self.num_statements, "num_statements must be less than 168387000"
+
+
+def generate_unique_triplets(names: List[str], verbs: List[str], subjects: List[str], n: int, rng) -> Dict[str, Any]:
+    """
+    Generate n unique random triplets (name, verb, subject) without generating the full Cartesian product in memory.
+
+    n distinct indices are sampled from range(len(names) * len(verbs) * len(subjects)) and each index is
+    decoded into one triplet. The 'needle' is drawn from the triplets whose (verb, subject) pair occurs only
+    once, so that a "Who <verb> <subject>?" question has a single correct name.
+
+    Args:
+        names (List[str]): List of names.
+        verbs (List[str]): List of verbs.
+        subjects (List[str]): List of subjects.
+        n (int): Number of unique triplets to generate.
+        rng (random.Random): A pre-seeded random number generator.
+
+    Returns:
+        Dict[str, Any]: A dictionary with:
+            - "triplets": a list of n triplets (tuples of (name, verb, subject)), distinct if the inputs are,
+            - "needle": one triplet from the list (all triplets are eligible only if no pair is unambiguous).
+
+    Raises:
+        ValueError: If n is negative or exceeds the number of possible triplets (raised by rng.sample).
+    """
+    num_verbs = len(verbs)
+    num_subjects = len(subjects)
+    total_possible = len(names) * num_verbs * num_subjects
+
+    # Sampling from a range keeps memory proportional to n instead of total_possible.
+    indices = rng.sample(range(total_possible), n)
+    triplets: list[tuple[str, str, str]] = []
+
+    for idx in indices:
+        # Mixed-radix decode: idx == (name_index * num_verbs + verb_index) * num_subjects + subject_index.
+        name_index, remainder = divmod(idx, num_verbs * num_subjects)
+        verb_index, subject_index = divmod(remainder, num_subjects)
+        triplets.append((names[name_index], verbs[verb_index], subjects[subject_index]))
+
+    # Different names may share a (verb, subject) pair; such triplets would make the question ambiguous.
+    pair_counts: Dict[tuple, int] = {}
+    for _, verb, subject in triplets:
+        pair_counts[(verb, subject)] = pair_counts.get((verb, subject), 0) + 1
+    candidates = [t for t in triplets if pair_counts[(t[1], t[2])] == 1]
+    # If every pair is unique, candidates == triplets and this draw equals a plain rng.choice(triplets).
+    needle = rng.choice(candidates or triplets)
+    return {"triplets": triplets, "needle": needle}
+
+
+class NeedleHaystackDataset(ProceduralDataset):
+    """Generates "Needle in a Haystack" tasks"""
+
+    def __init__(self, config: NeedleHaystackConfig):
+        super().__init__(config=config, seed=config.seed, size=config.size)
+
+    def __getitem__(self, idx: int) -> dict:
+        """Generate a single NeedleHaystack task
+
+        Args:
+            idx: item index; it is added to self.seed to seed this item's random generator
+
+        Returns:
+            dict with keys:
+                - question: str, all generated statements followed by a newline and the "Who ...?" question
+                - answer: str, the name from the needle statement
+                - metadata: dict with the question sentence on its own under "question"
+        """
+        from .needle_data import NAMES, SUBJECTS, VERBS
+
+        # A generator seeded with self.seed + idx, so the item depends only on that sum.
+        rng = Random(self.seed + idx)
+        stack = generate_unique_triplets(NAMES, VERBS, SUBJECTS, self.config.num_statements, rng)
+        name, verb, subject = stack["needle"]
+
+        # Every statement, including the last one, is followed by ". ".
+        stack_text = "".join(f"{n} {v} {s}. " for n, v, s in stack["triplets"])
+        question = f"Who {verb} {subject}? Reply only with a name."
+
+        return {
+            "question": stack_text + "\n" + question,
+            "answer": name,
+            "metadata": {"question": question},
+        }
+
+    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
+        """Determine if the solution provided solves the task.
+
+        Args:
+            answer (Optional[str]): The user's answer.
+            entry (Dict[str, Any]): The original dataset entry containing the correct answer.
+
+        Returns:
+            float: 1.0 when the normalized answer equals the expected name, 0.0 when the answer is
+                None or empty, and 0.01 for any other answer.
+        """
+
+        correct_word = entry["answer"]
+        if not answer:
+            return 0.0  # No answer given
+
+        # Normalize: spaces are removed from the answer only; both sides are stripped and lowercased.
+        given = answer.replace(" ", "").strip().lower()
+        expected = correct_word.strip().lower()
+
+        # A non-empty but wrong reply scores 0.01 rather than 0.0.
+        return 1.0 if given == expected else 0.01
+
+
+# Register the dataset under the name "needle_haystack" together with its config class
+register_dataset("needle_haystack", NeedleHaystackDataset, NeedleHaystackConfig)
diff --git a/reasoning_gym/cognition/number_sequences.py b/reasoning_gym/cognition/number_sequences.py
index bac6a18a..554989aa 100644
--- a/reasoning_gym/cognition/number_sequences.py
+++ b/reasoning_gym/cognition/number_sequences.py
@@ -1,7 +1,7 @@
 from dataclasses import dataclass
 from enum import StrEnum
 from random import Random
-from typing import List, Optional
+from typing import Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -42,12 +42,12 @@ class NumberSequenceConfig:
 class PatternRule:
     """Represents a composable sequence pattern rule"""
 
-    def __init__(self, operations: List[Operation], parameters: List[int], subrules: List["PatternRule"] = None):
+    def __init__(self, operations: list[Operation], parameters: list[int], subrules: list["PatternRule"] = None):
         self.operations = operations
         self.parameters = parameters
         self.subrules = subrules or []
 
-    def apply(self, sequence: List[int], position: int) -> int:
+    def apply(self, sequence: list[int], position: int) -> int:
         """Apply the rule to generate the next number"""
         result = sequence[position]  # Start with current number
 
@@ -75,7 +75,7 @@ class PatternRule:
         return result
 
     @classmethod
-    def compose(cls, rules: List["PatternRule"]) -> "PatternRule":
+    def compose(cls, rules: list["PatternRule"]) -> "PatternRule":
         """Create a new rule that composes multiple rules together"""
         return cls([Operation.COMPOSE], [0], subrules=rules)
 
@@ -129,7 +129,7 @@ class PatternGenerator:
 
         return PatternRule(operations, parameters)
 
-    def is_interesting(self, sequence: List[int], max_value: int = 1000) -> bool:
+    def is_interesting(self, sequence: list[int], max_value: int = 1000) -> bool:
         """Check if sequence is interesting enough"""
         if not sequence:
             return False
diff --git a/reasoning_gym/cognition/rectangle_count.py b/reasoning_gym/cognition/rectangle_count.py
index 565df8db..5a86d467 100644
--- a/reasoning_gym/cognition/rectangle_count.py
+++ b/reasoning_gym/cognition/rectangle_count.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from random import Random
-from typing import Dict, Optional
+from typing import Any, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -145,14 +145,14 @@ class RectangleCountDataset(ProceduralDataset):
             "metadata": {"puzzle": puzzle, "solution": answer},
         }
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Determine if the solution provided solves the RectangleCount task.
 
         The function awards 1.0 for a correct answer.
 
         Args:
             answer (Optional[str]): The user's answer.
-            entry (Dict[str, any]): The original dataset entry containing the correct answer.
+            entry (dict[str, Any]): The original dataset entry containing the correct answer.
 
         Returns:
             float: The computed score between 0.0 and 1.0.
diff --git a/reasoning_gym/cognition/rubiks_cube.py b/reasoning_gym/cognition/rubiks_cube.py
index f504d92d..c34fd30a 100644
--- a/reasoning_gym/cognition/rubiks_cube.py
+++ b/reasoning_gym/cognition/rubiks_cube.py
@@ -1,7 +1,7 @@
 import re
 from dataclasses import dataclass
 from random import Random
-from typing import Dict, List, Optional
+from typing import Any, Optional
 
 from magiccube.cube import Cube, CubeMove, CubeMoveType
 from magiccube.solver.basic.basic_solver import BasicSolver
@@ -36,7 +36,7 @@ class RubiksCubeDataset(ProceduralDataset):
         ]
         super().__init__(config=config, seed=config.seed, size=config.size)
 
-    def _generate_random_moves(self, rng: Random, cube: Cube, num_steps: int = 50, wide=None) -> List[CubeMove]:
+    def _generate_random_moves(self, rng: Random, cube: Cube, num_steps: int = 50, wide=None) -> list[CubeMove]:
         """Generate a list of random moves (but don't apply them).
         By default scramble only uses wide moves to cubes with size >=4."""
 
@@ -106,7 +106,7 @@ class RubiksCubeDataset(ProceduralDataset):
             },
         }
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Determine if the solution provided solves the cube"""
         reward = 0.0  # default reward
         if answer is not None:
diff --git a/reasoning_gym/composite.py b/reasoning_gym/composite.py
index b30151fb..0e15bb0e 100644
--- a/reasoning_gym/composite.py
+++ b/reasoning_gym/composite.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass, replace
 from random import Random
-from typing import Any, Dict, List, Optional
+from typing import Any, Optional
 
 import yaml
 
@@ -30,7 +30,7 @@ class CompositeConfig:
 
     size: int = 500
     seed: Optional[int] = None
-    datasets: List[DatasetSpec] = None
+    datasets: list[DatasetSpec] = None
 
     def validate(self):
         """Validate configuration parameters"""
@@ -120,7 +120,7 @@ class CompositeDataset(ProceduralDataset):
 
         return item
 
-    def update_dataset_config(self, dataset_name: str, config_updates: Dict[str, Any]) -> None:
+    def update_dataset_config(self, dataset_name: str, config_updates: dict[str, Any]) -> None:
         """Update configuration of a specific dataset
 
         Args:
@@ -175,7 +175,7 @@ class CompositeDataset(ProceduralDataset):
                 self.weights[i] = weight
                 break
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Forward scoring to appropriate dataset"""
         dataset_name = entry["metadata"]["source_dataset"]
         return self.datasets[dataset_name].score_answer(answer, entry)
diff --git a/reasoning_gym/dataset.py b/reasoning_gym/dataset.py
index 7c929454..f4d263cd 100644
--- a/reasoning_gym/dataset.py
+++ b/reasoning_gym/dataset.py
@@ -4,10 +4,10 @@ from abc import ABC, abstractmethod
 from collections.abc import Iterable, Sized
 from copy import deepcopy
 from random import Random
-from typing import Any, Dict, Iterator, Optional, Type, TypeVar
+from typing import Any, Iterator, Optional, Type, TypeVar
 
 
-class ProceduralDataset(ABC, Sized, Iterable[Dict[str, Any]]):
+class ProceduralDataset(ABC, Sized, Iterable[dict[str, Any]]):
     """Abstract base class for procedural dataset generators"""
 
     def __init__(self, config: Any, seed: Optional[int] = None, size: int = 500):
@@ -28,7 +28,7 @@ class ProceduralDataset(ABC, Sized, Iterable[Dict[str, Any]]):
         self._current_idx = 0
         return self
 
-    def __next__(self) -> Dict[str, Any]:
+    def __next__(self) -> dict[str, Any]:
         """Get next item in iteration"""
         if self._current_idx >= self.size:
             raise StopIteration
@@ -51,7 +51,7 @@ class ProceduralDataset(ABC, Sized, Iterable[Dict[str, Any]]):
         """
         raise NotImplementedError
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Overwrite this method in derived classes if a single oracle answer is not available."""
         oracle_answer = entry["answer"].strip()
         reward = 0.0
@@ -70,7 +70,7 @@ class ProceduralDataset(ABC, Sized, Iterable[Dict[str, Any]]):
 T = TypeVar("T", bound="ProceduralDataset")
 
 
-class ReseedingDataset(Iterable[Dict[str, Any]]):
+class ReseedingDataset(Iterable[dict[str, Any]]):
     """Wrapper that makes any ProceduralDataset infinite by reseeding when reaching the end"""
 
     def __init__(self, dataset: T, chunk_size: int = 500):
@@ -100,14 +100,14 @@ class ReseedingDataset(Iterable[Dict[str, Any]]):
         # Create new dataset instance with chunk config
         return self.dataset_cls(new_config)
 
-    def __iter__(self) -> Iterator[Dict[str, Any]]:
+    def __iter__(self) -> Iterator[dict[str, Any]]:
         """Make the dataset iterable"""
         self._current_chunk = 0
         self._current_dataset = self._create_chunk(0)
         self._current_idx = 0
         return self
 
-    def __next__(self) -> Dict[str, Any]:
+    def __next__(self) -> dict[str, Any]:
         """Get next item, creating new chunk if needed"""
         if self._current_idx >= self.chunk_size:
             # Move to next chunk
@@ -119,6 +119,6 @@ class ReseedingDataset(Iterable[Dict[str, Any]]):
         self._current_idx += 1
         return item
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Forward scoring to the wrapped dataset's implementation"""
         return self.dataset.score_answer(answer, entry)
diff --git a/reasoning_gym/factory.py b/reasoning_gym/factory.py
index 8a9b8f5b..3f5a62b2 100644
--- a/reasoning_gym/factory.py
+++ b/reasoning_gym/factory.py
@@ -1,5 +1,5 @@
 from dataclasses import is_dataclass
-from typing import Dict, Type, TypeVar
+from typing import Type, TypeVar
 
 from .dataset import ProceduralDataset
 
@@ -8,7 +8,7 @@ ConfigT = TypeVar("ConfigT")
 DatasetT = TypeVar("DatasetT", bound=ProceduralDataset)
 
 # Global registry of datasets
-DATASETS: Dict[str, tuple[Type[ProceduralDataset], Type]] = {}
+DATASETS: dict[str, tuple[Type[ProceduralDataset], Type]] = {}
 
 
 def register_dataset(name: str, dataset_cls: Type[DatasetT], config_cls: Type[ConfigT]) -> None:
diff --git a/reasoning_gym/games/countdown.py b/reasoning_gym/games/countdown.py
index 19097259..88ad913d 100644
--- a/reasoning_gym/games/countdown.py
+++ b/reasoning_gym/games/countdown.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from random import Random
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Optional
 
 import sympy
 from sympy import Symbol, symbols
@@ -89,7 +89,7 @@ class CountdownDataset(ProceduralDataset):
             },
         }
 
-    def _generate_candidate_expression(self, rng: Random, num_terms: int) -> Tuple[sympy.Expr, List[int], List[Symbol]]:
+    def _generate_candidate_expression(self, rng: Random, num_terms: int) -> tuple[sympy.Expr, list[int], list[Symbol]]:
         """Generate a candidate expression with random numbers and operators
 
         Args:
@@ -140,7 +140,7 @@ class CountdownDataset(ProceduralDataset):
 
         return expr, numbers, syms
 
-    def _generate_expression(self, rng: Random) -> Tuple[str, List[int], int]:
+    def _generate_expression(self, rng: Random) -> tuple[str, list[int], int]:
         """Generate a valid expression and its result
 
         Returns:
@@ -171,7 +171,7 @@ class CountdownDataset(ProceduralDataset):
 
         raise ValueError(f"Failed to generate valid expression after {max_attempts} attempts")
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Determine if the solution provided solves the problem"""
         reward = 0.0
         metadata = entry["metadata"]
diff --git a/reasoning_gym/games/futoshiki.py b/reasoning_gym/games/futoshiki.py
index f71c6e2c..ce70e14a 100644
--- a/reasoning_gym/games/futoshiki.py
+++ b/reasoning_gym/games/futoshiki.py
@@ -4,7 +4,7 @@ import copy
 import itertools
 from dataclasses import dataclass
 from random import Random
-from typing import Any, Dict, List, Optional, Set, Tuple
+from typing import Any, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -85,7 +85,7 @@ class FutoshikiDataset(ProceduralDataset):
         }
 
     def _puzzle_to_string(
-        self, puzzle_grid: List[List[int]], constraints: Dict[Tuple[Tuple[int, int], Tuple[int, int]], str]
+        self, puzzle_grid: list[list[int]], constraints: dict[tuple[tuple[int, int], tuple[int, int]], str]
     ) -> str:
         """
         Formats a Futoshiki puzzle grid as a string with constraints.
@@ -161,9 +161,9 @@ class FutoshikiDataset(ProceduralDataset):
 
     def _solve_logical(
         self,
-        grid: List[List[int]],
-        constraints: Dict[Tuple[Tuple[int, int], Tuple[int, int]], str],
-    ) -> Tuple[List[List[int]], List[List[Set[int]]]]:
+        grid: list[list[int]],
+        constraints: dict[tuple[tuple[int, int], tuple[int, int]], str],
+    ) -> tuple[list[list[int]], list[list[set[int]]]]:
         """
         Apply logical rules to progress solution.
         Returns current state if logical rules can't progress further.
@@ -172,7 +172,7 @@ class FutoshikiDataset(ProceduralDataset):
         size, working_grid = len(grid), copy.deepcopy(grid)
 
         # Starting point all numbers are candidates for all unfilled squares
-        candidates: List[List[Set[int]]] = [
+        candidates: list[list[set[int]]] = [
             [set(range(1, len(grid) + 1)) if grid[r][c] == 0 else {grid[r][c]} for c in range(len(grid))]
             for r in range(len(grid))
         ]
@@ -214,7 +214,7 @@ class FutoshikiDataset(ProceduralDataset):
 
             # Eliminate candidates based on constraints
             # Based on currently filled values, eliminate candidates that violate constraints
-            def _eliminate_by_constraint(rc_less: Tuple[int, int], rc_greater: Tuple[int, int]) -> bool:
+            def _eliminate_by_constraint(rc_less: tuple[int, int], rc_greater: tuple[int, int]) -> bool:
                 r_less, c_less = rc_less
                 r_greater, c_greater = rc_greater
                 progress = False
@@ -399,9 +399,9 @@ class FutoshikiDataset(ProceduralDataset):
 
     def _solve(
         self,
-        grid: List[List[int]],
-        constraints: Dict[Tuple[Tuple[int, int], Tuple[int, int]], str],
-    ) -> List[List[int]] | None:
+        grid: list[list[int]],
+        constraints: dict[tuple[tuple[int, int], tuple[int, int]], str],
+    ) -> list[list[int]] | None:
         """
         Backtracking Futoshiki solver. Used to verify generated puzzles.
         Applies logical rules first then backtracks to fill gaps.
@@ -442,11 +442,11 @@ class FutoshikiDataset(ProceduralDataset):
 
     def _is_valid(
         self,
-        grid: List[List[int]],
+        grid: list[list[int]],
         row: int,
         col: int,
         val: int,
-        constraints: Dict[Tuple[Tuple[int, int], Tuple[int, int]], str],
+        constraints: dict[tuple[tuple[int, int], tuple[int, int]], str],
     ) -> bool:
         """Check row, col, and inequality constraints for placing val in grid[row][col]."""
         size = len(grid)
@@ -482,7 +482,7 @@ class FutoshikiDataset(ProceduralDataset):
         grid[row][col] = original_val
         return True
 
-    def _generate_random_solution(self, size: int, rng: Random) -> List[List[int]]:
+    def _generate_random_solution(self, size: int, rng: Random) -> list[list[int]]:
         """
         Generates a random valid completed Futoshiki solution with numbers 1..size.
         Ensures each row and column has unique numbers.
@@ -514,8 +514,8 @@ class FutoshikiDataset(ProceduralDataset):
         raise ValueError("Could not generate a random solution.")
 
     def _generate_random_constraints(
-        self, solution: List[List[int]], difficulty: int, rng: Random
-    ) -> Dict[Tuple[Tuple[int, int], Tuple[int, int]], str]:
+        self, solution: list[list[int]], difficulty: int, rng: Random
+    ) -> dict[tuple[tuple[int, int], tuple[int, int]], str]:
         """
         Randomly add inequality constraints that match the solution.
         We only add constraints for adjacent cells (horizontal or vertical).
@@ -570,10 +570,10 @@ class FutoshikiDataset(ProceduralDataset):
 
     def _remove_clues(
         self,
-        grid: List[List[int]],
-        constraints: Dict[Tuple[Tuple[int, int], Tuple[int, int]], str],
+        grid: list[list[int]],
+        constraints: dict[tuple[tuple[int, int], tuple[int, int]], str],
         rng: Random,
-    ) -> List[List[int]]:
+    ) -> list[list[int]]:
         """
         Remove clues from a full solution to try to maintain a unique-solution puzzle.
         We remove in random order until we reach our target, or can't without losing uniqueness.
@@ -637,7 +637,9 @@ class FutoshikiDataset(ProceduralDataset):
             row = 0
             num_matching = 0
             for ln in answer.split("\n"):
-                numbers = [int(c) for c in ln if c.isnumeric()]
+                if row >= len(solution):
+                    break
+                numbers = [int(c) for c in ln if c in "123456789"]
                 if len(numbers) != len(solution[0]):
                     continue  # ignore lines without numbers
                 for a, b in zip(solution[row], numbers):
diff --git a/reasoning_gym/games/knight_swap.py b/reasoning_gym/games/knight_swap.py
index 8e8c1167..1e0270f5 100644
--- a/reasoning_gym/games/knight_swap.py
+++ b/reasoning_gym/games/knight_swap.py
@@ -2,7 +2,7 @@ import collections
 import json
 from dataclasses import dataclass
 from random import Random
-from typing import Dict, FrozenSet, List, Optional, Set, Tuple
+from typing import FrozenSet, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -81,7 +81,7 @@ class KnightSwapLogic:
         return {abs(a_col - b_col), abs(a_row - b_row)} == {1, 2}
 
     @staticmethod
-    def is_connected(graph: Dict[str, List[str]]) -> bool:
+    def is_connected(graph: dict[str, list[str]]) -> bool:
         """Check if a graph is connected (all nodes reachable from any starting node)."""
         if not graph:
             return True
@@ -98,7 +98,7 @@ class KnightSwapLogic:
         return len(visited) == len(graph)
 
     @staticmethod
-    def generate_board(num_nodes: int, rng: Random, max_attempts: int = 1000) -> Dict[str, List[str]]:
+    def generate_board(num_nodes: int, rng: Random, max_attempts: int = 1000) -> dict[str, list[str]]:
         """Generate a random connected board where edges represent valid knight moves."""
         candidates = ["A1", "A2", "A3", "B1", "B2", "B3", "C1", "C2", "C3", "D1", "D2", "D3"]
         attempts = 0
@@ -120,8 +120,8 @@ class KnightSwapLogic:
 
     @staticmethod
     def solve_swap(
-        board: Dict[str, List[str]], pieces: Dict[str, str], start_turn: str = "w"
-    ) -> Optional[List[Tuple[str, str, str]]]:
+        board: dict[str, list[str]], pieces: dict[str, str], start_turn: str = "w"
+    ) -> Optional[list[tuple[str, str, str]]]:
         """Find a sequence of moves to swap white and black pieces positions."""
 
         @dataclass(frozen=True)
@@ -177,7 +177,7 @@ class KnightSwapDataset(ProceduralDataset):
         super().__init__(config=config, seed=config.seed, size=config.size)
         self.game_logic = KnightSwapLogic()
 
-    def _format_board(self, board: Dict[str, List[str]], pieces: Dict[str, str]) -> str:
+    def _format_board(self, board: dict[str, list[str]], pieces: dict[str, str]) -> str:
         """Format the board state as a string."""
         positions = list(board.keys())
         if not positions:
@@ -206,13 +206,13 @@ class KnightSwapDataset(ProceduralDataset):
 
         return "\n".join(lines)
 
-    def _format_moves(self, moves: List[Tuple[str, str, str]]) -> str:
+    def _format_moves(self, moves: list[tuple[str, str, str]]) -> str:
         """Format the solution moves as a string."""
         if not moves:
             return "No"
         return json.dumps([f"{color},{start},{end}" for color, start, end in moves])
 
-    def __getitem__(self, idx: int) -> Dict:
+    def __getitem__(self, idx: int) -> dict:
         """Generate a single Knight Swap puzzle."""
         rng = Random(self.seed + idx)
 
@@ -303,7 +303,7 @@ class KnightSwapDataset(ProceduralDataset):
 
         raise ValueError(f"Failed to generate valid puzzle after trying {self.config.max_attempts} different boards")
 
-    def score_answer(self, answer: Optional[str], entry: Dict) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict) -> float:
         """Score the user's solution for the Knight Swap puzzle.
 
         The answer should be either:
diff --git a/reasoning_gym/games/mini_sudoku.py b/reasoning_gym/games/mini_sudoku.py
index 3ca1277c..319569ff 100644
--- a/reasoning_gym/games/mini_sudoku.py
+++ b/reasoning_gym/games/mini_sudoku.py
@@ -3,7 +3,7 @@
 import copy
 from dataclasses import dataclass
 from random import Random
-from typing import Any, List, Optional, Tuple
+from typing import Any, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -46,7 +46,7 @@ class MiniSudokuDataset(ProceduralDataset):
         self._current_idx += 1
         return item
 
-    def _is_valid(self, board: List[List[int]], row: int, col: int, num: int) -> bool:
+    def _is_valid(self, board: list[list[int]], row: int, col: int, num: int) -> bool:
         """Check if number can be placed at position"""
         # Check row
         if num in board[row]:
@@ -64,7 +64,7 @@ class MiniSudokuDataset(ProceduralDataset):
                     return False
         return True
 
-    def _solve(self, board: List[List[int]]) -> bool:
+    def _solve(self, board: list[list[int]]) -> bool:
         """Solve mini sudoku using backtracking"""
         empty = self._find_empty(board)
         if not empty:
@@ -79,7 +79,7 @@ class MiniSudokuDataset(ProceduralDataset):
                 board[row][col] = 0
         return False
 
-    def _find_empty(self, board: List[List[int]]) -> Optional[Tuple[int, int]]:
+    def _find_empty(self, board: list[list[int]]) -> Optional[tuple[int, int]]:
         """Find an empty cell"""
         for i in range(4):
             for j in range(4):
@@ -87,7 +87,7 @@ class MiniSudokuDataset(ProceduralDataset):
                     return (i, j)
         return None
 
-    def _generate_solved_board(self, rng: Random) -> List[List[int]]:
+    def _generate_solved_board(self, rng: Random) -> list[list[int]]:
         """Generate a complete solved mini sudoku board"""
         board = [[0] * 4 for _ in range(4)]
 
@@ -115,10 +115,10 @@ class MiniSudokuDataset(ProceduralDataset):
 
         raise RuntimeError("Failed to generate valid mini sudoku board")
 
-    def _count_solutions(self, board: List[List[int]], limit: int = 2) -> int:
+    def _count_solutions(self, board: list[list[int]], limit: int = 2) -> int:
         """Count the number of solutions for a given board"""
 
-        def _count_solutions_helper(board: List[List[int]]) -> int:
+        def _count_solutions_helper(board: list[list[int]]) -> int:
             empty = self._find_empty(board)
             if not empty:
                 return 1
@@ -136,7 +136,7 @@ class MiniSudokuDataset(ProceduralDataset):
 
         return _count_solutions_helper(board)
 
-    def _create_puzzle(self, solved_board: List[List[int]], num_empty: int, rng: Random) -> List[List[int]]:
+    def _create_puzzle(self, solved_board: list[list[int]], num_empty: int, rng: Random) -> list[list[int]]:
         """Create puzzle by removing numbers from solved board"""
         puzzle = [row[:] for row in solved_board]
         cells = [(i, j) for i in range(4) for j in range(4)]
@@ -157,7 +157,7 @@ class MiniSudokuDataset(ProceduralDataset):
 
         return puzzle
 
-    def _board_to_string(self, board: List[List[int]]) -> str:
+    def _board_to_string(self, board: list[list[int]]) -> str:
         """Convert board to string representation"""
         return "\n".join(" ".join(str(x) if x != 0 else "_" for x in row) for row in board)
 
@@ -214,7 +214,9 @@ class MiniSudokuDataset(ProceduralDataset):
             row = 0
             num_matching = 0
             for ln in answer.split("\n"):
-                numbers = [int(c) for c in ln if c.isnumeric()]
+                if row >= len(solution):
+                    break
+                numbers = [int(c) for c in ln if c in "123456789"]
                 if len(numbers) != board_size:
                     continue  # ignore lines without numbers
                 for a, b in zip(solution[row], numbers):
diff --git a/reasoning_gym/games/n_queens.py b/reasoning_gym/games/n_queens.py
index 7f85c0d7..61f6ea66 100644
--- a/reasoning_gym/games/n_queens.py
+++ b/reasoning_gym/games/n_queens.py
@@ -7,7 +7,7 @@ https://en.wikipedia.org/wiki/Eight_queens_puzzle
 from copy import deepcopy
 from dataclasses import dataclass
 from random import Random
-from typing import Dict, List, Optional
+from typing import Any, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -65,7 +65,7 @@ class NQueensDataset(ProceduralDataset):
         super().__init__(config=config, seed=config.seed, size=config.size)
         self._solutions = self._get_all_solutions(config.n)
 
-    def _get_all_solutions(self, n: int) -> List[List[List[str]]]:
+    def _get_all_solutions(self, n: int) -> list[list[list[str]]]:
         """Get all solutions for the N Queens puzzle"""
 
         visited_cols = set()
@@ -97,7 +97,7 @@ class NQueensDataset(ProceduralDataset):
         backtrack(0)
         return res
 
-    def _create_puzzle(self, solved_board: List[List[str]], num_removed: int, rng: Random) -> List[List[str]]:
+    def _create_puzzle(self, solved_board: list[list[str]], num_removed: int, rng: Random) -> list[list[str]]:
         """Create puzzle by removing queens from solved board"""
         puzzle = deepcopy(solved_board)
         queens = [(i, j) for i in range(len(puzzle)) for j in range(len(puzzle)) if puzzle[i][j] == "Q"]
@@ -107,15 +107,15 @@ class NQueensDataset(ProceduralDataset):
             puzzle[x][y] = "_"
         return puzzle
 
-    def _board_to_string(self, board: List[List[str]]) -> str:
+    def _board_to_string(self, board: list[list[str]]) -> str:
         """Convert board to string representation"""
         return "\n".join(" ".join(x for x in row) for row in board)
 
-    def _string_to_board(self, board_str: str) -> List[List[str]]:
+    def _string_to_board(self, board_str: str) -> list[list[str]]:
         """Convert string representation to board"""
         return [list(row.split()) for row in board_str.strip().split("\n")]
 
-    def _is_tractable_solution(self, puzzle: List[List[str]], solution: List[List[str]]) -> bool:
+    def _is_tractable_solution(self, puzzle: list[list[str]], solution: list[list[str]]) -> bool:
         """Check if a solution is achievable from the starting state of the puzzle"""
         for r in range(len(puzzle)):
             for c in range(len(puzzle)):
@@ -150,7 +150,7 @@ class NQueensDataset(ProceduralDataset):
             },
         }
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         valid_solutions = entry["metadata"]["valid_answers"]
         if answer is not None:
             if answer in valid_solutions:
diff --git a/reasoning_gym/games/rush_hour.py b/reasoning_gym/games/rush_hour.py
index 0ffaf277..1ab9489a 100644
--- a/reasoning_gym/games/rush_hour.py
+++ b/reasoning_gym/games/rush_hour.py
@@ -6,7 +6,7 @@ https://www.michaelfogleman.com/rush/
 import random
 import re
 from dataclasses import dataclass
-from typing import List, Optional, Tuple
+from typing import Optional
 
 from ..data import get_data_file_path
 from ..factory import ProceduralDataset, register_dataset
@@ -105,7 +105,7 @@ class RushHourDataset(ProceduralDataset):
         super().__init__(config=config, seed=config.seed, size=config.size)
 
         # Load and filter puzzles from data file
-        self.puzzles: List[Tuple[str, int]] = []  # (board_config, min_moves)
+        self.puzzles: list[tuple[str, int]] = []  # (board_config, min_moves)
 
         data_path = get_data_file_path("rush_18k.txt")
         with data_path.open() as f:
diff --git a/reasoning_gym/games/sokoban.py b/reasoning_gym/games/sokoban.py
index 124aaf48..1b12169c 100644
--- a/reasoning_gym/games/sokoban.py
+++ b/reasoning_gym/games/sokoban.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from random import Random
-from typing import Dict, Optional
+from typing import Any, Optional
 
 import numpy as np
 
@@ -80,14 +80,14 @@ Here is your puzzle:
             "metadata": {"gamestr": gamestr, "difficulty": difficulty},
         }
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Determine if the solution provided solves the Sokoban task.
 
         The function awards 1.0 for a correct answer.
 
         Args:
             answer (Optional[str]): The user's answer.
-            entry (Dict[str, any]): The original dataset entry containing the correct answer.
+            entry (dict[str, Any]): The original dataset entry containing the correct answer.
 
         Returns:
             float: The computed score between 0.0 and 1.0.
diff --git a/reasoning_gym/games/sudoku.py b/reasoning_gym/games/sudoku.py
index 5efe79e7..9b07184b 100644
--- a/reasoning_gym/games/sudoku.py
+++ b/reasoning_gym/games/sudoku.py
@@ -3,7 +3,7 @@
 import copy
 from dataclasses import dataclass
 from random import Random
-from typing import Any, List, Optional, Set, Tuple
+from typing import Any, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -47,7 +47,7 @@ class SudokuDataset(ProceduralDataset):
         self._current_idx += 1
         return item
 
-    def _is_valid(self, board: List[List[int]], row: int, col: int, num: int) -> bool:
+    def _is_valid(self, board: list[list[int]], row: int, col: int, num: int) -> bool:
         """Check if number can be placed at position"""
         # Check row
         if num in board[row]:
@@ -65,7 +65,7 @@ class SudokuDataset(ProceduralDataset):
                     return False
         return True
 
-    def _get_possible_values(self, board: List[List[int]], row: int, col: int) -> Set[int]:
+    def _get_possible_values(self, board: list[list[int]], row: int, col: int) -> set[int]:
         """Get all possible values for a cell."""
         row_values = set(board[row])
         col_values = set(board[i][col] for i in range(9))
@@ -80,7 +80,7 @@ class SudokuDataset(ProceduralDataset):
         used_values = row_values | col_values | box_values
         return set(range(1, 10)) - used_values
 
-    def _solve(self, board: List[List[int]]) -> bool:
+    def _solve(self, board: list[list[int]]) -> bool:
         """Solve sudoku using backtracking"""
         empty = self._find_empty(board)
         if not empty:
@@ -94,7 +94,7 @@ class SudokuDataset(ProceduralDataset):
             board[row][col] = 0
         return False
 
-    def _find_empty(self, board: List[List[int]]) -> Optional[Tuple[int, int]]:
+    def _find_empty(self, board: list[list[int]]) -> Optional[tuple[int, int]]:
         """Find an empty cell"""
         for i in range(9):
             for j in range(9):
@@ -102,7 +102,7 @@ class SudokuDataset(ProceduralDataset):
                     return (i, j)
         return None
 
-    def _generate_solved_board(self, rng: Random) -> List[List[int]]:
+    def _generate_solved_board(self, rng: Random) -> list[list[int]]:
         """Generate a complete solved sudoku board"""
         board = [[0] * 9 for _ in range(9)]
 
@@ -120,10 +120,10 @@ class SudokuDataset(ProceduralDataset):
         self._solve(board)
         return board
 
-    def _count_solutions(self, board: List[List[int]], limit: int = 2) -> int:
+    def _count_solutions(self, board: list[list[int]], limit: int = 2) -> int:
         """Count the number of solutions for a given board"""
 
-        def _get_min_possibilities_cell(board: List[List[int]]) -> Optional[Tuple[int, int, Set[int]]]:
+        def _get_min_possibilities_cell(board: list[list[int]]) -> Optional[tuple[int, int, set[int]]]:
             """
             Get the cell with the lowest number of possibilities.
             Returns None if the board is already solved.
@@ -145,7 +145,7 @@ class SudokuDataset(ProceduralDataset):
 
             return (*min_cell, min_values) if min_cell else None
 
-        def _count_solutions_helper(board: List[List[int]]) -> int:
+        def _count_solutions_helper(board: list[list[int]]) -> int:
             cell_info = _get_min_possibilities_cell(board)
             if not cell_info:
                 return 1
@@ -162,7 +162,7 @@ class SudokuDataset(ProceduralDataset):
 
         return _count_solutions_helper(board)
 
-    def _create_puzzle(self, solved_board: List[List[int]], num_empty: int, rng: Random) -> List[List[int]]:
+    def _create_puzzle(self, solved_board: list[list[int]], num_empty: int, rng: Random) -> list[list[int]]:
         """Create puzzle by removing numbers from solved board"""
         puzzle = [row[:] for row in solved_board]
         cells = [(i, j) for i in range(9) for j in range(9)]
@@ -183,7 +183,7 @@ class SudokuDataset(ProceduralDataset):
 
         return puzzle
 
-    def _board_to_string(self, board: List[List[int]]) -> str:
+    def _board_to_string(self, board: list[list[int]]) -> str:
         """Convert board to string representation"""
         return "\n".join(" ".join(str(x) if x != 0 else "_" for x in row) for row in board)
 
@@ -233,7 +233,9 @@ class SudokuDataset(ProceduralDataset):
             row = 0
             num_matching = 0
             for ln in answer.split("\n"):
-                numbers = [int(c) for c in ln if c.isnumeric()]
+                if row >= len(solution):
+                    break
+                numbers = [int(c) for c in ln if c in "123456789"]
                 if len(numbers) != board_size:
                     continue  # ignore lines without numbers
                 for a, b in zip(solution[row], numbers):
diff --git a/reasoning_gym/games/tower_of_hanoi.py b/reasoning_gym/games/tower_of_hanoi.py
index 1a868251..47afbb05 100644
--- a/reasoning_gym/games/tower_of_hanoi.py
+++ b/reasoning_gym/games/tower_of_hanoi.py
@@ -4,7 +4,7 @@ import math
 import random
 import re
 from dataclasses import dataclass
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -62,23 +62,23 @@ class MoveGenerator:
     It maintains the current state of all pegs to ensure move validity.
     """
 
-    def __init__(self, num_disks: int, pegs: List[int], start: int, target: int):
+    def __init__(self, num_disks: int, pegs: list[int], start: int, target: int):
         self.num_disks = num_disks
         self.pegs = pegs
         self.start = start
         self.target = target
         self.auxiliary_pegs = [peg for peg in pegs if peg not in (start, target)]
-        self.pegs_state: Dict[int, List[int]] = {peg: [] for peg in pegs}
+        self.pegs_state: dict[int, list[int]] = {peg: [] for peg in pegs}
         for disk in range(num_disks, 0, -1):  # Largest disk at the bottom
             self.pegs_state[start].append(disk)
-        self.moves: List[str] = []
-        self.memo: Dict[Tuple[int, int], int] = {}  # Memoization for T(n, k)
+        self.moves: list[str] = []
+        self.memo: dict[tuple[int, int], int] = {}  # Memoization for T(n, k)
 
-    def generate_moves(self) -> List[str]:
+    def generate_moves(self) -> list[str]:
         self.move(n=self.num_disks, source=self.start, target=self.target, auxiliary_pegs=self.auxiliary_pegs)
         return self.moves
 
-    def move(self, n: int, source: int, target: int, auxiliary_pegs: List[int]):
+    def move(self, n: int, source: int, target: int, auxiliary_pegs: list[int]):
         if n == 0:
             return
         if n == 1:
@@ -175,10 +175,10 @@ class HanoiDataset(ProceduralDataset):
         Returns:
             dict with:
             - "question": Text describing the problem setup.
-            - "answer": List of moves to solve the puzzle.
+            - "answer": list of moves to solve the puzzle.
             - "metadata": Configuration and solution details.
             - "initial_state": (Optional) ASCII visualization of the initial pegs.
-            - "states": (Optional) List of ASCII visualizations after each move.
+            - "states": (Optional) list of ASCII visualizations after each move.
         """
         rng = random.Random(self.seed + idx if self.seed is not None else None)
 
@@ -282,11 +282,11 @@ class HanoiDataset(ProceduralDataset):
 
         if self.visualize:
             result["initial_state"] = initial_state_str
-            result["states"] = states  # List of all states including initial and after each move
+            result["states"] = states  # list of all states including initial and after each move
 
         return result
 
-    def _visualize_state(self, pegs_state: Dict[int, List[int]]) -> str:
+    def _visualize_state(self, pegs_state: dict[int, list[int]]) -> str:
         """
         Create an ASCII visualization of the current state of the pegs.
         Adapts to variable number of pegs.
@@ -321,7 +321,7 @@ class HanoiDataset(ProceduralDataset):
 
         return visualization
 
-    def _validate_move(self, pegs_state: Dict[int, List[int]], move: str) -> bool:
+    def _validate_move(self, pegs_state: dict[int, list[int]], move: str) -> bool:
         """
         Validate that a move adheres to the Tower of Hanoi rules.
 
@@ -356,7 +356,7 @@ class HanoiDataset(ProceduralDataset):
             print(f"Error validating move '{move}': {e}")
             return False
 
-    def _parse_move(self, move: str) -> Tuple[int, int, int]:
+    def _parse_move(self, move: str) -> tuple[int, int, int]:
         """
         Parse a move string and extract disk number, from peg, and to peg.
 
@@ -376,7 +376,7 @@ class HanoiDataset(ProceduralDataset):
         to_peg = int(match.group(3))
         return disk, from_peg, to_peg
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """
         Score the user's solution for the Tower of Hanoi puzzle.
 
diff --git a/reasoning_gym/games/tsumego.py b/reasoning_gym/games/tsumego.py
index f979a1e4..5da5b074 100644
--- a/reasoning_gym/games/tsumego.py
+++ b/reasoning_gym/games/tsumego.py
@@ -19,7 +19,7 @@ TODO: Generate multi-step Tsumego problems.
 import re
 from dataclasses import dataclass
 from random import Random
-from typing import Any, Dict, List, Optional, Set, Tuple
+from typing import Any, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -62,11 +62,11 @@ class TsumegoDataset(ProceduralDataset):
         super().__init__(config=config, seed=config.seed, size=config.size)
 
     # New helper method for board copying
-    def _copy_board(self, board: List[List[str]]) -> List[List[str]]:
+    def _copy_board(self, board: list[list[str]]) -> list[list[str]]:
         """Return a deep copy of the board."""
         return [row[:] for row in board]
 
-    def _get_liberties(self, board: List[List[str]], row: int, col: int) -> Set[Tuple[int, int]]:
+    def _get_liberties(self, board: list[list[str]], row: int, col: int) -> set[tuple[int, int]]:
         """Get empty adjacent points (liberties) for a stone"""
         size = len(board)
         liberties = set()
@@ -76,7 +76,7 @@ class TsumegoDataset(ProceduralDataset):
                 liberties.add((r, c))
         return liberties
 
-    def _get_group(self, board: List[List[str]], row: int, col: int) -> Set[Tuple[int, int]]:
+    def _get_group(self, board: list[list[str]], row: int, col: int) -> set[tuple[int, int]]:
         """Get all stones in the same group (connected stones of same color)"""
         size = len(board)
         color = board[row][col]
@@ -94,14 +94,14 @@ class TsumegoDataset(ProceduralDataset):
                     queue.append((nr, nc))
         return group
 
-    def _count_liberties(self, board: List[List[str]], group: Set[Tuple[int, int]]) -> int:
+    def _count_liberties(self, board: list[list[str]], group: set[tuple[int, int]]) -> int:
         """Count total liberties for a group of stones"""
         liberties = set()
         for row, col in group:
             liberties.update(self._get_liberties(board, row, col))
         return len(liberties)
 
-    def _would_capture(self, board: List[List[str]], row: int, col: int, color: str) -> bool:
+    def _would_capture(self, board: list[list[str]], row: int, col: int, color: str) -> bool:
         """Check if a move would capture any opponent stones"""
         size = len(board)
         opponent = "O" if color == "X" else "X"
@@ -120,7 +120,7 @@ class TsumegoDataset(ProceduralDataset):
                     return True
         return False
 
-    def _is_valid_move(self, board: List[List[str]], row: int, col: int, color: str) -> bool:
+    def _is_valid_move(self, board: list[list[str]], row: int, col: int, color: str) -> bool:
         """Check if a move is legal (not suicide, unless it captures)"""
         size = len(board)
         if not (0 <= row < size and 0 <= col < size):
@@ -139,7 +139,7 @@ class TsumegoDataset(ProceduralDataset):
         group = self._get_group(board_copy, row, col)
         return self._count_liberties(board_copy, group) > 0
 
-    def _make_move(self, board: List[List[str]], row: int, col: int, color: str) -> bool:
+    def _make_move(self, board: list[list[str]], row: int, col: int, color: str) -> bool:
         """Make a move and update ko point. Returns True if move was valid."""
         if not self._is_valid_move(board, row, col, color):
             return False
@@ -164,7 +164,7 @@ class TsumegoDataset(ProceduralDataset):
 
         return True
 
-    def _generate_capture_problem(self, size: int, rng: Random) -> Tuple[List[List[str]], Tuple[int, int]]:
+    def _generate_capture_problem(self, size: int, rng: Random) -> tuple[list[list[str]], tuple[int, int]]:
         """Generate a capture problem"""
         board = [["." for _ in range(size)] for _ in range(size)]
         stones_placed = 0
@@ -235,7 +235,7 @@ class TsumegoDataset(ProceduralDataset):
             tries += 1
         raise RuntimeError("Failed to generate a capture problem")
 
-    def _board_to_string(self, board: List[List[str]]) -> str:
+    def _board_to_string(self, board: list[list[str]]) -> str:
         """Convert board to string representation"""
         size = len(board)
         # Column labels
@@ -272,7 +272,7 @@ class TsumegoDataset(ProceduralDataset):
             "metadata": {"difficulty": {"board_size": size}, "board": board, "solution": solution_str},
         }
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Score the answer against the solution"""
         if answer is None:
             return 0.0
diff --git a/reasoning_gym/geometry/advanced_geometry.py b/reasoning_gym/geometry/advanced_geometry.py
index 6f64daf8..90c8b463 100644
--- a/reasoning_gym/geometry/advanced_geometry.py
+++ b/reasoning_gym/geometry/advanced_geometry.py
@@ -1,11 +1,10 @@
 import random
-import re
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional
+from typing import Any, Optional
 
 import numpy as np
 import sympy
-from sympy.geometry import Point, Segment, Triangle
+from sympy.geometry import Point
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -23,7 +22,7 @@ class AdvancedGeometryConfig:
 
     # Probability or list of tasks we want to generate
     # For demonstration, we have three categories:
-    task_types: List[str] = field(
+    task_types: list[str] = field(
         default_factory=lambda: [
             "orthocenter",
             "incircle_radius",
@@ -228,7 +227,7 @@ class AdvancedGeometryDataset(ProceduralDataset):
         }
         return question, answer_str, metadata
 
-    def score_answer(self, answer: str | None, entry: Dict[str, Any]) -> float:
+    def score_answer(self, answer: str | None, entry: dict[str, Any]) -> float:
         reward = 0.0
         expected_answer = entry["answer"]
         metadata = entry["metadata"]
diff --git a/reasoning_gym/graphs/course_schedule.py b/reasoning_gym/graphs/course_schedule.py
index 75abe44e..15497cc9 100644
--- a/reasoning_gym/graphs/course_schedule.py
+++ b/reasoning_gym/graphs/course_schedule.py
@@ -8,7 +8,7 @@ https://leetcode.com/problems/course-schedule/description/
 from collections import defaultdict
 from dataclasses import dataclass
 from random import Random
-from typing import List, Optional
+from typing import Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -54,7 +54,7 @@ class CourseScheduleDataset(ProceduralDataset):
     def __init__(self, config: CourseScheduleConfig):
         super().__init__(config=config, seed=config.seed, size=config.size)
 
-    def _can_finish(self, num_courses: int, prerequisites: List[List[int]]) -> bool:
+    def _can_finish(self, num_courses: int, prerequisites: list[list[int]]) -> bool:
         adj = defaultdict(list)
         for course, prereq in prerequisites:
             adj[course].append(prereq)
@@ -81,7 +81,7 @@ class CourseScheduleDataset(ProceduralDataset):
 
         return True
 
-    def _create_prerequisites(self, rng: Random, courses: List[int], solvable: bool) -> List[List[int]]:
+    def _create_prerequisites(self, rng: Random, courses: list[int], solvable: bool) -> list[list[int]]:
         """Create a list of prerequisites for each course"""
         prerequisites = []
         # Generate a valid course schedule
diff --git a/reasoning_gym/graphs/family_relationships.py b/reasoning_gym/graphs/family_relationships.py
index d875d7a1..8011e375 100644
--- a/reasoning_gym/graphs/family_relationships.py
+++ b/reasoning_gym/graphs/family_relationships.py
@@ -2,7 +2,7 @@ import random
 from dataclasses import dataclass, field
 from enum import StrEnum
 from itertools import count
-from typing import List, Optional, Set, Tuple
+from typing import Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -37,8 +37,8 @@ class Person:
     gender: Gender
     id: int
     spouse: Optional["Person"] = None
-    parents: List["Person"] = field(default_factory=list)
-    children: List["Person"] = field(default_factory=list)
+    parents: list["Person"] = field(default_factory=list)
+    children: list["Person"] = field(default_factory=list)
 
     def __hash__(self):
         return self.id
@@ -65,7 +65,7 @@ class FamilyRelationshipsConfig:
 
     min_family_size: int = 4
     max_family_size: int = 8
-    male_names: List[str] = field(
+    male_names: list[str] = field(
         default_factory=lambda: [
             "James",
             "John",
@@ -112,7 +112,7 @@ class FamilyRelationshipsConfig:
             "Finn",
         ]
     )
-    female_names: List[str] = field(
+    female_names: list[str] = field(
         default_factory=lambda: [
             "Mary",
             "Patricia",
@@ -207,7 +207,7 @@ class FamilyRelationshipsDataset(ProceduralDataset):
             },
         }
 
-    def _generate_family(self, rng: random.Random) -> Set[Person]:
+    def _generate_family(self, rng: random.Random) -> set[Person]:
         """Generate a random family tree"""
         family_size = rng.randint(self.config.min_family_size, self.config.max_family_size)
         family = set()
@@ -292,8 +292,8 @@ class FamilyRelationshipsDataset(ProceduralDataset):
         return family
 
     def _get_relationship_question(
-        self, rng: random.Random, family: Set[Person]
-    ) -> Tuple[Person, Person, Relationship]:
+        self, rng: random.Random, family: set[Person]
+    ) -> tuple[Person, Person, Relationship]:
         """Select two family members and determine their relationship"""
         person1, person2 = rng.sample(list(family), 2)
 
@@ -326,7 +326,7 @@ class FamilyRelationshipsDataset(ProceduralDataset):
 
         return person1, person2, relationship
 
-    def _generate_story(self, family: Set[Person]) -> str:
+    def _generate_story(self, family: set[Person]) -> str:
         """Generate a story describing the family relationships"""
         story_parts = []
 
diff --git a/reasoning_gym/graphs/largest_island.py b/reasoning_gym/graphs/largest_island.py
index 1b9269be..528a7713 100644
--- a/reasoning_gym/graphs/largest_island.py
+++ b/reasoning_gym/graphs/largest_island.py
@@ -7,7 +7,7 @@ https://leetcode.com/problems/max-area-of-island/description/
 from collections import deque
 from dataclasses import dataclass
 from random import Random
-from typing import List, Optional
+from typing import Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -58,7 +58,7 @@ class LargestIslandDataset(ProceduralDataset):
     def _is_valid_cell(self, r: int, c: int) -> bool:
         return 0 <= r < self.config.rows and 0 <= c < self.config.cols
 
-    def _create_grid(self, rng: Random) -> List[List[int]]:
+    def _create_grid(self, rng: Random) -> list[list[int]]:
         """Create a random grid of islands using a random walk algorithm"""
         grid = [[0] * self.config.cols for _ in range(self.config.rows)]
         directions = [(-1, 0), (1, 0), (0, -1), (0, 1)]  # Up, Down, Left, Right
@@ -81,7 +81,7 @@ class LargestIslandDataset(ProceduralDataset):
 
         return grid
 
-    def _get_largest_island(self, grid: List[List[int]]) -> int:
+    def _get_largest_island(self, grid: list[list[int]]) -> int:
         """Find the largest island in the grid"""
         directions = [(-1, 0), (1, 0), (0, -1), (0, 1)]  # Up, Down, Left, Right
         visited = set()
@@ -108,11 +108,11 @@ class LargestIslandDataset(ProceduralDataset):
 
         return max_area
 
-    def _grid_to_string(self, grid: List[List[int]]) -> str:
+    def _grid_to_string(self, grid: list[list[int]]) -> str:
         """Convert grid to a string representation"""
         return "\n".join(" ".join(str(cell) for cell in row) for row in grid)
 
-    def _string_to_board(self, grid_str: str) -> List[List[int]]:
+    def _string_to_board(self, grid_str: str) -> list[list[int]]:
         """Convert string representation to a grid"""
         return [[int(cell) for cell in row.split()] for row in grid_str.split("\n")]
 
diff --git a/reasoning_gym/graphs/quantum_lock.py b/reasoning_gym/graphs/quantum_lock.py
index 5863a5bf..4d810145 100644
--- a/reasoning_gym/graphs/quantum_lock.py
+++ b/reasoning_gym/graphs/quantum_lock.py
@@ -164,7 +164,7 @@ Buttons:
         # If no solution found, regenerate
         return self.generate_quantum_puzzle(rng, difficulty)
 
-    def score_answer(self, answer: Optional[str], entry: dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Determine if the solution provided solves the task.
 
         The function awards 1.0 for a correct answer and less otherwise.
diff --git a/reasoning_gym/induction/__init__.py b/reasoning_gym/induction/__init__.py
new file mode 100644
index 00000000..f6dc1504
--- /dev/null
+++ b/reasoning_gym/induction/__init__.py
@@ -0,0 +1,10 @@
+"""
+Inductive reasoning tasks for training reasoning capabilities.
+"""
+
+from .list_functions import ListFunctionsDataset, ListFunctionsDatasetConfig
+
+__all__ = [
+    "ListFunctionsDataset",
+    "ListFunctionsDatasetConfig",
+]
diff --git a/reasoning_gym/induction/list_functions/__init__.py b/reasoning_gym/induction/list_functions/__init__.py
new file mode 100644
index 00000000..d99a03b9
--- /dev/null
+++ b/reasoning_gym/induction/list_functions/__init__.py
@@ -0,0 +1,6 @@
+from .list_functions import ListFunctionsDataset, ListFunctionsDatasetConfig
+
+__all__ = [
+    "ListFunctionsDatasetConfig",
+    "ListFunctionsDataset",
+]
diff --git a/reasoning_gym/induction/list_functions/generators.py b/reasoning_gym/induction/list_functions/generators.py
new file mode 100644
index 00000000..3e1966b8
--- /dev/null
+++ b/reasoning_gym/induction/list_functions/generators.py
@@ -0,0 +1,363 @@
+import random
+from random import Random
+from typing import Any, Dict
+
+NUM_OF_PAIRS_GENERATED = 5  # loop count used by every generate_<n>; identical inputs share one dict key
+
+
+def create_random_list(rng: Random):
+    """Return a list of 3 to 10 random integers, each drawn from 1..100 via ``rng``."""
+    return [rng.randint(1, 100) for _ in range(rng.randint(3, 10))]
+
+
+def create_list_of_fives(rng: Random):
+    """Return a list holding between 1 and 7 copies of the integer 5."""
+    return [5 for _ in range(rng.randint(1, 7))]
+
+
+def sort_integers(lst, order="ascending"):
+    """
+    Return a new list holding the integers of ``lst`` in sorted order.
+
+    Parameters:
+        lst (list): Integers to sort; the argument itself is not modified.
+        order (str): Either 'ascending' or 'descending'.
+
+    Returns:
+        list: A sorted copy of ``lst``.
+
+    Raises:
+        ValueError: If ``order`` is neither 'ascending' nor 'descending'.
+    """
+    if order not in ("ascending", "descending"):
+        raise ValueError("Invalid order. Use 'ascending' or 'descending'.")
+    return sorted(lst, reverse=(order == "descending"))
+
+
+def create_random_odd_numbers(count, start, end, rng=None):
+    """
+    Generates a list of random odd numbers.
+
+    Parameters:
+        count (int): The number of odd numbers to generate.
+        start (int): The lower bound of the range (inclusive).
+        end (int): The upper bound of the range (inclusive).
+        rng (Random, optional): Seeded source of randomness; when omitted the
+            module-level ``random`` generator is used, as before.
+
+    Returns:
+        list: A list of random odd numbers.
+    """
+    source = random if rng is None else rng
+    odd_numbers = []
+    while len(odd_numbers) < count:
+        num = source.randint(start, end)
+        if num % 2 != 0:  # even draws are discarded and retried
+            odd_numbers.append(num)
+    return odd_numbers
+
+
+def create_numbers_divisible_by_five_or_ten(rng: Random):
+    """Alternate odd multiples of 5 and multiples of 10, every draw taken from ``rng``."""
+    return [
+        create_random_odd_numbers(1, 1, 1000, rng=rng)[0] * 5 if i % 2 == 0 else rng.randint(1, 100) * 10
+        for i in range(NUM_OF_PAIRS_GENERATED)
+    ]
+
+
+def generate_0(rng: Random) -> Dict[str, Any]:
+    """Build example pairs for the identity rule: each output equals its input.
+
+    Both keys and values are the ``str()`` rendering of a random list.
+    """
+    examples = {}
+    for _ in range(NUM_OF_PAIRS_GENERATED):
+        rendered = str(create_random_list(rng))
+        # Identity mapping: the rendered input is stored as its own output.
+        examples[rendered] = rendered
+    return examples
+
+
+def generate_1(rng: Random) -> Dict[str, Any]:
+    """Build example pairs for the rule "keep only the third element".
+
+    Each input is a random list; the output is a one-element list holding the
+    item at zero-based index 2.
+    """
+    third = 2  # zero-based position of the element that survives
+    examples = {}
+
+    for _ in range(NUM_OF_PAIRS_GENERATED):
+        values = create_random_list(rng)
+        # Render both sides as strings; the dict maps input text -> output text.
+        examples[str(values)] = str([values[third]])
+
+    return examples
+
+
+def generate_2(rng: Random) -> Dict[str, Any]:
+    """Build example pairs for the rule "reverse the list"."""
+    examples = {}
+
+    for _ in range(NUM_OF_PAIRS_GENERATED):
+        values = create_random_list(rng)
+        backwards = list(reversed(values))
+        # Keys and values are the str() renderings of the two lists.
+        examples[str(values)] = str(backwards)
+
+    return examples
+
+
+def generate_3(rng: Random) -> Dict[str, Any]:
+    """Build example pairs for the rule "sum the distinct elements below 30".
+
+    Duplicated values are counted once. The output is a one-element list holding
+    that sum (``[0]`` when no element is below 30).
+    """
+    examples = {}
+
+    for _ in range(NUM_OF_PAIRS_GENERATED):
+        values = create_random_list(rng)
+
+        # De-duplicate first so a repeated small value contributes only once.
+        small_total = sum(v for v in set(values) if v < 30)
+
+        examples[str(values)] = str([small_total])
+
+    return examples
+
+
+def generate_4(rng: Random) -> Dict[str, Any]:
+    """Build example pairs for the rule "count how many elements equal 5".
+
+    On even loop indices the random list gets the result of
+    ``create_list_of_fives`` appended before it is shuffled. The output is a
+    one-element list holding the number of 5s in the final input.
+    """
+    examples = {}
+
+    for round_no in range(NUM_OF_PAIRS_GENERATED):
+        values = create_random_list(rng)
+
+        pad_with_fives = round_no % 2 == 0
+        if pad_with_fives:
+            values.extend(create_list_of_fives(rng))
+
+        # Shuffle unconditionally so any appended 5s do not stay at the tail.
+        rng.shuffle(values)
+
+        examples[str(values)] = str([values.count(5)])
+
+    return examples
+
+
+def generate_5(rng: Random) -> Dict[str, Any]:
+    """Build example pairs for the rule "keep elements followed by an even number".
+
+    An element is emitted when its immediate right-hand neighbour is even; the last
+    element has no neighbour and is never emitted.
+
+    NOTE: This is supposed to be a relatively hard problem.
+    """
+    examples = {}
+
+    for _ in range(NUM_OF_PAIRS_GENERATED):
+        values = create_random_list(rng)
+
+        # Walk adjacent (left, right) pairs; `left` is kept whenever its
+        # right-hand neighbour is even.
+        kept = [left for left, right in zip(values, values[1:]) if right % 2 == 0]
+
+        examples[str(values)] = str(kept)
+
+    return examples
+
+
+def generate_6(rng: Random) -> Dict[str, Any]:
+    """Build example pairs for the rule "add each element's zero-based index to it".
+
+    For input ``[a, b, c]`` the output is ``[a + 0, b + 1, c + 2]``.
+    """
+    examples = {}
+
+    for _ in range(NUM_OF_PAIRS_GENERATED):
+        values = create_random_list(rng)
+        shifted = [position + value for position, value in enumerate(values)]
+
+        # Keys and values are the str() renderings of the two lists.
+        examples[str(values)] = str(shifted)
+
+    return examples
+
+
+def generate_7(rng: Random) -> Dict[str, Any]:
+    """Build example pairs for the rule "the last element is an index into the list".
+
+    The final element of each input is overwritten with a random valid zero-based
+    index, and the output is a one-element list holding the item found there
+    (read after the overwrite, so pointing at the last slot yields the index itself).
+
+    EXAMPLE:
+    1. [26, 88, 60, 1, 17, 75, 97, 89, 1] -> [88]
+    2. [49, 71, 2, 61, 3] -> [61]
+    """
+    examples = {}
+
+    for _ in range(NUM_OF_PAIRS_GENERATED):
+        values = create_random_list(rng)
+        pointer = rng.randint(0, len(values) - 1)
+        values[-1] = pointer  # the pointer replaces whatever was in the last slot
+        picked = values[pointer]
+        examples[str(values)] = str([picked])
+
+    return examples
+
+
+def generate_8(rng: Random) -> Dict[str, Any]:
+    """Build example pairs for the rule "report the number of elements".
+
+    The output is a one-element list holding the length of the input list.
+    """
+    examples = {}
+
+    for _ in range(NUM_OF_PAIRS_GENERATED):
+        values = create_random_list(rng)
+        examples[str(values)] = str([len(values)])
+
+    return examples
+
+
+def generate_9(rng: Random) -> Dict[str, Any]:
+    """Build example pairs for the rule "add up every element".
+
+    The output is a one-element list holding the sum of the input list.
+    """
+    examples = {}
+
+    for _ in range(NUM_OF_PAIRS_GENERATED):
+        values = create_random_list(rng)
+        examples[str(values)] = str([sum(values)])
+
+    return examples
+
+
+def generate_10(rng: Random) -> Dict[str, Any]:
+    """Build example pairs for the rule "sort the list in ascending order".
+
+    The input keeps its random order; only the output is sorted.
+    """
+    examples = {}
+
+    for _ in range(NUM_OF_PAIRS_GENERATED):
+        values = create_random_list(rng)
+        examples[str(values)] = str(sorted(values))
+
+    return examples
+
+
+def generate_11(rng: Random) -> Dict[str, Any]:
+    """Build example pairs for the rule "sort the list in descending order".
+
+    The input keeps its random order; only the output is sorted.
+    """
+    examples = {}
+
+    for _ in range(NUM_OF_PAIRS_GENERATED):
+        values = create_random_list(rng)
+        examples[str(values)] = str(sorted(values, reverse=True))
+
+    return examples
+
+
+def generate_12(rng: Random) -> Dict[str, Any]:
+    """Build example pairs for the rule "increment the first and last elements".
+
+    Both end elements are replaced by their successor (e.g. 4 becomes 5); every
+    element in between is copied through unchanged.
+    """
+    examples = {}
+    for _ in range(NUM_OF_PAIRS_GENERATED):
+        values = create_random_list(rng)
+        # Work on a fresh list so the input rendering is unaffected.
+        bumped = list(values)
+        for end in (0, -1):
+            bumped[end] += 1
+
+        examples[str(values)] = str(bumped)
+
+    return examples
+
+
+def generate_13(rng: Random) -> Dict[str, Any]:
+    """Build example pairs for the rule "[1] if ascending, [0] if descending".
+
+    Inputs alternate: even loop indices get an ascending-sorted list, odd ones a
+    descending-sorted list.
+    """
+    examples = {}
+
+    for round_no in range(NUM_OF_PAIRS_GENERATED):
+        ascending = round_no % 2 == 0
+        values = sort_integers(create_random_list(rng), order="ascending" if ascending else "descending")
+
+        # The label is 1 for the ascending inputs and 0 for the descending ones.
+        label = [1] if ascending else [0]
+        examples[str(values)] = str(label)
+
+    return examples
+
+
+def generate_14(rng: Random) -> Dict[str, Any]:
+    """Build example pairs for the rule "[1] if divisible by 10, otherwise [0]".
+
+    Every input is a single-element list; its number is supplied by
+    ``create_numbers_divisible_by_five_or_ten`` and the output is a one-element
+    list holding the label.
+    """
+    examples = {}
+    candidates = create_numbers_divisible_by_five_or_ten(rng)
+
+    for number in candidates:
+        # 1 marks a multiple of 10; anything else is labelled 0.
+        flag = 1 if number % 10 == 0 else 0
+
+        question = str([number])
+        examples[question] = str([flag])
+
+    return examples
+
+
+def generate_15(rng: Random) -> Dict[str, Any]:
+    """Build example pairs for the rule "continue the doubling sequence".
+
+    Each input is a sequence in which every element is twice its predecessor; the
+    output is a one-element list holding the next term (twice the last element).
+    """
+    examples = {}
+
+    for _ in range(NUM_OF_PAIRS_GENERATED):
+        seed_values = create_random_list(rng)
+        start = rng.choice(seed_values)
+
+        # Same length as the seed list: start, 2*start, 4*start, ...
+        doubling = [start * 2**step for step in range(len(seed_values))]
+
+        # The expected answer is the term that would come next.
+        examples[str(doubling)] = str([doubling[-1] * 2])
+
+    return examples
+
+
+def generate_16(rng: Random) -> Dict[str, Any]:
+    """Build example pairs for the rule "apply f(x) = 2x - 4 to a single number".
+
+    NOTE: This is supposed to be amazingly hard for the LLM.
+    """
+    examples = {}
+
+    for _ in range(NUM_OF_PAIRS_GENERATED):
+        # Pick x from a freshly generated random list rather than drawing it directly.
+        x = rng.choice(create_random_list(rng))
+        examples[str([x])] = str([2 * x - 4])
+
+    return examples
diff --git a/reasoning_gym/induction/list_functions/list_functions.py b/reasoning_gym/induction/list_functions/list_functions.py
new file mode 100644
index 00000000..f91a683a
--- /dev/null
+++ b/reasoning_gym/induction/list_functions/list_functions.py
@@ -0,0 +1,81 @@
+"""List functions generators"""
+
+from dataclasses import dataclass
+from random import Random
+from typing import Any, Callable, Optional
+
+from reasoning_gym.factory import ProceduralDataset, register_dataset
+
+
+@dataclass
+class ListFunctionsDatasetConfig:
+    """Configuration for the list-function induction dataset."""
+
+    seed: Optional[int] = None  # forwarded to ProceduralDataset.__init__ by ListFunctionsDataset
+    size: int = 500  # forwarded to ProceduralDataset.__init__; must be positive (see validate)
+
+    def validate(self) -> None:
+        """Check the configuration; raises AssertionError unless size is positive."""
+        assert self.size > 0, "size must be positive"
+
+
+tasks = list(range(17))  # task ids 0-16; each id drawn must match a generate_<id> function in the `generators` module
+
+
+class ListFunctionsDataset(ProceduralDataset):
+    """Induction dataset: infer the rule behind example list pairs and apply it to a new input."""
+
+    def __init__(self, config: ListFunctionsDatasetConfig):
+        super().__init__(config, config.seed, config.size)
+        # Filled on first access of `generators`; every generator is called as generator(rng).
+        self._generators: Optional[dict[int, Callable[[Random], dict[str, Any]]]] = None
+        self.task_indices = Random(self.seed).choices(tasks, k=self.size)
+        self.prompt_template = """You are an expert at inductive reasoning. Generate an output corresponding to the given input.
+The output is generated by applying the same rule that maps input to output for the examples provided. Your answer should be a list of element/elements
+Examples:
+{examples}
+
+Input: {input}
+Output:
+"""
+
+    @property
+    def generators(self) -> dict[int, Callable[[Random], dict[str, Any]]]:
+        """Mapping from task id to generator function, loaded lazily on first access."""
+        if self._generators is None:
+            self._generators = self._load_generators()
+        return self._generators
+
+    def _load_generators(self) -> dict[int, Callable[[Random], dict[str, Any]]]:
+        """Collect every `generate_<n>` function of the sibling `generators` module, keyed by n."""
+        from . import generators
+
+        prefix = "generate_"
+        gs = {}
+        for n in dir(generators):
+            suffix = n[len(prefix) :]
+            # Skip names such as `generate_helper` whose suffix is not a task number.
+            if n.startswith(prefix) and suffix.isdecimal():
+                gs[int(suffix)] = getattr(generators, n)
+        return gs
+
+    def __getitem__(self, idx: int) -> dict:
+        """Build item `idx`: worked examples plus one held-out input whose output is the answer."""
+        rng = Random(self.seed + idx)
+        generator_idx = self.task_indices[idx]
+        generator = self.generators[generator_idx]
+        examples = generator(rng)
+        # The last generated pair becomes the question; the remaining pairs are shown as examples.
+        entry = examples.popitem()
+        input = entry[0]
+        output = entry[1]
+        formatted_examples = ""
+        for index, key in enumerate(examples):
+            formatted_examples += f"""Input {index + 1}: {key}
+Output {index + 1}: {examples[key]}
+"""
+        question = self.prompt_template.format(examples=formatted_examples, input=input)
+        return {"question": question, "answer": output, "metadata": {}}
+
+
+register_dataset("list_functions", ListFunctionsDataset, ListFunctionsDatasetConfig)
diff --git a/reasoning_gym/logic/__init__.py b/reasoning_gym/logic/__init__.py
index 99c2d56f..db976fba 100644
--- a/reasoning_gym/logic/__init__.py
+++ b/reasoning_gym/logic/__init__.py
@@ -4,21 +4,19 @@ Logic tasks for training reasoning capabilities.
 
 from .aiw import AliceInWonderlandConfig, AliceInWonderlandDataset
 from .circuit_logic import CircuitLogicConfig, CircuitLogicDataset
+from .propositional_logic import PropositionalLogicConfig, PropositionalLogicDataset
 from .self_reference import SelfReferenceConfig, SelfReferenceDataset
-from .syllogisms import SyllogismConfig, SyllogismDataset, Term
+from .syllogisms import SyllogismConfig, SyllogismDataset
 from .zebra_puzzles import ZebraConfig, ZebraDataset
 
-# from .propositional_logic import PropositionalLogicConfig, PropositionalLogicDataset
-
 __all__ = [
     "AliceInWonderlandConfig",
     "AliceInWonderlandDataset",
-    # "PropositionalLogicConfig",
-    # "PropositionalLogicDataset",
+    "PropositionalLogicConfig",
+    "PropositionalLogicDataset",
     "SyllogismConfig",
     "SyllogismDataset",
     "syllogism_dataset",
-    "Term",
     "ZebraConfig",
     "ZebraDataset",
     "SelfReference",
diff --git a/reasoning_gym/logic/aiw.py b/reasoning_gym/logic/aiw.py
index 00448280..7cabbb85 100644
--- a/reasoning_gym/logic/aiw.py
+++ b/reasoning_gym/logic/aiw.py
@@ -2,7 +2,7 @@ from dataclasses import dataclass, field
 from enum import StrEnum
 from random import Random
 from string import Template
-from typing import List, Optional
+from typing import Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -20,15 +20,15 @@ class AliceInWonderlandConfig:
     """Configuration options for the Alice in Wonderland dataset.
 
     Attributes:
-        male_names (List[str]): List of male names to use in questions.
-        female_names (List[str]): List of female names to use in questions. Must include 'Alice'.
-        task_types (List[TaskType]): List of task types to include in dataset.
+        male_names (list[str]): List of male names to use in questions.
+        female_names (list[str]): List of female names to use in questions. Must include 'Alice'.
+        task_types (list[TaskType]): List of task types to include in dataset.
         seed (Optional[int]): Seed for random number generation.
         size (int): Number of samples in the dataset.
         max_entities (int): Max number of siblings/friends/colleagues in questions.
     """
 
-    male_names: List[str] = field(
+    male_names: list[str] = field(
         default_factory=lambda: [
             "James",
             "John",
@@ -43,7 +43,7 @@ class AliceInWonderlandConfig:
             "Bob",
         ]
     )
-    female_names: List[str] = field(
+    female_names: list[str] = field(
         default_factory=lambda: [
             "Mary",
             "Patricia",
@@ -58,7 +58,7 @@ class AliceInWonderlandConfig:
             "Alice",
         ]
     )
-    task_types: List[TaskType] = field(
+    task_types: list[TaskType] = field(
         default_factory=lambda: [TaskType.SIBLINGS, TaskType.FRIENDS, TaskType.COLLEAGUES]  # Added Colleagues
     )
     seed: Optional[int] = None
diff --git a/reasoning_gym/logic/circuit_logic.py b/reasoning_gym/logic/circuit_logic.py
index d2219bf8..fd169076 100644
--- a/reasoning_gym/logic/circuit_logic.py
+++ b/reasoning_gym/logic/circuit_logic.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from random import Random
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -17,7 +17,7 @@ def _repeat(s: str, n: int) -> str:
     return s * n
 
 
-def _matrix_put(matrix: List[List[str]], h: int, w: int, x: int, y: int, s: str, direction: str):
+def _matrix_put(matrix: list[list[str]], h: int, w: int, x: int, y: int, s: str, direction: str):
     """Place a string `s` into the 2D `matrix` starting at (x,y),
     advancing in `direction` ('RIGHT' or 'DOWN')."""
     if x >= w or y >= h:
@@ -119,14 +119,14 @@ class CircuitLogicDataset(ProceduralDataset):
         self._current_idx = 0
         return self
 
-    def __next__(self) -> Dict[str, Any]:
+    def __next__(self) -> dict[str, Any]:
         if self._current_idx >= self.config.size:
             raise StopIteration
         item = self[self._current_idx]
         self._current_idx += 1
         return item
 
-    def __getitem__(self, idx: int) -> Dict[str, Any]:
+    def __getitem__(self, idx: int) -> dict[str, Any]:
         """
         Generate one random circuit logic item using ASCII drawing.
         """
@@ -142,14 +142,14 @@ class CircuitLogicDataset(ProceduralDataset):
 
     def _generate_circuit(
         self, rng: Random, num_terms: int, min_inputs: int, max_inputs: int, neg_prob: float, allow_reuse: bool
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         """
         Generate circuit logic (ASCII drawing + expression + evaluation)
         """
         final_gate_name, final_gate_sym = rng.choice(self.final_gate_options)
         final_gate_width = 2 + len(final_gate_sym)
 
-        distinct_inputs: List[str] = []
+        distinct_inputs: list[str] = []
 
         def get_random_input() -> str:
             if allow_reuse and distinct_inputs and rng.random() < 0.5:
@@ -159,8 +159,8 @@ class CircuitLogicDataset(ProceduralDataset):
                 distinct_inputs.append(name)
                 return name
 
-        term_ops: List[Tuple[str, str, str]] = []
-        term_strings: List[str] = []
+        term_ops: list[tuple[str, str, str]] = []
+        term_strings: list[str] = []
         for _ in range(num_terms):
             op = rng.choice(self.internal_ops)
             term_ops.append(op)
@@ -400,7 +400,7 @@ class CircuitLogicDataset(ProceduralDataset):
             },
         }
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         if answer is None or len(answer) == 0:
             return 0.0
 
diff --git a/reasoning_gym/logic/contrib/logic_puzzle/clues.py b/reasoning_gym/logic/contrib/logic_puzzle/clues.py
index 1fddcc24..a753744e 100644
--- a/reasoning_gym/logic/contrib/logic_puzzle/clues.py
+++ b/reasoning_gym/logic/contrib/logic_puzzle/clues.py
@@ -14,7 +14,7 @@ from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
 from functools import wraps
 from itertools import product
-from typing import Iterable, List, Tuple
+from typing import Iterable
 
 from reasoning_gym.logic.contrib.logic_puzzle.literals import Literal
 from reasoning_gym.logic.contrib.logic_puzzle.sat_utils import from_dnf, neg
@@ -39,7 +39,7 @@ class Clue(ABC):
     """Base class for the types of clues that we allow."""
 
     @abstractmethod
-    def as_cnf(self) -> Iterable[Tuple[str]]: ...
+    def as_cnf(self) -> Iterable[tuple[str]]: ...
 
     @abstractmethod
     def __repr__(self) -> str: ...
@@ -67,7 +67,7 @@ class found_at(Clue):
     value: Literal
     house: int
 
-    def as_cnf(self) -> List[Tuple[str]]:
+    def as_cnf(self) -> list[tuple[str]]:
         return [(comb(self.value, self.house),)]
 
     @_capitalize_first
@@ -89,7 +89,7 @@ class not_at(Clue):
     value: Literal
     house: int
 
-    def as_cnf(self) -> List[Tuple[str]]:
+    def as_cnf(self) -> list[tuple[str]]:
         return [(neg(comb(self.value, self.house)),)]
 
     @_capitalize_first
@@ -110,9 +110,9 @@ class same_house(Clue):
 
     value1: Literal
     value2: Literal
-    houses: Tuple[int, ...] = field(default_factory=lambda: (1, 2, 3, 4, 5))
+    houses: tuple[int, ...] = field(default_factory=lambda: (1, 2, 3, 4, 5))
 
-    def as_cnf(self) -> List[Tuple[str]]:
+    def as_cnf(self) -> list[tuple[str]]:
         return from_dnf((comb(self.value1, i), comb(self.value2, i)) for i in self.houses)
 
     @_capitalize_first
@@ -134,9 +134,9 @@ class consecutive(Clue):
 
     value1: Literal
     value2: Literal
-    houses: Tuple[int, ...] = field(default_factory=lambda: (1, 2, 3, 4, 5))
+    houses: tuple[int, ...] = field(default_factory=lambda: (1, 2, 3, 4, 5))
 
-    def as_cnf(self) -> List[Tuple[str]]:
+    def as_cnf(self) -> list[tuple[str]]:
         return from_dnf((comb(self.value1, i), comb(self.value2, j)) for i, j in zip(self.houses, self.houses[1:]))
 
     @_capitalize_first
@@ -156,9 +156,9 @@ class beside(Clue):
 
     value1: Literal
     value2: Literal
-    houses: Tuple[int, ...] = field(default_factory=lambda: (1, 2, 3, 4, 5))
+    houses: tuple[int, ...] = field(default_factory=lambda: (1, 2, 3, 4, 5))
 
-    def as_cnf(self) -> List[Tuple[str]]:
+    def as_cnf(self) -> list[tuple[str]]:
         return from_dnf(
             [(comb(self.value1, i), comb(self.value2, j)) for i, j in zip(self.houses, self.houses[1:])]
             + [(comb(self.value2, i), comb(self.value1, j)) for i, j in zip(self.houses, self.houses[1:])]
@@ -182,9 +182,9 @@ class left_of(Clue):
 
     value1: Literal
     value2: Literal
-    houses: Tuple[int, ...] = field(default_factory=lambda: (1, 2, 3, 4, 5))
+    houses: tuple[int, ...] = field(default_factory=lambda: (1, 2, 3, 4, 5))
 
-    def as_cnf(self) -> List[Tuple[str]]:
+    def as_cnf(self) -> list[tuple[str]]:
         return from_dnf(
             (comb(self.value1, i), comb(self.value2, j)) for i, j in product(self.houses, self.houses) if i < j
         )
@@ -207,9 +207,9 @@ class right_of(Clue):
 
     value1: Literal
     value2: Literal
-    houses: Tuple[int, ...] = field(default_factory=lambda: (1, 2, 3, 4, 5))
+    houses: tuple[int, ...] = field(default_factory=lambda: (1, 2, 3, 4, 5))
 
-    def as_cnf(self) -> List[Tuple[str]]:
+    def as_cnf(self) -> list[tuple[str]]:
         return sat_utils.from_dnf(
             (comb(self.value1, i), comb(self.value2, j)) for i, j in product(self.houses, self.houses) if i > j
         )
@@ -233,9 +233,9 @@ class one_between(Clue):
 
     value1: Literal
     value2: Literal
-    houses: Tuple[int, ...] = field(default_factory=lambda: (1, 2, 3, 4, 5))
+    houses: tuple[int, ...] = field(default_factory=lambda: (1, 2, 3, 4, 5))
 
-    def as_cnf(self) -> List[Tuple[str]]:
+    def as_cnf(self) -> list[tuple[str]]:
         return from_dnf(
             [(comb(self.value1, i), comb(self.value2, j)) for i, j in zip(self.houses, self.houses[2:])]
             + [(comb(self.value2, i), comb(self.value1, j)) for i, j in zip(self.houses, self.houses[2:])]
@@ -257,9 +257,9 @@ class two_between(Clue):
 
     value1: Literal
     value2: Literal
-    houses: Tuple[int, ...] = field(default_factory=lambda: (1, 2, 3, 4, 5))
+    houses: tuple[int, ...] = field(default_factory=lambda: (1, 2, 3, 4, 5))
 
-    def as_cnf(self) -> List[Tuple[str]]:
+    def as_cnf(self) -> list[tuple[str]]:
         return from_dnf(
             [(comb(self.value1, i), comb(self.value2, j)) for i, j in zip(self.houses, self.houses[3:])]
             + [(comb(self.value2, i), comb(self.value1, j)) for i, j in zip(self.houses, self.houses[3:])]
diff --git a/reasoning_gym/logic/contrib/logic_puzzle/generate.py b/reasoning_gym/logic/contrib/logic_puzzle/generate.py
index 4012655b..9bc4bb02 100644
--- a/reasoning_gym/logic/contrib/logic_puzzle/generate.py
+++ b/reasoning_gym/logic/contrib/logic_puzzle/generate.py
@@ -7,7 +7,7 @@ This is a driver script that can be used to generate new zebra puzzles.
 from collections import OrderedDict
 from itertools import product
 from random import Random
-from typing import Dict, Iterable, List, Set, Tuple, Type
+from typing import Iterable, Type
 
 from tabulate import tabulate
 
@@ -18,18 +18,18 @@ from reasoning_gym.logic.contrib.logic_puzzle.sat_utils import itersolve
 from .clues import Clue, beside, consecutive, found_at, left_of, not_at, one_between, right_of, same_house, two_between
 
 
-def generate_found_at(puzzle: Puzzle, solution: OrderedDict[Literal, int]) -> Set[Clue]:
+def generate_found_at(puzzle: Puzzle, solution: OrderedDict[Literal, int]) -> set[Clue]:
     """Generate the `found_at` / `not_at` Clue instances"""
-    clues: Set[Clue] = set()
+    clues: set[Clue] = set()
     for element, loc in solution.items():
         clues.add(found_at(element, loc))
 
     return clues
 
 
-def generate_not_found_at(puzzle: Puzzle, solution: Dict[Literal, int]) -> Set[Clue]:
+def generate_not_found_at(puzzle: Puzzle, solution: dict[Literal, int]) -> set[Clue]:
     """Generate the `found_at` / `not_at` Clue instances"""
-    clues: Set[Clue] = set()
+    clues: set[Clue] = set()
     for element, loc in solution.items():
         for house in puzzle.houses:
             if house != loc:
@@ -38,13 +38,13 @@ def generate_not_found_at(puzzle: Puzzle, solution: Dict[Literal, int]) -> Set[C
     return clues
 
 
-def generate_same_house(puzzle: Puzzle, solution: OrderedDict[Literal, int]) -> Set[Clue]:
+def generate_same_house(puzzle: Puzzle, solution: OrderedDict[Literal, int]) -> set[Clue]:
     """Generate the `same_house` Clue instances"""
 
-    clues: Set[Clue] = set()
+    clues: set[Clue] = set()
     for house in puzzle.houses:
         items_at_house = {item: loc for item, loc in solution.items() if loc == house}
-        pairs: Set[Tuple[Literal, Literal]] = {
+        pairs: set[tuple[Literal, Literal]] = {
             (item1, item2) for item1, item2 in product(items_at_house, repeat=2) if item1 != item2
         }
         for pair in pairs:
@@ -53,18 +53,18 @@ def generate_same_house(puzzle: Puzzle, solution: OrderedDict[Literal, int]) ->
     return clues
 
 
-def generate_consecutive_beside(puzzle: Puzzle, solution: OrderedDict[Literal, int]) -> Set[Clue]:
+def generate_consecutive_beside(puzzle: Puzzle, solution: OrderedDict[Literal, int]) -> set[Clue]:
     """Generate the `consecutive` / `beside` Clue instances
 
     (Note that consecutive is just a more informative version of beside. Since they have the same
     structure, for every possible combination we'll just keep one.
     """
 
-    clues: Set[Clue] = set()
+    clues: set[Clue] = set()
     for left, right in zip(puzzle.houses, puzzle.houses[1:]):
         items_left = {item: loc for item, loc in solution.items() if loc == left}
         items_right = {item: loc for item, loc in solution.items() if loc == right}
-        pairs: Set[Tuple[Literal, Literal]] = {(item1, item2) for item1, item2 in product(items_left, items_right)}
+        pairs: set[tuple[Literal, Literal]] = {(item1, item2) for item1, item2 in product(items_left, items_right)}
         # sorted, no hash randomization
         for pair in sorted(pairs):
             # consecutive is just a more informative version of beside, but they have same structure
@@ -77,20 +77,20 @@ def generate_consecutive_beside(puzzle: Puzzle, solution: OrderedDict[Literal, i
     return clues
 
 
-def generate_left_right_of(puzzle: Puzzle, solution: Dict[Literal, int]) -> Set[Clue]:
+def generate_left_right_of(puzzle: Puzzle, solution: dict[Literal, int]) -> set[Clue]:
     """Generate the `left_of` / `right_of` Clue instances
     Note that since (x left-of y) is guaranteed to be redundant with (b right-of a), we only add
     one of these clues to the final set.
     """
 
-    clues: Set[Clue] = set()
+    clues: set[Clue] = set()
     for left, right in product(puzzle.houses, puzzle.houses):
         if left >= right:
             continue
 
         items_left = {item: loc for item, loc in solution.items() if loc == left}
         items_right = {item: loc for item, loc in solution.items() if loc == right}
-        pairs: Set[Tuple[Literal, Literal]] = {(item1, item2) for item1, item2 in product(items_left, items_right)}
+        pairs: set[tuple[Literal, Literal]] = {(item1, item2) for item1, item2 in product(items_left, items_right)}
         # sorted, no hash randomization
         for pair in sorted(pairs):
             if puzzle.rng.randint(0, 1) == 0:
@@ -101,28 +101,28 @@ def generate_left_right_of(puzzle: Puzzle, solution: Dict[Literal, int]) -> Set[
     return clues
 
 
-def generate_one_between(puzzle: Puzzle, solution: Dict[Literal, int]) -> Set[Clue]:
+def generate_one_between(puzzle: Puzzle, solution: dict[Literal, int]) -> set[Clue]:
     """Generate the `one_between` Clue instances"""
 
-    clues: Set[Clue] = set()
+    clues: set[Clue] = set()
     for left, right in zip(puzzle.houses, puzzle.houses[2:]):
         items_left = {item: loc for item, loc in solution.items() if loc == left}
         items_right = {item: loc for item, loc in solution.items() if loc == right}
-        pairs: Set[Tuple[Literal, Literal]] = {(item1, item2) for item1, item2 in product(items_left, items_right)}
+        pairs: set[tuple[Literal, Literal]] = {(item1, item2) for item1, item2 in product(items_left, items_right)}
         for pair in pairs:
             clues.add(one_between(pair[0], pair[1], puzzle.houses))
 
     return clues
 
 
-def generate_two_between(puzzle: Puzzle, solution: Dict[Literal, int]) -> Set[Clue]:
+def generate_two_between(puzzle: Puzzle, solution: dict[Literal, int]) -> set[Clue]:
     """Generate the `two_between` Clue instances"""
 
-    clues: Set[Clue] = set()
+    clues: set[Clue] = set()
     for left, right in zip(puzzle.houses, puzzle.houses[3:]):
         items_left = {item: loc for item, loc in solution.items() if loc == left}
         items_right = {item: loc for item, loc in solution.items() if loc == right}
-        pairs: Set[Tuple[Literal, Literal]] = {(item1, item2) for item1, item2 in product(items_left, items_right)}
+        pairs: set[tuple[Literal, Literal]] = {(item1, item2) for item1, item2 in product(items_left, items_right)}
         for pair in pairs:
             clues.add(two_between(pair[0], pair[1], puzzle.houses))
 
@@ -144,7 +144,7 @@ def has_unique_solution(puzzle: Puzzle, clues: Iterable[Clue]) -> bool:
             return False
 
 
-def try_to_remove(puzzle: Puzzle, clues: Set[Clue], n: int, must_have=set()) -> Set[Clue]:
+def try_to_remove(puzzle: Puzzle, clues: set[Clue], n: int, must_have=set()) -> set[Clue]:
     """
     Attempt to remove n clues from a set of candidate clues; if we are able to, return the new,
     smaller set of clues. If not, return the original set.
@@ -152,7 +152,7 @@ def try_to_remove(puzzle: Puzzle, clues: Set[Clue], n: int, must_have=set()) ->
 
     def weight(clue: Clue) -> float:
         # relative probabilities of each type of clue being selected for removal
-        weights: Dict[Type[Clue], float] = {
+        weights: dict[Type[Clue], float] = {
             not_at: 0.75,
             found_at: 0.75,
             same_house: 0.75,
@@ -167,7 +167,7 @@ def try_to_remove(puzzle: Puzzle, clues: Set[Clue], n: int, must_have=set()) ->
 
     # sorted, no hash randomization
     weights = [weight(clue) for clue in sorted(clues)]
-    candidates: Set[Clue] = set(puzzle.rng.choices(sorted(clues), weights, k=n))
+    candidates: set[Clue] = set(puzzle.rng.choices(sorted(clues), weights, k=n))
     candidates = candidates - must_have
     clues = clues.difference(candidates)
     if has_unique_solution(puzzle, clues):
@@ -180,8 +180,8 @@ def try_to_remove(puzzle: Puzzle, clues: Set[Clue], n: int, must_have=set()) ->
 
 
 def reduce_individually(
-    puzzle: Puzzle, clues: Set[Clue], removed: Set[Clue], must_have=set()
-) -> Tuple[Set[Clue], Set[Clue]]:
+    puzzle: Puzzle, clues: set[Clue], removed: set[Clue], must_have=set()
+) -> tuple[set[Clue], set[Clue]]:
     """
     Attempt to remove each candidate clue one by one.
 
@@ -202,7 +202,7 @@ def reduce_individually(
     return clues, removed
 
 
-def reduce_clues(puzzle: Puzzle, clues: Set[Clue], must_have=set()) -> Tuple[Set[Clue], Set[Clue]]:
+def reduce_clues(puzzle: Puzzle, clues: set[Clue], must_have=set()) -> tuple[set[Clue], set[Clue]]:
     """
     Reduce a set of clues to a minimally solvable set.
 
@@ -265,7 +265,7 @@ def reduce_clues(puzzle: Puzzle, clues: Set[Clue], must_have=set()) -> Tuple[Set
 
     # secondary reduction time! While we can still remove clues, do so; then we're done.
     # print(f"Starting the secondary reduction.")
-    removed_clues: Set[Clue] = set()
+    removed_clues: set[Clue] = set()
     while True:
         minimal_clues_size = len(minimal_clues)
         minimal_clues, removed_clues = reduce_individually(puzzle, minimal_clues, removed_clues, must_have)
@@ -304,12 +304,12 @@ def question_generation(rng: Random, col_name, table_data):
     return questions_data
 
 
-def generate_solution_dict(rng: Random, selected_elements: List[Literal], n: int) -> OrderedDict[Literal, int]:
+def generate_solution_dict(rng: Random, selected_elements: list[Literal], n: int) -> OrderedDict[Literal, int]:
     solution = OrderedDict()
     house_ids = list(range(1, n + 1))
     for element in selected_elements:
         rng.shuffle(house_ids)
-        attributes: List[Literal] = sorted(element.__members__.values())
+        attributes: list[Literal] = sorted(element.__members__.values())
         for i in range(n):
             solution[attributes[i]] = house_ids[i]
     return solution
@@ -376,7 +376,7 @@ def generate_puzzle(rng: Random, K=2, M=3) -> tuple[OrderedDict, Puzzle]:
     context = str(puzzle)
 
     # generate all the clues
-    clues: Set[Clue] = set()
+    clues: set[Clue] = set()
 
     for generate_function in clue_types:
         clues = clues.union(generate_function(puzzle, solution))
diff --git a/reasoning_gym/logic/contrib/logic_puzzle/puzzle.py b/reasoning_gym/logic/contrib/logic_puzzle/puzzle.py
index 6aba4545..640b3a0f 100644
--- a/reasoning_gym/logic/contrib/logic_puzzle/puzzle.py
+++ b/reasoning_gym/logic/contrib/logic_puzzle/puzzle.py
@@ -7,7 +7,7 @@ from __future__ import annotations
 
 from contextlib import contextmanager
 from random import Random
-from typing import Generator, Iterable, List, Set, Tuple, Type
+from typing import Generator, Iterable, Type
 
 from reasoning_gym.logic.contrib.logic_puzzle.clues import (
     Clue,
@@ -82,12 +82,12 @@ class Puzzle:
             self.literals = list(elements)
 
         self.houses = tuple(range(1, n_houses + 1))
-        self.clues: Set[Clue] = set()
-        self.constraints: List[Tuple[str]] = []
-        self.extra_clues: Set[Clue] = set()
+        self.clues: set[Clue] = set()
+        self.constraints: list[tuple[str]] = []
+        self.extra_clues: set[Clue] = set()
         self.solution = None
 
-    def _add_constraint(self, constraints: List[Tuple[str]]) -> Puzzle:
+    def _add_constraint(self, constraints: list[tuple[str]]) -> Puzzle:
         self.constraints.extend(constraints)
         return self
 
@@ -128,7 +128,7 @@ class Puzzle:
 
         return self
 
-    def as_cnf(self) -> List[Tuple[str]]:
+    def as_cnf(self) -> list[tuple[str]]:
         """Express puzzle as solvable CNF"""
 
         # this would be a comprehension if we could use iterable unpacking
@@ -195,8 +195,8 @@ they smoke, and what pet they own.
 """
 
 if __name__ == "__main__":
-    enum_classes: List[Type[Literal]] = [Color, Nationality, Animal, Drink, Cigar]
-    literals: List[Literal] = [el for group in enum_classes for el in group]
+    enum_classes: list[Type[Literal]] = [Color, Nationality, Animal, Drink, Cigar]
+    literals: list[Literal] = [el for group in enum_classes for el in group]
 
     # set up the puzzle with constraints and clues
     puzzle = Puzzle(rng=Random(), element_types=[Color, Nationality, Drink, Cigar, Animal])
@@ -245,7 +245,7 @@ in between them that neither is sitting in).
 """
 
 if __name__ == "__main__":
-    enum_classes: List[Type[Literal]] = [Mother, Children, Flower, Food]
+    enum_classes: list[Type[Literal]] = [Mother, Children, Flower, Food]
     literals = [el for group in enum_classes for el in group]
 
     # set up the puzzle with constraints and clues
diff --git a/reasoning_gym/logic/contrib/logic_puzzle/sat_utils.py b/reasoning_gym/logic/contrib/logic_puzzle/sat_utils.py
index df5e26e9..5454aef1 100644
--- a/reasoning_gym/logic/contrib/logic_puzzle/sat_utils.py
+++ b/reasoning_gym/logic/contrib/logic_puzzle/sat_utils.py
@@ -4,15 +4,15 @@ __author__ = "Raymond Hettinger"
 
 from functools import lru_cache
 from itertools import combinations
-from typing import Dict, FrozenSet, Iterable, List, Set, Tuple
+from typing import FrozenSet, Iterable
 
 import pycosat
 
 Element = str  # literal; any string, but here it's <element house#> e.g., "tushar 5" or "chai 2"
-CNF = List[Tuple[Element, ...]]
+CNF = list[tuple[Element, ...]]
 
 
-def make_translate(cnf: CNF) -> Tuple[Dict[Element, int], Dict[int, Element]]:
+def make_translate(cnf: CNF) -> tuple[dict[Element, int], dict[int, Element]]:
     """Make a translator from symbolic CNF to pycosat's numbered clauses.
 
     Return literal to number dictionary and reverse lookup dict.
@@ -22,7 +22,7 @@ def make_translate(cnf: CNF) -> Tuple[Dict[Element, int], Dict[int, Element]]:
      {1: 'a', 2: 'b', 3: 'c', -1: '~a', -3: '~c', -2: '~b'})
     """
 
-    lit2num: Dict[Element, int] = {}
+    lit2num: dict[Element, int] = {}
     for clause in cnf:
         for literal in clause:
             if literal not in lit2num:
@@ -36,7 +36,7 @@ def make_translate(cnf: CNF) -> Tuple[Dict[Element, int], Dict[int, Element]]:
     return lit2num, num2var
 
 
-def translate(cnf: CNF, uniquify=False) -> Tuple[List[Tuple[int, ...]], Dict[int, Element]]:
+def translate(cnf: CNF, uniquify=False) -> tuple[list[tuple[int, ...]], dict[int, Element]]:
     """Translate a symbolic CNF to a numbered CNF and return reverse mapping.
 
     >>> translate([['~P', 'Q'],['~P', 'R']])
@@ -78,14 +78,14 @@ def neg(element: str) -> str:
     return element[1:] if element.startswith("~") else "~" + element
 
 
-def from_dnf(groups: Iterable[Tuple[str, ...]]) -> CNF:
+def from_dnf(groups: Iterable[tuple[str, ...]]) -> CNF:
     """Convert from or-of-ands to and-of-ors
 
     >>> from_dnf([['~P'], ['Q', 'R']])
     [('~P', 'Q'), ('~P', 'R')]
     """
 
-    cnf: Set[FrozenSet[str]] = {frozenset()}
+    cnf: set[FrozenSet[str]] = {frozenset()}
     for group in groups:
         nl = {frozenset([literal]): neg(literal) for literal in group}
         # The "clause | literal" prevents dup lits: {x, x, y} -> {x, y}
@@ -134,7 +134,7 @@ class Q:
         return f"{self.__class__.__name__}(elements={self.elements!r})"
 
 
-def all_of(elements: List[Element]) -> CNF:
+def all_of(elements: list[Element]) -> CNF:
     """Forces inclusion of matching rows on a truth table"""
     return Q(elements) == len(elements)
 
diff --git a/reasoning_gym/logic/propositional_logic.py b/reasoning_gym/logic/propositional_logic.py
index 395c919f..dec8a5a7 100644
--- a/reasoning_gym/logic/propositional_logic.py
+++ b/reasoning_gym/logic/propositional_logic.py
@@ -1,13 +1,57 @@
 """Propositional logic task generator"""
 
+import re
 from dataclasses import dataclass
 from enum import StrEnum
 from random import Random
-from typing import Any, List, Optional, Set
+from typing import Any, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
 
+def parse_expr(expr: str):
+    """Parse a propositional-logic string into an Expression tree.
+
+    Redundant outer parentheses are dropped, then the string is split at the
+    rightmost top-level binary operator, trying IFF, IMPLIES, OR and AND in
+    that order. A leading NOT symbol becomes a NOT node; anything else becomes
+    a leaf (operator None, text stored as the left operand).
+
+    Raises:
+        ValueError: if expr, or any sub-expression reached while parsing, is blank.
+    """
+    expr = expr.strip()
+    if not expr:
+        raise ValueError("Empty expression")
+
+    if expr.startswith("(") and expr.endswith(")"):
+        # Strip the outer pair only if the interior is balanced and never dips below zero.
+        depth = 0
+        for ch in expr[1:-1]:
+            depth += {"(": 1, ")": -1}.get(ch, 0)
+            if depth < 0:
+                break
+        if depth == 0:
+            return parse_expr(expr[1:-1])
+
+    # Lowest precedence first, so the loosest-binding operator becomes the root.
+    for operator in (Operator.IFF, Operator.IMPLIES, Operator.OR, Operator.AND):
+        symbol = operator.value
+        depth = 0
+        for i in reversed(range(len(expr))):
+            ch = expr[i]
+            if ch in ("(", ")"):
+                depth += 1 if ch == ")" else -1
+            elif depth == 0 and expr[i : i + len(symbol)] == symbol:
+                return Expression(operator, parse_expr(expr[:i]), parse_expr(expr[i + len(symbol) :]))
+
+    negation = Operator.NOT.value
+    if expr.startswith(negation):
+        return Expression(Operator.NOT, parse_expr(expr[len(negation) :]))
+
+    return Expression(None, expr)
+
+
 class Operator(StrEnum):
     """Basic logical operators"""
 
@@ -18,6 +62,24 @@ class Operator(StrEnum):
     IFF = "↔"
 
 
+# Instruction preamble for generated questions: states the task and lists the
+# Unicode glyph to use for each logical operator. It has no trailing newline.
+QUESTION_FORMAT = (
+    "The following question is a propositional logic reasoning question.\n"
+    "In the question we provide a list of premises\n"
+    "The task is to infer a correct conclusion from the premise.\n"
+    "FORMAT INSTRUCTIONS:\n"
+    "Return the conclusion logic statement, as your final answer.\n"
+    "Use the following notation to denote symbols\n"
+    "OR = \u2228\n"
+    "AND = \u2227\n"
+    "IMPLIES = \u2192\n"
+    "IFF = \u2194\n"
+    "NOT = \u00ac\n"
+    "Here is the question:"
+)
+
+
 @dataclass
 class PropositionalLogicConfig:
     """Configuration for propositional logic task generation"""
@@ -63,6 +125,42 @@ class Expression:
             return self.left.evaluate(assignments) == self.right.evaluate(assignments)
         raise ValueError(f"Unknown operator: {self.operator}")
 
+    @classmethod
+    def from_string(cls, expr: str) -> "Expression":
+        parsed_expr = parse_expr(expr)  # module-level parser; raises ValueError on empty text
+        return cls(parsed_expr.operator, parsed_expr.left, parsed_expr.right)  # rebuild the root node as cls
+
+    def simplify(self):
+        """Return a simplified expression; operands are simplified first.
+
+        Collapses double negation, AND/OR whose left operand is a literal
+        True/False or equals the right operand, and rewrites A → B as ¬A ∨ B.
+        """
+        op = self.operator
+        if op is None:
+            return self
+
+        lhs = self.left.simplify() if isinstance(self.left, Expression) else self.left
+        rhs = self.right
+        if rhs and isinstance(rhs, Expression):
+            rhs = rhs.simplify()
+
+        if op == Operator.NOT:
+            is_double_negation = isinstance(lhs, Expression) and lhs.operator == Operator.NOT
+            return lhs.left if is_double_negation else Expression(Operator.NOT, lhs)
+
+        if op in {Operator.AND, Operator.OR}:
+            identity = op == Operator.AND  # True is neutral for AND, False for OR
+            if lhs is identity:
+                return rhs
+            if lhs is (not identity):  # the other constant decides the result
+                return lhs
+            if lhs == rhs:
+                return lhs
+        elif op == Operator.IMPLIES:
+            return Expression(Operator.OR, Expression(Operator.NOT, lhs), rhs).simplify()
+        return Expression(op, lhs, rhs)
+
     def __str__(self) -> str:
         if self.operator is None:
             return self.left
@@ -103,27 +201,27 @@ class PropositionalLogicDataset(ProceduralDataset):
         # Generate premises
         num_statements = rng.randint(self.config.min_statements, self.config.max_statements)
         premises = self._generate_premises(rng, variables, num_statements)
-
-        # Generate a valid conclusion
         conclusion = self._find_valid_conclusion(rng, premises, variables)
 
         # Format question
-        question = "Given:\n"
+        question = QUESTION_FORMAT + "\n"  # QUESTION_FORMAT ends without a newline
+        question += "Given:\n"
         for i, premise in enumerate(premises, 1):
-            question += f"{i}. {premise}\n"
-        question += "What can we conclude?"
+            question += f"{i}. {premise}.\n"  # period ends the premise, then the line break
+        question += "What can we conclude from the above statements?"
 
         return {
             "question": question,
-            "answer": str(conclusion),
+            "answer": None,
             "metadata": {
                 "premises": [str(p) for p in premises],
                 "variables": variables,
                 "complexity": self._measure_complexity(conclusion),
+                "example_answer": str(conclusion),
             },
         }
 
-    def _generate_premises(self, rng: Random, variables: List[str], num_statements: int) -> List[Expression]:
+    def _generate_premises(self, rng: Random, variables: list[str], num_statements: int) -> list[Expression]:
         """Generate a list of premise statements"""
         premises = []
         for _ in range(num_statements):
@@ -131,11 +229,10 @@ class PropositionalLogicDataset(ProceduralDataset):
             premises.append(self._generate_expression(rng, variables, depth))
         return premises
 
-    def _generate_expression(self, rng: Random, variables: List[str], depth: int) -> Expression:
+    def _generate_expression(self, rng: Random, variables: list[str], depth: int) -> Expression:
         """Generate a random logical expression"""
         if depth <= 1:
             return Expression(None, rng.choice(variables))
-
         operator = rng.choice(list(Operator))
         if operator == Operator.NOT:
             return Expression(operator, self._generate_expression(rng, variables, depth - 1))
@@ -144,18 +241,17 @@ class PropositionalLogicDataset(ProceduralDataset):
             right = self._generate_expression(rng, variables, depth - 1)
             return Expression(operator, left, right)
 
-    def _find_valid_conclusion(self, rng: Random, premises: List[Expression], variables: List[str]) -> Expression:
+    def _find_valid_conclusion(self, rng: Random, premises: list[Expression], variables: list[str]) -> Expression:
         """Find a valid conclusion that follows from the premises"""
-        # Try random conclusions until we find a valid one
         for _ in range(100):
-            candidate = self._generate_expression(rng, variables, 2)
-            if self._is_valid_conclusion(premises, candidate):
+            candidate = self._generate_expression(rng, variables, 2).simplify()
+            if self._is_valid_conclusion(premises, candidate) and not (self._is_trivial(candidate)):
                 return candidate
 
         # Fallback to a simple conclusion
         return Expression(None, variables[0])
 
-    def _is_valid_conclusion(self, premises: List[Expression], conclusion: Expression) -> bool:
+    def _is_valid_conclusion(self, premises: list[Expression], conclusion: Expression) -> bool:
         """Check if conclusion follows from premises using truth tables"""
         variables = self._collect_variables(premises + [conclusion])
 
@@ -166,7 +262,7 @@ class PropositionalLogicDataset(ProceduralDataset):
                 return False
         return True
 
-    def _collect_variables(self, expressions: List[Expression]) -> Set[str]:
+    def _collect_variables(self, expressions: list[Expression]) -> set[str]:
         """Collect all variables used in expressions"""
         variables = set()
         for expr in expressions:
@@ -179,7 +275,7 @@ class PropositionalLogicDataset(ProceduralDataset):
                     variables.update(self._collect_variables([expr.right]))
         return variables
 
-    def _generate_assignments(self, variables: Set[str]) -> List[dict[str, bool]]:
+    def _generate_assignments(self, variables: set[str]) -> list[dict[str, bool]]:
         """Generate all possible truth value assignments"""
         assignments = []
         for i in range(2 ** len(variables)):
@@ -198,5 +294,40 @@ class PropositionalLogicDataset(ProceduralDataset):
         else:
             return 1 + self._measure_complexity(expression.left) + self._measure_complexity(expression.right)
 
+    def score_answer(self, answer: str | None, entry: dict[str, Any]) -> float:
+        """Score a free-form conclusion against the premises stored in ``entry``.
+
+        Returns 0.0 for an empty answer, 0.01 when the answer cannot be parsed or
+        uses an upper-case letter that is not a known variable, 0.05 when it does
+        not follow from the premises, 0.25 when valid but trivial, else 1.0.
+        """
+        if not answer:
+            return 0.0
+
+        try:
+            valid_vars = set(entry["metadata"]["variables"])
+            if any(var not in valid_vars for var in re.findall(r"([A-Z])", answer)):
+                return 0.01
+
+            premises = [Expression.from_string(p) for p in entry["metadata"]["premises"]]
+            answer_expr = Expression.from_string(answer)
+            if not self._is_valid_conclusion(premises, answer_expr):
+                return 0.05
+            return 0.25 if self._is_trivial(answer_expr) else 1.0
+        except (ValueError, KeyError, AttributeError, RecursionError):
+            # RecursionError: parse_expr recurses once per nesting level, so a
+            # degenerate answer (e.g. thousands of "¬") must not crash scoring.
+            return 0.01
+
+    def _is_trivial(self, expr: Expression) -> bool:
+        """Return True for a leaf expression or a tautology such as P ∨ ¬P."""
+        if expr.operator is None:
+            return True
+
+        # A tautology evaluates truthy under every assignment of its variables.
+        names = self._collect_variables([expr])
+        rows = self._generate_assignments(names)
+        return all(expr.evaluate(row) for row in rows)
+
 
 register_dataset("propositional_logic", PropositionalLogicDataset, PropositionalLogicConfig)
diff --git a/reasoning_gym/logic/self_reference.py b/reasoning_gym/logic/self_reference.py
index d8155b4c..f42ce415 100644
--- a/reasoning_gym/logic/self_reference.py
+++ b/reasoning_gym/logic/self_reference.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from random import Random
-from typing import Dict, Optional
+from typing import Any, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -349,14 +349,14 @@ class SelfReferenceDataset(ProceduralDataset):
             "metadata": {},
         }
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Determine if the solution provided solves the SelfReference task.
 
         The function awards 1.0 for a correct answer.
 
         Args:
             answer (Optional[str]): The user's answer.
-            entry (Dict[str, any]): The original dataset entry containing the correct answer.
+            entry (dict[str, Any]): The original dataset entry containing the correct answer.
 
         Returns:
             float: The computed score between 0.0 and 1.0.
diff --git a/reasoning_gym/logic/syllogisms.py b/reasoning_gym/logic/syllogisms.py
index 00e4a37b..9546ad1d 100644
--- a/reasoning_gym/logic/syllogisms.py
+++ b/reasoning_gym/logic/syllogisms.py
@@ -3,7 +3,7 @@
 from dataclasses import dataclass
 from enum import StrEnum
 from random import Random
-from typing import List, Optional, Tuple
+from typing import Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
@@ -105,7 +105,7 @@ class SyllogismDataset(ProceduralDataset):
         super().__init__(config=config, seed=config.seed, size=config.size)
         self.terms = self.DEFAULT_TERMS
 
-    def _get_allowed_quantifiers(self) -> List[Quantifier]:
+    def _get_allowed_quantifiers(self) -> list[Quantifier]:
         """Get list of allowed quantifiers based on config"""
         quantifiers = []
         if self.config.allow_all:
@@ -120,9 +120,9 @@ class SyllogismDataset(ProceduralDataset):
 
     @staticmethod
     def _is_valid_syllogism(
-        premise1: Tuple[Quantifier, "Term", "Term"],
-        premise2: Tuple[Quantifier, "Term", "Term"],
-        conclusion: Tuple[Quantifier, "Term", "Term"],
+        premise1: tuple[Quantifier, "Term", "Term"],
+        premise2: tuple[Quantifier, "Term", "Term"],
+        conclusion: tuple[Quantifier, "Term", "Term"],
     ) -> bool:
         """
         Checks whether a given syllogism is valid under classical (Aristotelian) rules,
@@ -247,7 +247,7 @@ class SyllogismDataset(ProceduralDataset):
             return f"{quantifier.value} {subject.plural} are {predicate.plural}"
 
     def _check_logical_equivalence(
-        self, premise: Tuple[Quantifier, Term, Term], conclusion: Tuple[Quantifier, Term, Term]
+        self, premise: tuple[Quantifier, Term, Term], conclusion: tuple[Quantifier, Term, Term]
     ) -> bool:
         """Check if a conclusion is logically equivalent to a premise"""
         p_quant, p_subj, p_pred = premise
diff --git a/reasoning_gym/logic/zebra_puzzles.py b/reasoning_gym/logic/zebra_puzzles.py
index 38cbe051..c518c65d 100644
--- a/reasoning_gym/logic/zebra_puzzles.py
+++ b/reasoning_gym/logic/zebra_puzzles.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from random import Random
-from typing import Dict, Optional
+from typing import Any, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 from .contrib.logic_puzzle.generate import generate_puzzle
@@ -55,14 +55,14 @@ class ZebraDataset(ProceduralDataset):
             },
         }
 
-    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
         """Determine if the solution provided solves the Zebra task.
 
         The function awards 1.0 for a correct answer.
 
         Args:
             answer (Optional[str]): The user's answer.
-            entry (Dict[str, any]): The original dataset entry containing the correct answer.
+            entry (dict[str, Any]): The original dataset entry containing the correct answer.
 
         Returns:
             float: The computed score between 0.0 and 1.0.
diff --git a/reasoning_gym/utils.py b/reasoning_gym/utils.py
index c59c06ca..02dbe461 100644
--- a/reasoning_gym/utils.py
+++ b/reasoning_gym/utils.py
@@ -79,3 +79,33 @@ def is_integer(obj: Any) -> bool:
     elif isinstance(obj, Fraction):
         return obj.denominator == 1
     return False
+
+
+def compute_decimal_reward(answer: Optional[str], oracle_answer: str, strip_commas: bool = True) -> float:
+    """Compute the reward for a given answer compared to the oracle answer.
+
+    Args:
+        answer: Answer provided by model
+        oracle_answer: Correct answer to the question
+        strip_commas: Whether to remove commas from answers e.g "1,000" = "1000"
+
+    Returns:
+        0.0 for a missing answer, 1.0 when both parse to the same Decimal,
+        len(oracle) / len(answer) when the oracle text is contained in the
+        answer, and 0.01 otherwise.
+    """
+    if not answer:
+        return 0.0
+    if strip_commas:
+        answer = answer.replace(",", "")
+        oracle_answer = oracle_answer.replace(",", "")
+    try:
+        if Decimal(answer) == Decimal(oracle_answer):
+            # Numerically equal: never let the substring rule lower the score.
+            return 1.0
+    except Exception:  # not a number; fall through to partial credit
+        pass
+    # `answer` can be empty after comma stripping; guard the division below.
+    if answer and oracle_answer in answer:
+        return len(oracle_answer) / len(answer)
+    return 0.01
diff --git a/reasoning_gym/version_manager.py b/reasoning_gym/version_manager.py
index dbe19a09..9b01f075 100644
--- a/reasoning_gym/version_manager.py
+++ b/reasoning_gym/version_manager.py
@@ -1,6 +1,6 @@
 """Version manager for tracking dataset versions."""
 
-from typing import Dict, Optional, Tuple
+from typing import Any, Optional
 
 from .dataset import ProceduralDataset
 
@@ -12,7 +12,7 @@ class DatasetVersionManager:
         """Initialize the version manager."""
         self.current_version = 0
         # version_id -> (dataset_name, dataset_instance)
-        self.datasets: Dict[int, Tuple[str, ProceduralDataset]] = {}
+        self.datasets: dict[int, tuple[str, ProceduralDataset]] = {}
 
     def register_dataset(self, name: str, dataset: ProceduralDataset) -> int:
         """
@@ -29,7 +29,7 @@ class DatasetVersionManager:
         self.datasets[self.current_version] = (name, dataset)
         return self.current_version
 
-    def get_dataset(self, version_id: int) -> Optional[Tuple[str, ProceduralDataset]]:
+    def get_dataset(self, version_id: int) -> Optional[tuple[str, ProceduralDataset]]:
         """
         Retrieve a dataset by its version ID.
 
@@ -41,7 +41,7 @@ class DatasetVersionManager:
         """
         return self.datasets.get(version_id)
 
-    def get_entry(self, version_id: int, index: int) -> Dict[str, any]:
+    def get_entry(self, version_id: int, index: int) -> dict[str, Any]:
         """
         Get a specific entry from a versioned dataset.
 
diff --git a/tests/test_basic_arithmetic.py b/tests/test_basic_arithmetic.py
index c1035af9..6eda876a 100644
--- a/tests/test_basic_arithmetic.py
+++ b/tests/test_basic_arithmetic.py
@@ -74,7 +74,7 @@ def test_arithmetic_dataset_format_styles():
         max_digits=2,
     )
     dataset = BasicArithmeticDataset(config)
-    assert all(item["question"].strip().endswith(".") for item in dataset)
+    assert all(item["question"].strip().endswith(".") or item["question"].strip().endswith("?") for item in dataset)
 
 
 def test_arithmetic_dataset_iteration():
diff --git a/tests/test_binary_alternation.py b/tests/test_binary_alternation.py
new file mode 100644
index 00000000..81a581e6
--- /dev/null
+++ b/tests/test_binary_alternation.py
@@ -0,0 +1,104 @@
+"""Tests for Binary Alternation questions generation"""
+
+import pytest
+
+from reasoning_gym.algorithmic.binary_alternation import BinaryAlternationConfig, BinaryAlternationDataset
+
+
+def test_binary_alternation_config_validation():
+    """Test that invalid configs raise appropriate errors"""
+    with pytest.raises(AssertionError):
+        config = BinaryAlternationConfig(max_n=-1)  # Negative not allowed
+        config.validate()
+
+    with pytest.raises(AssertionError):
+        config = BinaryAlternationConfig(max_n=0)  # Zero not allowed
+        config.validate()
+
+    with pytest.raises(AssertionError):
+        config = BinaryAlternationConfig(min_n=-1)  # Negative not allowed
+        config.validate()
+
+    with pytest.raises(AssertionError):
+        config = BinaryAlternationConfig(min_n=0)  # Zero not allowed
+        config.validate()
+
+    with pytest.raises(AssertionError):
+        config = BinaryAlternationConfig(p_solvable=-0.01)  # < 0 not allowed
+        config.validate()
+
+    with pytest.raises(AssertionError):
+        config = BinaryAlternationConfig(p_solvable=1.01)  # > 1 not allowed
+        config.validate()
+
+
+def test_binary_alternation_dataset_deterministic():
+    """Two datasets built from one seeded config must agree item by item."""
+    cfg = BinaryAlternationConfig(seed=42, size=10)
+    first = BinaryAlternationDataset(cfg)
+    second = BinaryAlternationDataset(cfg)
+
+    for idx in range(len(first)):
+        assert first[idx] == second[idx]
+
+
+def test_binary_alternation_dataset_items():
+    """Every item has the expected keys and a consistent solvable flag."""
+    cfg = BinaryAlternationConfig(max_n=10, size=10, seed=42)
+    ds = BinaryAlternationDataset(cfg)
+
+    for idx in range(len(ds)):
+        entry = ds[idx]
+        # Top-level structure
+        assert isinstance(entry, dict)
+        assert "question" in entry
+        assert "answer" in entry
+        assert "metadata" in entry
+
+        # Metadata fields
+        meta = entry["metadata"]
+        assert "string" in meta
+        assert "solution" in meta
+        assert "solvable" in meta
+
+        bits = meta["string"]
+        imbalance = abs(bits.count("1") - bits.count("0"))
+
+        # Only binary digits; a solution of -1 goes with solvable being false
+        assert set(bits) <= {"0", "1"}
+        if meta["solution"] == -1:
+            assert not meta["solvable"]
+            assert imbalance > 1
+        else:
+            assert meta["solvable"]
+            assert imbalance <= 1
+
+
+def test_binary_alternation_dataset_iteration():
+    """Iterating yields exactly ``size`` items, identically on every pass."""
+    cfg = BinaryAlternationConfig(size=5, seed=42)
+    ds = BinaryAlternationDataset(cfg)
+
+    first_pass = [entry for entry in ds]
+    assert len(first_pass) == cfg.size
+
+    # A second pass over the dataset must reproduce the first
+    assert first_pass == [entry for entry in ds]
+
+
+def test_binary_alternation_answer():
+    """Check _get_answer against hand-computed results for three fixed strings"""
+    config = BinaryAlternationConfig(size=5, seed=42)
+    dataset = BinaryAlternationDataset(config)
+
+    # Impossible: expected answer is -1
+    string = "1110"
+    assert dataset._get_answer(string) == -1
+
+    # Already alternating: expected answer is 0
+    string = "10101"
+    assert dataset._get_answer(string) == 0
+
+    # One shot example: expected answer is 1
+    string = "111000"
+    assert dataset._get_answer(string) == 1
diff --git a/tests/test_bitwise_arithmetic.py b/tests/test_bitwise_arithmetic.py
new file mode 100644
index 00000000..cbaff24b
--- /dev/null
+++ b/tests/test_bitwise_arithmetic.py
@@ -0,0 +1,118 @@
+import pytest
+
+from reasoning_gym.arithmetic.bitwise_arithmetic import BitwiseArithmeticConfig, BitwiseArithmeticDataset
+
+
+def test_bitwise_arithmetic_config_validation():
+    """Difficulty values 0 and 11 must both fail validation."""
+    with pytest.raises(AssertionError):
+        too_low = BitwiseArithmeticConfig(difficulty=0)
+        too_low.validate()
+
+    with pytest.raises(AssertionError):
+        too_high = BitwiseArithmeticConfig(difficulty=11)
+        too_high.validate()
+
+
+def test_bitwise_arithmetic_deterministic():
+    """Two datasets built from one seeded config must agree item by item."""
+    cfg = BitwiseArithmeticConfig(seed=42, size=10)
+    first = BitwiseArithmeticDataset(cfg)
+    second = BitwiseArithmeticDataset(cfg)
+
+    for idx in range(len(first)):
+        assert first[idx] == second[idx]
+
+
+def test_bitwise_arithmetic_items():
+    """Each item is well formed and its own answer scores 1.0."""
+    cfg = BitwiseArithmeticConfig(difficulty=1, size=100, seed=42)
+    ds = BitwiseArithmeticDataset(cfg)
+
+    for idx in range(len(ds)):
+        entry = ds[idx]
+        assert isinstance(entry, dict)
+        assert "question" in entry
+        assert "answer" in entry
+        assert "metadata" in entry
+
+        # The stored answer must receive full marks; the metadata lookup
+        # also requires a "problem" key to be present.
+        _ = entry["metadata"]["problem"]
+        assert ds.score_answer(answer=entry["answer"], entry=entry) == 1.0
+
+        # A missing answer scores 0.0, the text "invalid" scores 0.01
+        assert ds.score_answer(answer=None, entry=entry) == 0.0
+        assert ds.score_answer(answer="invalid", entry=entry) == 0.01
+
+
+def test_bitwise_arithmetic_difficulty_levels():
+    """Check scoring at difficulties 1-3 and the operator-character bound at difficulty 1"""
+    for difficulty in [1, 2, 3]:
+        config = BitwiseArithmeticConfig(difficulty=difficulty, size=50, seed=42)
+        dataset = BitwiseArithmeticDataset(config)
+
+        for item in dataset:
+            # All items should be valid regardless of difficulty
+            assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0
+
+            # Counts operator characters, so a two-character operator such as "<<" counts twice
+            problem = item["metadata"]["problem"]
+            num_operators = sum(1 for c in problem if c in ["+", "-", "*", "<", ">"])
+
+            if difficulty == 1:
+                assert num_operators <= 2  # Simple expressions
+            elif difficulty >= 5:
+                # NOTE(review): unreachable while the outer loop only covers difficulties 1-3
+                found_complex = False
+                for item in dataset:  # rebinds the enclosing loop's `item`
+                    if sum(1 for c in item["metadata"]["problem"] if c in ["+", "-", "*", "<", ">"]) > 2:
+                        found_complex = True
+                        break
+                assert found_complex
+
+
+def test_bitwise_arithmetic_iteration():
+    """Every way of iterating yields ``size`` items, repeatably."""
+    cfg = BitwiseArithmeticConfig(difficulty=1, size=5, seed=42)  # tiny on purpose
+    ds = BitwiseArithmeticDataset(cfg)
+
+    # Explicit for-loop
+    collected = []
+    for entry in ds:
+        collected.append(entry)
+    assert len(collected) == cfg.size, "Iterator should yield exactly size items"
+
+    # list() conversion
+    as_list = list(ds)
+    assert len(as_list) == cfg.size, "Iterator should yield exactly size items"
+
+    # Two further passes must agree with each other
+    pass_one = list(ds)
+    pass_two = list(ds)
+    assert pass_one == pass_two, "Multiple iterations should yield same items"
+
+
+def test_bitwise_arithmetic_answer_formats():
+    """Test that the stored answer, its decimal form and a "0x"-prefixed form all score 1.0"""
+    config = BitwiseArithmeticConfig(difficulty=1, size=10, seed=42)
+    dataset = BitwiseArithmeticDataset(config)
+
+    for item in dataset:
+        problem = item["metadata"]["problem"]
+        correct = item["answer"]
+
+        # Test the answer exactly as stored in the item
+        assert dataset.score_answer(answer=correct, entry=item) == 1.0
+
+        # Test decimal format (assumes `problem` is a valid Python expression; see eval below)
+        decimal_answer = str(eval(problem))  # Safe as we control the problem
+        assert dataset.score_answer(answer=decimal_answer, entry=item) == 1.0
+
+        # Test with "0x" prefix variations
+        if correct.startswith("-0x"):
+            # Rebuilds the identical "-0x..." string, so this repeats the first check
+            assert dataset.score_answer(answer="-0x" + correct[3:], entry=item) == 1.0
+        elif not correct.startswith("0x"):
+            # Answers without a "0x" prefix must also be accepted with one added
+            assert dataset.score_answer(answer="0x" + correct, entry=item) == 1.0
diff --git a/tests/test_coaching.py b/tests/test_coaching.py
index 3b1b97d7..56e58548 100644
--- a/tests/test_coaching.py
+++ b/tests/test_coaching.py
@@ -1,7 +1,6 @@
 import json
 import math
 from collections import OrderedDict
-from pathlib import Path
 
 import pytest
 
diff --git a/tests/test_jugs.py b/tests/test_jugs.py
new file mode 100644
index 00000000..adc05511
--- /dev/null
+++ b/tests/test_jugs.py
@@ -0,0 +1,50 @@
+import json
+
+import pytest
+
+from reasoning_gym.algorithmic.jugs import JugsConfig, JugsDataset
+
+
+def test_jugs():
+    """Check item structure and scoring at three difficulty settings (5, 50 and 99)"""
+    config = JugsConfig(seed=42, size=1000, num_jugs=3, difficulty=5)
+    dataset = JugsDataset(config)
+
+    # easy: difficulty=5, 1000 items
+    for item in dataset:
+        assert isinstance(item, dict)
+        assert "question" in item
+        assert "answer" in item
+        assert "metadata" in item
+
+        # The stored answer must score 1.0 and a missing answer 0.0
+        assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0
+        assert dataset.score_answer(answer=None, entry=item) == 0.0
+
+    config = JugsConfig(seed=42, size=1, num_jugs=3, difficulty=50)
+    dataset = JugsDataset(config)
+
+    # medium: difficulty=50, single item
+    for item in dataset:
+        assert isinstance(item, dict)
+        assert "question" in item
+        assert "answer" in item
+        assert "metadata" in item
+
+        # The stored answer must score 1.0 and a missing answer 0.0
+        assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0
+        assert dataset.score_answer(answer=None, entry=item) == 0.0
+
+    config = JugsConfig(seed=42, size=1, num_jugs=3, difficulty=99)
+    dataset = JugsDataset(config)
+
+    # hard: difficulty=99, single item
+    for item in dataset:
+        assert isinstance(item, dict)
+        assert "question" in item
+        assert "answer" in item
+        assert "metadata" in item
+
+        # The stored answer must score 1.0 and a missing answer 0.0
+        assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0
+        assert dataset.score_answer(answer=None, entry=item) == 0.0
diff --git a/tests/test_list_functions.py b/tests/test_list_functions.py
new file mode 100644
index 00000000..6e7ee0c0
--- /dev/null
+++ b/tests/test_list_functions.py
@@ -0,0 +1,84 @@
+from random import Random
+
+import pytest
+
+from reasoning_gym.induction.list_functions import ListFunctionsDataset, ListFunctionsDatasetConfig
+
+
+def test_list_functions_config_validation():
+    """A negative size must be rejected by validate()."""
+    bad_config = ListFunctionsDatasetConfig(size=-1)
+    with pytest.raises(AssertionError):
+        bad_config.validate()
+
+
+def test_list_functions_deterministic():
+    """Two datasets built from one seeded config must agree item by item."""
+    cfg = ListFunctionsDatasetConfig(seed=42, size=10)
+    first = ListFunctionsDataset(cfg)
+    second = ListFunctionsDataset(cfg)
+
+    for idx in range(len(first)):
+        assert first[idx] == second[idx]
+
+
+def test_list_functions_items():
+    """Every item is a dict whose question and answer are strings."""
+    cfg = ListFunctionsDatasetConfig(size=50, seed=42)
+    ds = ListFunctionsDataset(cfg)
+
+    for idx in range(len(ds)):
+        entry = ds[idx]
+        assert isinstance(entry, dict)
+        for field in ("question", "answer"):
+            assert field in entry
+        for field in ("question", "answer"):
+            assert isinstance(entry[field], str)
+
+
+def test_list_functions_iteration():
+    """Every way of iterating yields ``size`` items, repeatably."""
+    cfg = ListFunctionsDatasetConfig(size=5, seed=42)  # tiny on purpose
+    ds = ListFunctionsDataset(cfg)
+
+    # Explicit for-loop
+    collected = []
+    for entry in ds:
+        collected.append(entry)
+    assert len(collected) == cfg.size, "Iterator should yield exactly size items"
+
+    # list() conversion
+    as_list = list(ds)
+    assert len(as_list) == cfg.size, "Iterator should yield exactly size items"
+
+    # Two further passes must agree with each other
+    pass_one = list(ds)
+    pass_two = list(ds)
+    assert pass_one == pass_two, "Multiple iterations should yield same items"
+
+
+def test_list_functions_generators():
+    """Generators load lazily and each one produces dicts."""
+    cfg = ListFunctionsDatasetConfig()
+    ds = ListFunctionsDataset(cfg)
+
+    # Nothing is loaded until the property is first read
+    assert ds._generators is None
+    _ = ds.generators  # first access triggers loading
+    assert ds._generators is not None
+
+    # The property exposes a non-empty dict of callables
+    assert isinstance(ds.generators, dict)
+    assert len(ds.generators) > 0
+
+    rng = Random(18)
+    names = sorted(ds.generators.keys())
+    for position, name in enumerate(names):
+        make_example = ds.generators[name]
+        assert callable(make_example)
+
+        # Progress output, visible when pytest runs with -s
+        print(position, name)
+        for _ in range(10):
+            sample = make_example(rng)
+            assert isinstance(sample, dict)
diff --git a/tests/test_needle_haystack.py b/tests/test_needle_haystack.py
new file mode 100644
index 00000000..fb279da2
--- /dev/null
+++ b/tests/test_needle_haystack.py
@@ -0,0 +1,72 @@
+import pytest
+
+from reasoning_gym.cognition.needle_haystack import NeedleHaystackConfig, NeedleHaystackDataset
+
+
+def test_needle_haystack():
+    """Check item structure and scoring for haystacks of 50 up to 500000 statements"""
+    config = NeedleHaystackConfig(seed=42, size=50, num_statements=50)
+    dataset = NeedleHaystackDataset(config)
+
+    for item in dataset:
+        assert isinstance(item, dict)
+        assert "question" in item
+        assert "answer" in item
+        assert "metadata" in item
+
+        # Stored answer scores 1.0, an unrelated answer 0.01, a missing answer 0.0
+        assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0
+        assert dataset.score_answer(answer="david bowie rules", entry=item) == 0.01
+        assert dataset.score_answer(answer=None, entry=item) == 0.0
+
+    config = NeedleHaystackConfig(seed=42, size=1, num_statements=500)  # single item from here on
+    dataset = NeedleHaystackDataset(config)
+
+    for item in dataset:
+        assert isinstance(item, dict)
+        assert "question" in item
+        assert "answer" in item
+        assert "metadata" in item
+
+        # Test the scoring
+        assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0
+        assert dataset.score_answer(answer=None, entry=item) == 0.0
+
+    config = NeedleHaystackConfig(seed=42, size=1, num_statements=5000)
+    dataset = NeedleHaystackDataset(config)
+
+    for item in dataset:
+        assert isinstance(item, dict)
+        assert "question" in item
+        assert "answer" in item
+        assert "metadata" in item
+
+        # Test the scoring
+        assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0
+        assert dataset.score_answer(answer=None, entry=item) == 0.0
+
+    config = NeedleHaystackConfig(seed=42, size=1, num_statements=50000)
+    dataset = NeedleHaystackDataset(config)
+
+    for item in dataset:
+        assert isinstance(item, dict)
+        assert "question" in item
+        assert "answer" in item
+        assert "metadata" in item
+
+        # Test the scoring
+        assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0
+        assert dataset.score_answer(answer=None, entry=item) == 0.0
+
+    config = NeedleHaystackConfig(seed=42, size=1, num_statements=500000)  # largest haystack exercised
+    dataset = NeedleHaystackDataset(config)
+
+    for item in dataset:
+        assert isinstance(item, dict)
+        assert "question" in item
+        assert "answer" in item
+        assert "metadata" in item
+
+        # Test the scoring
+        assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0
+        assert dataset.score_answer(answer=None, entry=item) == 0.0
diff --git a/tests/test_propositional_logic.py b/tests/test_propositional_logic.py
index aff2ebc3..c73ab1cc 100644
--- a/tests/test_propositional_logic.py
+++ b/tests/test_propositional_logic.py
@@ -87,3 +87,17 @@ def test_propositional_logic_dataset_iteration():
 
     # Test multiple iterations yield same items
     assert items == list(dataset)
+
+
+def test_propositional_logic_dataset_score_answer_correct():
+    dataset = PropositionalLogicDataset(PropositionalLogicConfig(size=50, seed=101))
+    for i, item in enumerate(dataset):
+        score = dataset.score_answer(item["metadata"]["example_answer"], item)
+        assert score == 1.0
+
+
+def test_propositional_logic_dataset_score_answer_incorrect():
+    dataset = PropositionalLogicDataset(PropositionalLogicConfig(size=100, seed=101))
+    for i, item in enumerate(dataset):
+        score = dataset.score_answer("Wrong", item)
+        assert score == 0.01
diff --git a/tests/test_rotting_oranges.py b/tests/test_rotting_oranges.py
new file mode 100644
index 00000000..070fb319
--- /dev/null
+++ b/tests/test_rotting_oranges.py
@@ -0,0 +1,119 @@
+"""Tests for Rotten Oranges questions generation"""
+
+import pytest
+
+from reasoning_gym.algorithmic.rotten_oranges import RottenOrangesConfig, RottenOrangesDataset
+
+
+def test_rotten_oranges_config_validation():
+    """Test that invalid configs raise appropriate errors"""
+    with pytest.raises(AssertionError):
+        config = RottenOrangesConfig(max_n=-1)  # Negative not allowed
+        config.validate()
+
+    with pytest.raises(AssertionError):
+        config = RottenOrangesConfig(max_n=0)  # Zero not allowed
+        config.validate()
+
+    with pytest.raises(AssertionError):
+        config = RottenOrangesConfig(min_n=-1)  # Negative not allowed
+        config.validate()
+
+    with pytest.raises(AssertionError):
+        config = RottenOrangesConfig(min_n=0)  # Zero not allowed
+        config.validate()
+
+    with pytest.raises(AssertionError):
+        config = RottenOrangesConfig(p_oranges=0)  # <= 0 not allowed
+        config.validate()
+
+    with pytest.raises(AssertionError):
+        config = RottenOrangesConfig(p_oranges=1.01)  # > 1 not allowed
+        config.validate()
+
+    with pytest.raises(AssertionError):
+        config = RottenOrangesConfig(p_rotten=0)  # <= 0 not allowed
+        config.validate()
+
+    with pytest.raises(AssertionError):
+        config = RottenOrangesConfig(p_rotten=1.01)  # > 1 not allowed
+        config.validate()
+
+
+def test_rotten_oranges_dataset_deterministic():
+    """Test that dataset generates same items with same seed"""
+    config = RottenOrangesConfig(seed=42, size=10)
+    dataset1 = RottenOrangesDataset(config)
+    dataset2 = RottenOrangesDataset(config)
+
+    for i in range(len(dataset1)):
+        assert dataset1[i] == dataset2[i]
+
+
+def test_rotten_oranges_dataset_items():
+    """Test basic properties of generated items"""
+    config = RottenOrangesConfig(min_n=10, max_n=15, size=10, seed=42)
+    dataset = RottenOrangesDataset(config)
+
+    for i in range(len(dataset)):
+        item = dataset[i]
+        # Check item structure
+        assert isinstance(item, dict)
+        assert "question" in item
+        assert "answer" in item
+        assert "metadata" in item
+
+        # Check metadata
+        assert "matrix" in item["metadata"]
+        assert "solution" in item["metadata"]
+
+        matrix = item["metadata"]["matrix"]
+
+        # Verify dimensions
+        assert config.min_n <= len(matrix) <= config.max_n
+        assert all(config.min_n <= len(row) <= config.max_n for row in matrix)
+        for r in range(len(matrix)):
+            for c in range(len(matrix[0])):
+                assert matrix[r][c] in [0, 1, 2]
+
+
+def test_rotten_oranges_dataset_iteration():
+    """Test that iteration respects dataset size"""
+    config = RottenOrangesConfig(size=5, seed=42)
+    dataset = RottenOrangesDataset(config)
+
+    items = list(dataset)
+    assert len(items) == config.size
+
+    # Test multiple iterations yield same items
+    assert items == list(dataset)
+
+
+def test_rotten_oranges_answer():
+    """Test the _get_answer method"""
+    config = RottenOrangesConfig(seed=42)
+    dataset = RottenOrangesDataset(config)
+
+    # All oranges are rotten
+    matrix = [
+        [2, 2, 2],
+        [2, 2, 2],
+        [2, 2, 2],
+    ]
+    assert dataset._get_answer(matrix) == 0
+
+    # All oranges are healthy: with no rotten orange, the rot can never start spreading
+    matrix = [
+        [1, 1, 1],
+        [1, 1, 1],
+        [1, 1, 1],
+    ]
+    assert dataset._get_answer(matrix) == -1
+
+    # One-shot example grid: rot spreads from the top-left corner and needs 4 steps
+    matrix = [
+        [2, 1, 1],
+        [1, 1, 0],
+        [0, 1, 1],
+    ]
+    assert dataset._get_answer(matrix) == 4
diff --git a/tools/cli/rgc/client.py b/tools/cli/rgc/client.py
index 4808a0fe..48791377 100644
--- a/tools/cli/rgc/client.py
+++ b/tools/cli/rgc/client.py
@@ -105,7 +105,7 @@ class RGClient:
         response.raise_for_status()
         return BatchResponse.model_validate(response.json())
 
-    def score_outputs(self, experiment: str, entry_answers: List[AnswerItem]) -> ScoringResponse:
+    def score_outputs(self, experiment: str, entry_answers: list[AnswerItem]) -> ScoringResponse:
         """Score a batch of answers.
 
         Args:
diff --git a/tools/server/models.py b/tools/server/models.py
index 6c873b08..1b6b505c 100644
--- a/tools/server/models.py
+++ b/tools/server/models.py
@@ -1,6 +1,6 @@
 """Pydantic models for API request/response data."""
 
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Optional
 
 from pydantic import BaseModel, Field
 
@@ -11,7 +11,7 @@ class ExperimentCreate(BaseModel):
     name: str = Field(..., description="Unique name for the experiment")
     size: int = Field(500, description="Size of the dataset")
     seed: Optional[int] = Field(None, description="Random seed for reproducibility")
-    datasets: Dict[str, Dict[str, Any]] = Field(..., description="Dictionary of datasets configurations")
+    datasets: dict[str, dict[str, Any]] = Field(..., description="Dictionary of datasets configurations")
 
 
 class ExperimentResponse(BaseModel):
@@ -20,19 +20,19 @@ class ExperimentResponse(BaseModel):
     name: str = Field(..., description="Name of the experiment")
     size: int = Field(..., description="Size of the dataset")
     seed: Optional[int] = Field(None, description="Random seed used")
-    datasets: Dict[str, Dict[str, Any]] = Field(..., description="Current dataset configurations")
+    datasets: dict[str, dict[str, Any]] = Field(..., description="Current dataset configurations")
 
 
 class ExperimentList(BaseModel):
     """Response model for listing experiments."""
 
-    experiments: List[str] = Field(default_factory=list, description="List of registered experiment names")
+    experiments: list[str] = Field(default_factory=list, description="List of registered experiment names")
 
 
 class DatasetConfigUpdate(BaseModel):
     """Request model for updating dataset configuration."""
 
-    config: Dict[str, Any] = Field(..., description="Configuration parameters to update")
+    config: dict[str, Any] = Field(..., description="Configuration parameters to update")
 
 
 class ErrorResponse(BaseModel):
@@ -46,13 +46,13 @@ class BatchEntry(BaseModel):
 
     question: str = Field(..., description="The question text")
     entry_id: str = Field(..., description="Unique identifier in format '{version}.{index}'")
-    metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional metadata about the entry")
+    metadata: dict[str, Any] = Field(default_factory=dict, description="Additional metadata about the entry")
 
 
 class BatchResponse(BaseModel):
     """Response containing a batch of entries"""
 
-    entries: List[BatchEntry] = Field(..., description="List of batch entries")
+    entries: list[BatchEntry] = Field(..., description="List of batch entries")
 
 
 class AnswerItem(BaseModel):
@@ -65,11 +65,11 @@ class AnswerItem(BaseModel):
 class ScoringRequest(BaseModel):
     """Request for scoring model outputs"""
 
-    answers: List[AnswerItem] = Field(..., description="List of entries to score")
+    answers: list[AnswerItem] = Field(..., description="List of entries to score")
 
 
 class ScoringResponse(BaseModel):
     """Response containing scores for answers"""
 
-    scores: List[float] = Field(..., description="List of scores in same order as request")
-    entry_ids: List[str] = Field(..., description="List of entry_ids in same order as request")
+    scores: list[float] = Field(..., description="List of scores in same order as request")
+    entry_ids: list[str] = Field(..., description="List of entry_ids in same order as request")