diff --git a/GALLERY.md b/GALLERY.md
index f0a2caba..e161b85d 100644
--- a/GALLERY.md
+++ b/GALLERY.md
@@ -11,6 +11,7 @@ This gallery shows examples from all available datasets using their default conf
 - [family_relationships](#family-relationships)
 - [figlet_font](#figlet-font)
 - [fraction_simplification](#fraction-simplification)
+- [game_of_life](#game_of_life)
 - [gcd](#gcd)
 - [lcm](#lcm)
 - [leg_counting](#leg-counting)
@@ -35,7 +36,7 @@ This gallery shows examples from all available datasets using their default conf
 - [word_sorting](#word-sorting)
 
 ## Dataset Examples
-### base_conversion {base-conversion}
+### base_conversion
 Generates base conversion tasks
 
 Default configuration:
@@ -51,23 +52,23 @@ size = 500
 Example tasks:
 ```
 Example 1:
-Question: Convert the base-5 number 21e to base-3
-Answer: 21e
-Metadata: {'decimal_value': 542, 'source_base': 5, 'target_base': 3, 'source_repr': '21e', 'target_repr': '21e'}
+Question: Convert the base-15 number 15 to binary
+Answer: 10101
+Metadata: {'decimal_value': 21, 'source_base': 15, 'target_base': 2, 'source_repr': '15', 'target_repr': '10101'}
 
 Example 2:
-Question: Convert the base-6 number f7 to base-3
-Answer: f7
-Metadata: {'decimal_value': 247, 'source_base': 6, 'target_base': 3, 'source_repr': 'f7', 'target_repr': 'f7'}
+Question: Convert the base-15 number de to base-6
+Answer: de
+Metadata: {'decimal_value': 222, 'source_base': 15, 'target_base': 6, 'source_repr': 'de', 'target_repr': 'de'}
 
 Example 3:
-Question: Convert the base-4 number 25d to base-14 (use lowercase letters a-z for digits above 9)
-Answer: 25d
-Metadata: {'decimal_value': 605, 'source_base': 4, 'target_base': 14, 'source_repr': '25d', 'target_repr': '25d'}
+Question: Convert the base-10 number 4e to binary
+Answer: 1001110
+Metadata: {'decimal_value': 78, 'source_base': 10, 'target_base': 2, 'source_repr': '4e', 'target_repr': '1001110'}
 
 ```
 
-### basic_arithmetic {basic-arithmetic}
+### basic_arithmetic
 Dataset that generates basic arithmetic tasks with configurable complexity
 
 Default configuration:
@@ -88,23 +89,23 @@ whitespace = single
 Example tasks:
 ```
 Example 1:
-Question: 7035 / 1005 =
-Answer: 7
-Metadata: {'num_terms': 2, 'num_digits': 4, 'expression': '7035 / 1005'}
+Question: 19 + 61 * -43 / 1 + 89 - 98 =
+Answer: -2613
+Metadata: {'num_terms': 6, 'num_digits': 2, 'expression': '19 + 61 * -43 / 1 + 89 - 98'}
 
 Example 2:
-Question: -743 + 475 + 719 + -155 - -768 =
-Answer: 1064
-Metadata: {'num_terms': 5, 'num_digits': 3, 'expression': '-743 + 475 + 719 + -155 - -768'}
+Question: ( 9240 + -702 ) =
+Answer: 8538
+Metadata: {'num_terms': 2, 'num_digits': 4, 'expression': '( 9240 + -702 )'}
 
 Example 3:
-Question: 898 / 2 + 357 + -664 * -496 =
-Answer: 330150
-Metadata: {'num_terms': 5, 'num_digits': 3, 'expression': '898 / 2 + 357 + -664 * -496'}
+Question: -68 * 12 - 6 / 2 + -60 =
+Answer: -879
+Metadata: {'num_terms': 5, 'num_digits': 2, 'expression': '-68 * 12 - 6 / 2 + -60'}
 
 ```
 
-### caesar_cipher {caesar-cipher}
+### caesar_cipher
 Generates Caesar cipher encryption/decryption tasks
 
 Default configuration:
@@ -121,23 +122,23 @@ size = 500
 Example tasks:
 ```
 Example 1:
-Question: Decrypt this Caesar cipher text: B EJTDVTTJPO XBT HPJOH PO XIFO IF FOUFSFE
-Answer: A DISCUSSION WAS GOING ON WHEN HE ENTERED
-Metadata: {'rotation': 1, 'cipher_text': 'B EJTDVTTJPO XBT HPJOH PO XIFO IF FOUFSFE', 'clear_text': 'A DISCUSSION WAS GOING ON WHEN HE ENTERED'}
+Question: Decrypt this Caesar cipher text: UVYAO MVY AOL VM IBA AOL ZVBAO MVY AOL SHAPUZ
+Answer: NORTH FOR THE OF BUT THE SOUTH FOR THE LATINS
+Metadata: {'rotation': 7, 'cipher_text': 'UVYAO MVY AOL VM IBA AOL ZVBAO MVY AOL SHAPUZ', 'clear_text': 'NORTH FOR THE OF BUT THE SOUTH FOR THE LATINS'}
 
 Example 2:
-Question: Decrypt this Caesar cipher text: Q IU BQZML YCQBM BQZML LW GWC VWB BPQVS BPIB I JIBP EWCTL ZMNZMAP
-Answer: I AM TIRED QUITE TIRED DO YOU NOT THINK THAT A BATH WOULD REFRESH
-Metadata: {'rotation': 8, 'cipher_text': 'Q IU BQZML YCQBM BQZML LW GWC VWB BPQVS BPIB I JIBP EWCTL ZMNZMAP', 'clear_text': 'I AM TIRED QUITE TIRED DO YOU NOT THINK THAT A BATH WOULD REFRESH'}
+Question: Decrypt this Caesar cipher text: ER MRHITIRHIRX KSZIVRQIRX
+Answer: AN INDEPENDENT GOVERNMENT
+Metadata: {'rotation': 4, 'cipher_text': 'ER MRHITIRHIRX KSZIVRQIRX', 'clear_text': 'AN INDEPENDENT GOVERNMENT'}
 
 Example 3:
-Question: Decrypt this Caesar cipher text: Y IGLE GQ FC
-Answer: A KING IS HE
-Metadata: {'rotation': 24, 'cipher_text': 'Y IGLE GQ FC', 'clear_text': 'A KING IS HE'}
+Question: Decrypt this Caesar cipher text: IYE WKI ECO DRSC OLYYU PYB XOKBVI KXI ZEBZYCO CEMR KC MBOKDSYX YP NOBSFKDSFO ZOBPYBWKXMOC KXN BOCOKBMR
+Answer: YOU MAY USE THIS EBOOK FOR NEARLY ANY PURPOSE SUCH AS CREATION OF DERIVATIVE PERFORMANCES AND RESEARCH
+Metadata: {'rotation': 10, 'cipher_text': 'IYE WKI ECO DRSC OLYYU PYB XOKBVI KXI ZEBZYCO CEMR KC MBOKDSYX YP NOBSFKDSFO ZOBPYBWKXMOC KXN BOCOKBMR', 'clear_text': 'YOU MAY USE THIS EBOOK FOR NEARLY ANY PURPOSE SUCH AS CREATION OF DERIVATIVE PERFORMANCES AND RESEARCH'}
 
 ```
 
-### chain_sum {chain-sum}
+### chain_sum
 Generates simple arithmetic tasks using only + and - operators
 
 Default configuration:
@@ -154,23 +155,23 @@ size = 500
 Example tasks:
 ```
 Example 1:
-Question: 47 - 18 - 85 + 54 =
-Answer: -2
-Metadata: {'num_terms': 4, 'num_digits': 2, 'expression': '47 - 18 - 85 + 54'}
+Question: 3 - 6 + 4 =
+Answer: 1
+Metadata: {'num_terms': 3, 'num_digits': 1, 'expression': '3 - 6 + 4'}
 
 Example 2:
-Question: 52 + 23 - 88 + 78 + 92 - 54 =
-Answer: 103
-Metadata: {'num_terms': 6, 'num_digits': 2, 'expression': '52 + 23 - 88 + 78 + 92 - 54'}
+Question: 6516 - 9002 - 5380 - 2663 =
+Answer: -10529
+Metadata: {'num_terms': 4, 'num_digits': 4, 'expression': '6516 - 9002 - 5380 - 2663'}
 
 Example 3:
-Question: 46 + 76 + 75 + 46 + 70 - 88 =
-Answer: 225
-Metadata: {'num_terms': 6, 'num_digits': 2, 'expression': '46 + 76 + 75 + 46 + 70 - 88'}
+Question: 3352 + 3153 - 3475 + 1726 - 8711 - 7863 =
+Answer: -11818
+Metadata: {'num_terms': 6, 'num_digits': 4, 'expression': '3352 + 3153 - 3475 + 1726 - 8711 - 7863'}
 
 ```
 
-### color_cube_rotation {color-cube-rotation}
+### color_cube_rotation
 Generates color cube rotation reasoning tasks
 
 Default configuration:
@@ -185,56 +186,58 @@ Example tasks:
 ```
 Example 1:
 Question: A cube has:
-- a white top side
-- a silver right side
-- a brown front side
-- a violet left side
-- a pink back side
-- a purple bottom side
-
-The cube is rotated so that the side which was before at the left is now at the top.
-
-Then the cube is rotated to bring the bottom side to the top.
-
-Now the cube is rotated to place its back side at the top.
-
-What is now the color of the top side of the cube?
-Answer: brown
-Metadata: {'initial_state': {'top': 'white', 'right': 'silver', 'front': 'brown', 'left': 'violet', 'back': 'pink', 'bottom': 'purple'}, 'rotations': ['left', 'bottom', 'back'], 'target_side': 'top', 'num_rotations': 3}
-
-Example 2:
-Question: A cube has:
-- a violet top side
-- a green right side
-- a white front side
-- a blue left side
-- a gold back side
-- a yellow bottom side
-
-The cube is rotated so that the side which was before at the right is now at the top.
-
-What is now the color of the bottom side of the cube?
-Answer: blue
-Metadata: {'initial_state': {'top': 'violet', 'right': 'green', 'front': 'white', 'left': 'blue', 'back': 'gold', 'bottom': 'yellow'}, 'rotations': ['right'], 'target_side': 'bottom', 'num_rotations': 1}
-
-Example 3:
-Question: A cube has:
-- a magenta top side
-- a green right side
-- a brown front side
-- a yellow left side
+- a red top side
+- a brown right side
+- a cyan front side
+- a gray left side
 - a silver back side
-- a violet bottom side
+- a purple bottom side
 
 The cube is rotated so that the side which was before at the front is now at the top.
 
-What is now the color of the bottom side of the cube?
+Now the cube is rotated to place its right side at the top.
+
+What is now the color of the top side of the cube?
+Answer: brown
+Metadata: {'initial_state': {'top': 'red', 'right': 'brown', 'front': 'cyan', 'left': 'gray', 'back': 'silver', 'bottom': 'purple'}, 'rotations': ['front', 'right'], 'target_side': 'top', 'num_rotations': 2}
+
+Example 2:
+Question: A cube has:
+- a yellow top side
+- a cyan right side
+- a white front side
+- a blue left side
+- a red back side
+- a pink bottom side
+
+The cube is rotated so that the side which was before at the left is now at the top.
+
+Then the cube is rotated to bring the front side to the top.
+
+Next, the front side is rotated to become the top face.
+
+What is now the color of the front side of the cube?
+Answer: red
+Metadata: {'initial_state': {'top': 'yellow', 'right': 'cyan', 'front': 'white', 'left': 'blue', 'back': 'red', 'bottom': 'pink'}, 'rotations': ['left', 'front', 'front'], 'target_side': 'front', 'num_rotations': 3}
+
+Example 3:
+Question: A cube has:
+- a indigo top side
+- a violet right side
+- a silver front side
+- a pink left side
+- a magenta back side
+- a cyan bottom side
+
+The cube is rotated so that the side which was before at the front is now at the top.
+
+What is now the color of the top side of the cube?
 Answer: silver
-Metadata: {'initial_state': {'top': 'magenta', 'right': 'green', 'front': 'brown', 'left': 'yellow', 'back': 'silver', 'bottom': 'violet'}, 'rotations': ['front'], 'target_side': 'bottom', 'num_rotations': 1}
+Metadata: {'initial_state': {'top': 'indigo', 'right': 'violet', 'front': 'silver', 'left': 'pink', 'back': 'magenta', 'bottom': 'cyan'}, 'rotations': ['front'], 'target_side': 'top', 'num_rotations': 1}
 
 ```
 
-### countdown {countdown}
+### countdown
 Generates Countdown Number Game tasks
 
 Default configuration:
@@ -254,26 +257,26 @@ size = 500
 Example tasks:
 ```
 Example 1:
-Question: Using the numbers 4, 15, 90, 49, 15, create an expression that equals 275.
-You can only use each number once.
-Answer: 49*90/15 - 15 - 4
-Metadata: {'numbers': [4, 15, 90, 49, 15], 'target': 275, 'expression': '49*90/15 - 15 - 4'}
+Question: Calculate 421 using the numbers 10, 30, 26, 59.
+Each number may be used at most once.
+Answer: 30*(26 - 10) - 59
+Metadata: {'numbers': [10, 30, 26, 59], 'target': 421, 'expression': '30*(26 - 10) - 59'}
 
 Example 2:
-Question: Calculate 237 using the numbers 32, 56, 64, 23, 3, 100.
+Question: Calculate 229 using the numbers 55, 80, 34, 60.
 Each number may be used at most once.
-Answer: 100*3 - 64 - 23 - 32 + 56
-Metadata: {'numbers': [32, 56, 64, 23, 3, 100], 'target': 237, 'expression': '100*3 - 64 - 23 - 32 + 56'}
+Answer: 80 + 34 + 60 + 55
+Metadata: {'numbers': [55, 80, 34, 60], 'target': 229, 'expression': '80 + 34 + 60 + 55'}
 
 Example 3:
-Question: Find a way to make 241 using some or all of these numbers: 87, 85, 82, 13.
-Each number can only be used once.
-Answer: 85 + 82 - 13 + 87
-Metadata: {'numbers': [87, 85, 82, 13], 'target': 241, 'expression': '85 + 82 - 13 + 87'}
+Question: Calculate 840 using the numbers 41, 18, 32, 45, 84.
+Each number may be used at most once.
+Answer: 84*(41 - 45 + 32 - 18)
+Metadata: {'numbers': [41, 18, 32, 45, 84], 'target': 840, 'expression': '84*(41 - 45 + 32 - 18)'}
 
 ```
 
-### family_relationships {family-relationships}
+### family_relationships
 Generates family relationship reasoning tasks
 
 Default configuration:
@@ -289,29 +292,29 @@ size = 500
 Example tasks:
 ```
 Example 1:
-Question: Charles is married to Jessica. They have a child called Atlas. Atlas is married to Amelia. They have children called Patricia and Nova. David is married to Ava. They have a child called Amelia.
+Question: Jack is married to Elizabeth. They have a child called Oliver. Oliver is married to Abigail. They have a child called Logan. Alexander is married to Mia. They have a child called Abigail.
 
-How is Jessica related to Nova?
-Answer: grandmother
-Metadata: {'person1': 'Jessica', 'person2': 'Nova', 'relationship': 'grandmother', 'family_size': 8}
+What relation is Mia to Abigail?
+Answer: mother
+Metadata: {'person1': 'Mia', 'person2': 'Abigail', 'relationship': 'mother', 'family_size': 7}
 
 Example 2:
-Question: David is married to Charlotte. They have a child called Lucas. Lucas is married to Victoria. They have children called James and Abigail.
+Question: James is married to Sarah. They have a child called Atlas. Atlas is married to Sophie. They have children called Jennifer and Aria.
 
-What is Victoria to Abigail?
-Answer: mother
-Metadata: {'person1': 'Victoria', 'person2': 'Abigail', 'relationship': 'mother', 'family_size': 6}
+What is Aria to Jennifer?
+Answer: sister
+Metadata: {'person1': 'Aria', 'person2': 'Jennifer', 'relationship': 'sister', 'family_size': 6}
 
 Example 3:
-Question: Mason is married to Amelia. They have a child called James. James is married to Grace. They have a child called Abigail.
+Question: Lucas is married to Willow. They have a child called Samuel. Samuel is married to Zoe. They have a child called William. Henry is married to Emma. They have a child called Zoe.
 
-What relation is James to Amelia?
-Answer: son
-Metadata: {'person1': 'James', 'person2': 'Amelia', 'relationship': 'son', 'family_size': 5}
+What is Lucas to Willow?
+Answer: husband
+Metadata: {'person1': 'Lucas', 'person2': 'Willow', 'relationship': 'husband', 'family_size': 7}
 
 ```
 
-### figlet_font {figlet-font}
+### figlet_font
 Generates FigletFont tasks
 
 Default configuration:
@@ -326,44 +329,45 @@ size = 500
 Example tasks:
 ```
 Example 1:
-Question: What word does this say?
+Question: Please read the following figlet font:
 
- __          _
-(_    |_|   / \   | |   |\|
-__)   | |   \_/   |^|   | |
+  ()     _     _       _    _ __     ()  ,
+  /\    ' )   /       | )  ' )  )    /`-'|
+ /  )    / / /    ,---|/    /  /    /   /
+/__/__  (_(_/      \_/ \_  /  (_   /__-<_
 
-Answer: SHOWN
-Metadata: {'font': 'bigfig', 'space_letters': True}
+
+
+Answer: SWING
+Metadata: {'font': 'slscript', 'space_letters': True}
 
 Example 2:
 Question: What word does this say?
 
-  #####  ### ###     ##    ######    ######
- ## ###  ##   ##   #####   ###  ##  ######
-##       ##   ##   ## ###  ##   ##     ##
-##       #######  ##   ##  ##  ##      ##
-##       ##   ##  #######  #####       ##
-#####    ##   ##  ##  ##    ## ##      ##
- #####    #    #  #   #     ##  ##      #
+     dBBBP     dBBBBBb        dBBBP     dBP dBP    dBBBP
+                    BB
+   dBP          dBP BB      dBP       dBBBBBP    dBBP
+  dBP          dBP  BB     dBP       dBP dBP    dBP
+ dBBBBP       dBBBBBBB    dBBBBP    dBP dBP    dBBBBP
 
 
-Answer: CHART
-Metadata: {'font': 'future_6', 'space_letters': True}
+Answer: CACHE
+Metadata: {'font': 'trek', 'space_letters': True}
 
 Example 3:
 Question: Please read the following figlet font:
 
-.dP"Y8     88  88     888888        db        88""Yb
-`Ybo."     88  88     88__         dPYb       88__dP
-o.`Y8b     888888     88""        dP__Yb      88"Yb
-8bodP'     88  88     888888     dP""""Yb     88  Yb
+.---. .---. .-. .-..-. .-..-.
+ \ \  | |-' | | | .` |  >  /
+`---' `-'   `-' `-'`-'  `-'
 
-Answer: SHEAR
-Metadata: {'font': '4max', 'space_letters': True}
+
+Answer: SPINY
+Metadata: {'font': 'linux', 'space_letters': True}
 
 ```
 
-### fraction_simplification {fraction-simplification}
+### fraction_simplification
 Generates fraction simplification tasks
 
 Default configuration:
@@ -380,23 +384,175 @@ size = 500
 Example tasks:
 ```
 Example 1:
-Question: Simplify the fraction $12054/36848$ to its lowest terms
-Answer: $123/376$
-Metadata: {'numerator': 12054, 'denominator': 36848, 'simplified_numerator': 123, 'simplified_denominator': 376, 'reduction_factor': 98, 'style': 'latex_inline'}
+Question: Simplify the fraction $1380/6180$ to its lowest terms
+Answer: $23/103$
+Metadata: {'numerator': 1380, 'denominator': 6180, 'simplified_numerator': 23, 'simplified_denominator': 103, 'reduction_factor': 60, 'style': 'latex_inline'}
 
 Example 2:
-Question: Simplify the fraction 1218/28275 to its lowest terms
-Answer: 14/325
-Metadata: {'numerator': 1218, 'denominator': 28275, 'simplified_numerator': 14, 'simplified_denominator': 325, 'reduction_factor': 87, 'style': 'plain'}
+Question: Simplify the fraction 15552/49984 to its lowest terms
+Answer: 243/781
+Metadata: {'numerator': 15552, 'denominator': 49984, 'simplified_numerator': 243, 'simplified_denominator': 781, 'reduction_factor': 64, 'style': 'plain'}
 
 Example 3:
-Question: Simplify the fraction 21902/24111 to its lowest terms
-Answer: 466/513
-Metadata: {'numerator': 21902, 'denominator': 24111, 'simplified_numerator': 466, 'simplified_denominator': 513, 'reduction_factor': 47, 'style': 'plain'}
+Question: Simplify the fraction $56100/80500$ to its lowest terms
+Answer: $561/805$
+Metadata: {'numerator': 56100, 'denominator': 80500, 'simplified_numerator': 561, 'simplified_denominator': 805, 'reduction_factor': 100, 'style': 'latex_inline'}
 
 ```
 
-### gcd {gcd}
+### game_of_life
+Generates Game of Life games with configurable parameters
+
+Default configuration:
+```python
+grid_size_x = 20
+grid_size_y = 20
+filled_cells = 100
+simulation_steps = 1
+seed = None
+size = 500
+```
+
+Example tasks:
+```
+Example 1:
+Question: What will this Game of Life board look like after 1 steps of simulation?
+
+[[0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0]
+ [0 1 1 1 0 1 1 0 0 0 1 0 0 0 0 0 0 0 1 0]
+ [0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1]
+ [0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 1 0 1]
+ [0 1 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0]
+ [0 0 0 1 0 0 0 0 1 0 1 0 1 1 1 0 1 0 0 1]
+ [0 0 1 0 0 0 1 0 0 1 1 0 0 0 0 0 0 1 1 0]
+ [0 0 1 0 1 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0]
+ [0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
+ [0 0 1 1 0 0 0 0 0 0 0 1 1 1 1 1 0 0 0 0]
+ [1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+ [0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1]
+ [0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0]
+ [0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0]
+ [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+ [0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 1 1 1 0]
+ [0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 1 0 0 1]
+ [0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 1 1 0 1 0]
+ [0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 1 0 0 0]
+ [1 1 0 0 1 0 1 0 0 0 1 0 0 1 0 0 0 0 0 0]]
+Answer: [[1 0 0 1 1 0 1 0 0 1 1 1 0 0 0 0 0 0 0 0]
+ [0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0]
+ [0 1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1]
+ [0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0]
+ [0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 1 1 1 0]
+ [0 0 1 0 0 0 0 1 1 0 1 1 0 1 0 0 0 1 1 0]
+ [0 0 1 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0]
+ [0 1 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0]
+ [0 1 0 0 1 0 0 0 0 0 1 0 0 1 0 1 0 0 0 0]
+ [0 1 1 0 1 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0]
+ [0 0 0 0 1 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0]
+ [0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+ [0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 1 0]
+ [0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+ [0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0]
+ [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0]
+ [0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 1]
+ [0 0 0 0 0 0 1 1 0 0 1 1 0 0 1 0 0 0 0 0]
+ [0 0 0 0 0 1 1 1 0 0 1 1 0 0 1 1 1 1 0 0]
+ [0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0]]
+Metadata: {'grid_size_x': 20, 'grid_size_y': 20, 'filled_cells': 100, 'simulation_steps': 1}
+
+Example 2:
+Question: What will this Game of Life board look like after 1 steps of simulation?
+
+[[0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0]
+ [0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0]
+ [1 0 0 0 1 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0]
+ [0 0 0 0 0 1 0 0 1 0 0 1 0 0 1 1 0 0 0 0]
+ [0 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 1 1]
+ [0 1 1 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 1]
+ [1 0 0 0 1 0 0 0 0 0 0 1 0 0 1 1 1 0 0 0]
+ [0 0 1 0 0 0 1 0 0 1 1 0 0 0 0 0 1 0 0 0]
+ [1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0]
+ [0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1]
+ [0 0 1 0 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1]
+ [0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0]
+ [0 1 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 1 0]
+ [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1]
+ [1 0 1 0 0 0 0 1 0 0 0 0 0 1 1 1 0 1 0 0]
+ [0 1 0 0 1 0 1 1 0 0 1 1 0 0 0 1 0 0 0 0]
+ [0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 1 0 0]
+ [0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0]
+ [0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0]
+ [0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0]]
+Answer: [[0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0]
+ [0 0 0 1 1 1 0 0 0 0 0 1 1 0 0 0 0 1 1 1]
+ [0 0 0 0 1 0 0 0 0 0 1 1 0 1 1 0 0 0 0 0]
+ [0 0 0 0 1 1 1 0 1 1 0 1 1 0 1 0 0 0 0 1]
+ [1 0 0 0 0 0 1 1 0 1 1 0 0 0 1 1 0 0 1 1]
+ [0 1 0 0 0 0 0 0 1 1 1 0 0 0 1 1 1 0 1 1]
+ [1 0 1 1 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 0]
+ [0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 1 0 0]
+ [0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0 1]
+ [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1]
+ [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1]
+ [0 1 1 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0]
+ [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+ [1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 1]
+ [1 1 0 0 0 0 1 1 1 0 1 0 0 0 1 1 0 0 0 0]
+ [0 1 0 0 0 0 1 1 0 1 1 1 1 0 0 1 0 0 0 0]
+ [0 0 0 0 0 0 0 1 1 1 0 1 0 0 0 0 0 0 0 0]
+ [0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0]
+ [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0]
+ [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 1 0]]
+Metadata: {'grid_size_x': 20, 'grid_size_y': 20, 'filled_cells': 100, 'simulation_steps': 1}
+
+Example 3:
+Question: What will this Game of Life board look like after 1 steps of simulation?
+
+[[1 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
+ [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1]
+ [0 0 0 1 0 1 0 1 1 0 0 0 0 0 1 0 0 0 0 0]
+ [0 0 0 1 0 0 0 1 0 0 1 1 1 0 0 0 0 0 0 0]
+ [0 0 1 0 0 0 0 1 0 0 0 0 1 0 1 1 1 0 0 1]
+ [1 1 0 0 0 1 1 0 0 0 0 1 1 1 0 1 0 0 0 0]
+ [0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0]
+ [0 0 1 1 0 0 1 0 0 1 0 1 0 0 1 0 0 1 0 0]
+ [0 0 1 1 0 0 0 1 0 0 1 1 1 0 0 0 0 0 0 0]
+ [0 0 1 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0]
+ [0 0 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1]
+ [0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0]
+ [0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0]
+ [0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 0]
+ [0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+ [0 0 0 1 1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0]
+ [0 0 0 0 0 0 1 1 1 0 0 1 1 0 0 0 1 1 0 0]
+ [0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 1]
+ [0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0]
+ [1 0 0 1 0 0 0 0 0 0 1 1 1 0 0 1 0 0 0 0]]
+Answer: [[1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1]
+ [0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0]
+ [0 0 0 0 1 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0]
+ [0 0 1 1 1 0 0 1 0 0 0 1 1 0 1 0 0 0 0 0]
+ [1 1 1 0 0 0 0 1 0 0 1 0 0 0 1 1 1 0 0 0]
+ [1 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0]
+ [0 1 1 0 0 1 1 0 0 0 0 1 0 1 0 1 1 0 0 0]
+ [0 0 1 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0]
+ [0 1 0 0 0 0 0 0 1 1 0 0 1 1 0 0 0 0 0 0]
+ [0 1 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0 1 0]
+ [0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0]
+ [0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0]
+ [0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0]
+ [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+ [0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0]
+ [0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0]
+ [0 0 0 1 1 1 1 1 0 0 1 0 0 1 0 0 0 0 0 0]
+ [0 0 0 0 0 0 1 0 1 1 1 0 1 0 0 0 0 0 0 0]
+ [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
+ [0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0]]
+Metadata: {'grid_size_x': 20, 'grid_size_y': 20, 'filled_cells': 100, 'simulation_steps': 1}
+
+```
+
+### gcd
 Generates Greatest Common Divisor (GCD) tasks
 
 Default configuration:
@@ -412,23 +568,23 @@ size = 500
 Example tasks:
 ```
 Example 1:
-Question: Find the Greatest Common Divisor (GCD) of these numbers: 384, 414
-Answer: 6
-Metadata: {'numbers': [384, 414], 'result': 6}
+Question: Find the Greatest Common Divisor (GCD) of these numbers: 226, 512
+Answer: 2
+Metadata: {'numbers': [226, 512], 'result': 2}
 
 Example 2:
-Question: Find the Greatest Common Divisor (GCD) of these numbers: 298, 803
-Answer: 1
-Metadata: {'numbers': [298, 803], 'result': 1}
+Question: Find the Greatest Common Divisor (GCD) of these numbers: 999, 495
+Answer: 9
+Metadata: {'numbers': [999, 495], 'result': 9}
 
 Example 3:
-Question: Find the Greatest Common Divisor (GCD) of these numbers: 846, 550
-Answer: 2
-Metadata: {'numbers': [846, 550], 'result': 2}
+Question: Find the Greatest Common Divisor (GCD) of these numbers: 999, 719
+Answer: 1
+Metadata: {'numbers': [999, 719], 'result': 1}
 
 ```
 
-### lcm {lcm}
+### lcm
 Generates Least Common Multiple (LCM) tasks
 
 Default configuration:
@@ -444,23 +600,23 @@ size = 500
 Example tasks:
 ```
 Example 1:
-Question: Find the Least Common Multiple (LCM) of these numbers: 33, 84
-Answer: 924
-Metadata: {'numbers': [33, 84], 'result': 924}
+Question: Find the Least Common Multiple (LCM) of these numbers: 30, 69
+Answer: 690
+Metadata: {'numbers': [30, 69], 'result': 690}
 
 Example 2:
-Question: Find the Least Common Multiple (LCM) of these numbers: 16, 23
-Answer: 368
-Metadata: {'numbers': [16, 23], 'result': 368}
+Question: Find the Least Common Multiple (LCM) of these numbers: 57, 99
+Answer: 1881
+Metadata: {'numbers': [57, 99], 'result': 1881}
 
 Example 3:
-Question: Find the Least Common Multiple (LCM) of these numbers: 66, 88
-Answer: 264
-Metadata: {'numbers': [66, 88], 'result': 264}
+Question: Find the Least Common Multiple (LCM) of these numbers: 3, 24
+Answer: 24
+Metadata: {'numbers': [3, 24], 'result': 24}
 
 ```
 
-### leg_counting {leg-counting}
+### leg_counting
 Generates leg counting arithmetic tasks
 
 Default configuration:
@@ -475,23 +631,23 @@ size = 500
 Example tasks:
 ```
 Example 1:
-Question: How many legs are there in total if you have 2 scorpions, 3 sea slugs, 2 cockroachs, 2 fireflys?
-Answer: 40
-Metadata: {'animals': {'scorpion': 2, 'sea slug': 3, 'cockroach': 2, 'firefly': 2}, 'total_legs': 40}
+Question: How many legs are there in total if you have 1 starfish, 3 crabs, 3 chickens, 3 cows, 1 woodlouse?
+Answer: 67
+Metadata: {'animals': {'starfish': 1, 'crab': 3, 'chicken': 3, 'cow': 3, 'woodlouse': 1}, 'total_legs': 67}
 
 Example 2:
-Question: How many legs are there in total if you have 2 shrimps, 2 deers?
-Answer: 28
-Metadata: {'animals': {'shrimp': 2, 'deer': 2}, 'total_legs': 28}
+Question: How many legs are there in total if you have 2 sheeps, 1 butterfly, 1 ant, 3 humans, 2 wasps?
+Answer: 38
+Metadata: {'animals': {'sheep': 2, 'butterfly': 1, 'ant': 1, 'human': 3, 'wasp': 2}, 'total_legs': 38}
 
 Example 3:
-Question: How many legs are there in total if you have 1 beetle, 3 spiders, 1 jellyfish?
-Answer: 30
-Metadata: {'animals': {'beetle': 1, 'spider': 3, 'jellyfish': 1}, 'total_legs': 30}
+Question: How many legs are there in total if you have 3 chickens, 3 cockroachs, 3 woodlouses, 2 elephants, 2 sea slugs?
+Answer: 74
+Metadata: {'animals': {'chicken': 3, 'cockroach': 3, 'woodlouse': 3, 'elephant': 2, 'sea slug': 2}, 'total_legs': 74}
 
 ```
 
-### letter_counting {letter-counting}
+### letter_counting
 Generates letter counting tasks from text spans
 
 Default configuration:
@@ -505,23 +661,23 @@ size = 500
 Example tasks:
 ```
 Example 1:
-Question: How many times does the letter "s" appear in the text: "a varied assortment is always in readiness A subscription"?
-Answer: 8
-Metadata: {'span_length': 9, 'target_letter': 's', 'span': ['a', 'varied', 'assortment', 'is', 'always', 'in', 'readiness', 'A', 'subscription']}
+Question: How many times does the letter "r" appear in the text: "You decline All is over then murmured the British agent sadly The"?
+Answer: 4
+Metadata: {'span_length': 12, 'target_letter': 'r', 'span': ['You', 'decline', 'All', 'is', 'over', 'then', 'murmured', 'the', 'British', 'agent', 'sadly', 'The']}
 
 Example 2:
-Question: How many times does the letter "c" appear in the text: "exclaims every one present Yes answers"?
+Question: How many times does the letter "l" appear in the text: "coffined and laid in a tomb Time went on September 25th 2889"?
 Answer: 1
-Metadata: {'span_length': 6, 'target_letter': 'c', 'span': ['exclaims', 'every', 'one', 'present', 'Yes', 'answers']}
+Metadata: {'span_length': 12, 'target_letter': 'l', 'span': ['coffined', 'and', 'laid', 'in', 'a', 'tomb', 'Time', 'went', 'on', 'September', '25th', '2889']}
 
 Example 3:
-Question: How many times does the letter "f" appear in the text: "individual Project Gutenberg electronic work is derived from texts"?
-Answer: 1
-Metadata: {'span_length': 9, 'target_letter': 'f', 'span': ['individual', 'Project', 'Gutenberg', 'electronic', 'work', 'is', 'derived', 'from', 'texts']}
+Question: How many times does the letter "i" appear in the text: "to the works took more time than he had anticipated It was"?
+Answer: 4
+Metadata: {'span_length': 12, 'target_letter': 'i', 'span': ['to', 'the', 'works', 'took', 'more', 'time', 'than', 'he', 'had', 'anticipated', 'It', 'was']}
 
 ```
 
-### letter_jumble {letter-jumble}
+### letter_jumble
 Generates word letter jumbling tasks
 
 Default configuration:
@@ -540,23 +696,23 @@ size = 500
 Example tasks:
 ```
 Example 1:
-Question: Unscramble these words: Acemira bnleogs ot
-Answer: America belongs to
-Metadata: {'num_words': 3, 'corruption_level': 0.3687132105849005, 'scrambled_words': ['Acemira', 'bnleogs', 'ot'], 'original_words': ['America', 'belongs', 'to']}
+Question: Unscramble these words: moon abotu faec hA trehe s somethnig ni htat driec eht owt nem ta ocne dnA dndeei
+Answer: moon about face Ah there s something in that cried the two men at once And indeed
+Metadata: {'num_words': 17, 'corruption_level': 0.16056171448414203, 'scrambled_words': ['moon', 'abotu', 'faec', 'hA', 'trehe', 's', 'somethnig', 'ni', 'htat', 'driec', 'eht', 'owt', 'nem', 'ta', 'ocne', 'dnA', 'dndeei'], 'original_words': ['moon', 'about', 'face', 'Ah', 'there', 's', 'something', 'in', 'that', 'cried', 'the', 'two', 'men', 'at', 'once', 'And', 'indeed']}
 
 Example 2:
-Question: Unscramble these words: cubssribres ton noly
-Answer: subscribers not only
-Metadata: {'num_words': 3, 'corruption_level': 0.38741746634525664, 'scrambled_words': ['cubssribres', 'ton', 'noly'], 'original_words': ['subscribers', 'not', 'only']}
+Question: Unscramble these words: lla het aosssen eth msea I psrooep ot od toshmeign etrtbe itlsl amrnsTrfo toni aeht a tiooprn fo het
+Answer: all the seasons the same I propose to do something better still Transform into heat a portion of the
+Metadata: {'num_words': 19, 'corruption_level': 0.8984516776838924, 'scrambled_words': ['lla', 'het', 'aosssen', 'eth', 'msea', 'I', 'psrooep', 'ot', 'od', 'toshmeign', 'etrtbe', 'itlsl', 'amrnsTrfo', 'toni', 'aeht', 'a', 'tiooprn', 'fo', 'het'], 'original_words': ['all', 'the', 'seasons', 'the', 'same', 'I', 'propose', 'to', 'do', 'something', 'better', 'still', 'Transform', 'into', 'heat', 'a', 'portion', 'of', 'the']}
 
 Example 3:
-Question: Unscramble these words: yuo peerntd ttha yuo exepct ot cantmafuure a mnhau iegnb uot adn uot Wyh ton rM itSmh cndedaav
-Answer: you pretend that you expect to manufacture a human being out and out Why not Mr Smith advanced
-Metadata: {'num_words': 18, 'corruption_level': 0.5094277166629008, 'scrambled_words': ['yuo', 'peerntd', 'ttha', 'yuo', 'exepct', 'ot', 'cantmafuure', 'a', 'mnhau', 'iegnb', 'uot', 'adn', 'uot', 'Wyh', 'ton', 'rM', 'itSmh', 'cndedaav'], 'original_words': ['you', 'pretend', 'that', 'you', 'expect', 'to', 'manufacture', 'a', 'human', 'being', 'out', 'and', 'out', 'Why', 'not', 'Mr', 'Smith', 'advanced']}
+Question: Unscramble these words: od ubt si ti fo yna sue Waht ew need si csoudl ont iarn oG dais eh addressing
+Answer: do but is it of any use What we need is clouds not rain Go said he addressing
+Metadata: {'num_words': 18, 'corruption_level': 0.21786426698317396, 'scrambled_words': ['od', 'ubt', 'si', 'ti', 'fo', 'yna', 'sue', 'Waht', 'ew', 'need', 'si', 'csoudl', 'ont', 'iarn', 'oG', 'dais', 'eh', 'addressing'], 'original_words': ['do', 'but', 'is', 'it', 'of', 'any', 'use', 'What', 'we', 'need', 'is', 'clouds', 'not', 'rain', 'Go', 'said', 'he', 'addressing']}
 
 ```
 
-### maze {maze}
+### maze
 Generates mazes with guaranteed shortest path distance from start to goal
     within [min_dist, max_dist].
 
@@ -573,63 +729,57 @@ size = 50
 Example tasks:
 ```
 Example 1:
-Question: Navigate from 'a' (start) to ':' (goal):
+Question: Navigate from 'F' (start) to 'S' (goal):
 
-```xxxxxxxxxx
-xxxx?xx:xx
-xxxx??x??x
-xx????x??x
-xxx?x???xx
-x?x?????xx
-x??ax???xx
-x???xxx??x
-x????x?xxx
-xxxxxxxxxx```
-Legend: 'x' = Wall, '?' = Passage
+```DDDDDDD
+D]D]]DD
+DD]DD]D
+DDS]]]D
+D]]D]]D
+D]]]]FD
+DDDDDDD```
+Legend: 'D' = Wall, ']' = Passage
+
+What is the minimum number of steps to reach the goal?
+Answer: 5
+Metadata: {'grid_size': 7, 'grid': ['DDDDDDD', 'D]D]]DD', 'DD]DD]D', 'DDS]]]D', 'D]]D]]D', 'D]]]]FD', 'DDDDDDD'], 'shortest_path_length': 5, 'start': 'F', 'goal': 'S', 'wall': 'D', 'path': ']'}
+
+Example 2:
+Question: Navigate from 'V' (start) to 'S' (goal):
+
+```77777777
+77SUU777
+7U7UUUU7
+77UUU777
+7UU7UUU7
+77U7UUU7
+7UUU7UV7
+77777777```
+Legend: '7' = Wall, 'U' = Passage
 
 What is the minimum number of steps to reach the goal?
 Answer: 9
-Metadata: {'grid_size': 10, 'grid': ['xxxxxxxxxx', 'xxxx?xx:xx', 'xxxx??x??x', 'xx????x??x', 'xxx?x???xx', 'x?x?????xx', 'x??ax???xx', 'x???xxx??x', 'x????x?xxx', 'xxxxxxxxxx'], 'shortest_path_length': 9, 'start': 'a', 'goal': ':', 'wall': 'x', 'path': '?'}
-
-Example 2:
-Question: Navigate from '"' (start) to '}' (goal):
-
-```444444444
-4##4#4##4
-44}444444
-44##4#444
-4#####"44
-4##4####4
-444#####4
-4##4#4444
-444444444```
-Legend: '4' = Wall, '#' = Passage
-
-What is the minimum number of steps to reach the goal?
-Answer: 6
-Metadata: {'grid_size': 9, 'grid': ['444444444', '4##4#4##4', '44}444444', '44##4#444', '4#####"44', '4##4####4', '444#####4', '4##4#4444', '444444444'], 'shortest_path_length': 6, 'start': '"', 'goal': '}', 'wall': '4', 'path': '#'}
+Metadata: {'grid_size': 8, 'grid': ['77777777', '77SUU777', '7U7UUUU7', '77UUU777', '7UU7UUU7', '77U7UUU7', '7UUU7UV7', '77777777'], 'shortest_path_length': 9, 'start': 'V', 'goal': 'S', 'wall': '7', 'path': 'U'}
 
 Example 3:
-Question: Navigate from '(' (start) to '$' (goal):
+Question: Navigate from 'z' (start) to '4' (goal):
 
-```eeeeeeeee
-e(%%%%%ee
-e%%%%%eee
-ee%eee%ee
-e%%%%%$%e
-e%%%%e%ee
-e%%%%%%%e
-ee%%%e%%e
-eeeeeeeee```
-Legend: 'e' = Wall, '%' = Passage
+```$$$$$$$
+$~~~~~$
+$$~$~~$
+$~$~$4$
+$$~~~~$
+$~z~~~$
+$$$$$$$```
+Legend: '$' = Wall, '~' = Passage
 
 What is the minimum number of steps to reach the goal?
-Answer: 8
-Metadata: {'grid_size': 9, 'grid': ['eeeeeeeee', 'e(%%%%%ee', 'e%%%%%eee', 'ee%eee%ee', 'e%%%%%$%e', 'e%%%%e%ee', 'e%%%%%%%e', 'ee%%%e%%e', 'eeeeeeeee'], 'shortest_path_length': 8, 'start': '(', 'goal': '$', 'wall': 'e', 'path': '%'}
+Answer: 5
+Metadata: {'grid_size': 7, 'grid': ['$$$$$$$', '$~~~~~$', '$$~$~~$', '$~$~$4$', '$$~~~~$', '$~z~~~$', '$$$$$$$'], 'shortest_path_length': 5, 'start': 'z', 'goal': '4', 'wall': '$', 'path': '~'}
 
 ```
 
-### mini_sudoku {mini-sudoku}
+### mini_sudoku
 Generates 4x4 sudoku puzzles with configurable difficulty
 
 Default configuration:
@@ -644,43 +794,43 @@ Example tasks:
 ```
 Example 1:
 Question: Solve this 4x4 Mini Sudoku puzzle:
-_ 3 _ 1
-2 1 _ _
-_ _ _ 2
-3 2 _ 4
-Answer: 4 3 2 1
-2 1 4 3
-1 4 3 2
+1 _ _ _
+_ 4 _ _
+_ _ _ 3
+_ _ 1 4
+Answer: 1 3 4 2
+2 4 3 1
+4 1 2 3
 3 2 1 4
-Metadata: {'puzzle': [[0, 3, 0, 1], [2, 1, 0, 0], [0, 0, 0, 2], [3, 2, 0, 4]], 'solution': [[4, 3, 2, 1], [2, 1, 4, 3], [1, 4, 3, 2], [3, 2, 1, 4]], 'num_empty': 8}
+Metadata: {'puzzle': [[1, 0, 0, 0], [0, 4, 0, 0], [0, 0, 0, 3], [0, 0, 1, 4]], 'solution': [[1, 3, 4, 2], [2, 4, 3, 1], [4, 1, 2, 3], [3, 2, 1, 4]], 'num_empty': 11}
 
 Example 2:
 Question: Solve this 4x4 Mini Sudoku puzzle:
-1 _ _ _
-_ _ 1 _
-2 _ _ _
-3 4 _ _
-Answer: 1 2 3 4
-4 3 1 2
-2 1 4 3
-3 4 2 1
-Metadata: {'puzzle': [[1, 0, 0, 0], [0, 0, 1, 0], [2, 0, 0, 0], [3, 4, 0, 0]], 'solution': [[1, 2, 3, 4], [4, 3, 1, 2], [2, 1, 4, 3], [3, 4, 2, 1]], 'num_empty': 11}
+_ _ _ 2
+2 _ _ 4
+_ 4 _ _
+_ 2 4 _
+Answer: 4 3 1 2
+2 1 3 4
+1 4 2 3
+3 2 4 1
+Metadata: {'puzzle': [[0, 0, 0, 2], [2, 0, 0, 4], [0, 4, 0, 0], [0, 2, 4, 0]], 'solution': [[4, 3, 1, 2], [2, 1, 3, 4], [1, 4, 2, 3], [3, 2, 4, 1]], 'num_empty': 10}
 
 Example 3:
 Question: Solve this 4x4 Mini Sudoku puzzle:
-_ 2 4 3
-_ 3 _ _
-2 _ _ _
-_ 1 2 _
-Answer: 1 2 4 3
-4 3 1 2
-2 4 3 1
+4 2 _ _
+3 _ 2 4
+_ _ _ _
+_ 4 3 2
+Answer: 4 2 1 3
 3 1 2 4
-Metadata: {'puzzle': [[0, 2, 4, 3], [0, 3, 0, 0], [2, 0, 0, 0], [0, 1, 2, 0]], 'solution': [[1, 2, 4, 3], [4, 3, 1, 2], [2, 4, 3, 1], [3, 1, 2, 4]], 'num_empty': 9}
+2 3 4 1
+1 4 3 2
+Metadata: {'puzzle': [[4, 2, 0, 0], [3, 0, 2, 4], [0, 0, 0, 0], [0, 4, 3, 2]], 'solution': [[4, 2, 1, 3], [3, 1, 2, 4], [2, 3, 4, 1], [1, 4, 3, 2]], 'num_empty': 8}
 
 ```
 
-### number_filtering {number-filtering}
+### number_filtering
 Generates number filtering tasks
 
 Default configuration:
@@ -698,23 +848,23 @@ size = 500
 Example tasks:
 ```
 Example 1:
-Question: Remove all numbers larger than 49.350 in this list: ['-96', '58.6', '39', '4.1432']
-Answer: ['-96', '39', '4.1432']
-Metadata: {'original_numbers': ['-96', '58.6', '39', '4.1432'], 'filter_value': '49.350', 'operation': 'remove_larger', 'result': ['-96', '39', '4.1432']}
+Question: Remove all numbers smaller than -78.527 in this list: ['-14.14', '10.92', '-56.57', '-56', '-84.8', '20']
+Answer: ['-14.14', '10.92', '-56.57', '-56', '20']
+Metadata: {'original_numbers': ['-14.14', '10.92', '-56.57', '-56', '-84.8', '20'], 'filter_value': '-78.527', 'operation': 'remove_smaller', 'result': ['-14.14', '10.92', '-56.57', '-56', '20']}
 
 Example 2:
-Question: Remove all numbers larger than -58.8 in this list: ['42.685', '38.4878', '27.3', '29.6', '-41.16', '87.20', '-66.104', '57.848', '10.3373', '-45.7']
-Answer: ['-66.104']
-Metadata: {'original_numbers': ['42.685', '38.4878', '27.3', '29.6', '-41.16', '87.20', '-66.104', '57.848', '10.3373', '-45.7'], 'filter_value': '-58.8', 'operation': 'remove_larger', 'result': ['-66.104']}
+Question: Remove all numbers larger than 19 in this list: ['20', '66', '-22.729', '-21.62', '-6.2198', '4', '34.0', '-43.9360', '98.011', '-1.2024']
+Answer: ['-22.729', '-21.62', '-6.2198', '4', '-43.9360', '-1.2024']
+Metadata: {'original_numbers': ['20', '66', '-22.729', '-21.62', '-6.2198', '4', '34.0', '-43.9360', '98.011', '-1.2024'], 'filter_value': '19', 'operation': 'remove_larger', 'result': ['-22.729', '-21.62', '-6.2198', '4', '-43.9360', '-1.2024']}
 
 Example 3:
-Question: Keep all numbers smaller than -82.5 in this list: ['-27.517', '11.04', '61', '-95.59', '-89.6322', '84.9458', '-19.8']
-Answer: ['-95.59', '-89.6322']
-Metadata: {'original_numbers': ['-27.517', '11.04', '61', '-95.59', '-89.6322', '84.9458', '-19.8'], 'filter_value': '-82.5', 'operation': 'keep_smaller', 'result': ['-95.59', '-89.6322']}
+Question: Keep all numbers smaller than 2.319 in this list: ['99', '-21', '-77.530', '7', '-11', '87.2816', '94.319', '-36', '-25.7766', '30.013']
+Answer: ['-21', '-77.530', '-11', '-36', '-25.7766']
+Metadata: {'original_numbers': ['99', '-21', '-77.530', '7', '-11', '87.2816', '94.319', '-36', '-25.7766', '30.013'], 'filter_value': '2.319', 'operation': 'keep_smaller', 'result': ['-21', '-77.530', '-11', '-36', '-25.7766']}
 
 ```
 
-### number_sequence {number-sequence}
+### number_sequence
 Generates number sequence completion tasks with dynamic pattern generation
 
 Default configuration:
@@ -731,23 +881,23 @@ size = 500
 Example tasks:
 ```
 Example 1:
-Question: 7, 3, 1, 0, 0, 0, ?
+Question: 9, 4, 2, 1, 0, 0, 0, ?
 Answer: 0
-Metadata: {'rule': 'halve', 'complexity': 2, 'sequence': [7, 3, 1, 0, 0, 0, 0]}
+Metadata: {'rule': 'halve', 'complexity': 2, 'sequence': [9, 4, 2, 1, 0, 0, 0, 0]}
 
 Example 2:
-Question: -5, -3, -2, -1, ?
-Answer: -1
-Metadata: {'rule': 'halve', 'complexity': 3, 'sequence': [-5, -3, -2, -1, -1]}
+Question: -2, 1, 7, 19, 43, 91, 187, 379, ?
+Answer: 763
+Metadata: {'rule': 'double then add 5', 'complexity': 1, 'sequence': [-2, 1, 7, 19, 43, 91, 187, 379, 763]}
 
 Example 3:
-Question: 5, 5, 10, 15, 25, 40, 65, ?
-Answer: 105
-Metadata: {'rule': 'add previous', 'complexity': 1, 'sequence': [5, 5, 10, 15, 25, 40, 65, 105]}
+Question: 1, 0, 0, 0, 0, 0, 0, ?
+Answer: 0
+Metadata: {'rule': 'halve then multiply by 8', 'complexity': 1, 'sequence': [1, 0, 0, 0, 0, 0, 0, 0]}
 
 ```
 
-### number_sorting {number-sorting}
+### number_sorting
 Generates number sorting tasks
 
 Default configuration:
@@ -765,23 +915,23 @@ size = 500
 Example tasks:
 ```
 Example 1:
-Question: Sort these numbers in descending order: 34, 4, -49, -52, -19
-Answer: ['34', '4', '-19', '-49', '-52']
-Metadata: {'original_numbers': ['34', '4', '-49', '-52', '-19'], 'direction': 'descending', 'sorted_numbers': ['34', '4', '-19', '-49', '-52']}
+Question: Sort these numbers in ascending order: -6.78, -92.30, 91.23, -77.49, 95.03, 74.19, 70.26, -67.10
+Answer: ['-92.30', '-77.49', '-67.10', '-6.78', '70.26', '74.19', '91.23', '95.03']
+Metadata: {'original_numbers': ['-6.78', '-92.30', '91.23', '-77.49', '95.03', '74.19', '70.26', '-67.10'], 'direction': 'ascending', 'sorted_numbers': ['-92.30', '-77.49', '-67.10', '-6.78', '70.26', '74.19', '91.23', '95.03']}
 
 Example 2:
-Question: Sort these numbers in descending order: -4.44, 91.85, -86.58, -93.98, -92.88, 71.69, 25.88, 57.53, 89.65
-Answer: ['91.85', '89.65', '71.69', '57.53', '25.88', '-4.44', '-86.58', '-92.88', '-93.98']
-Metadata: {'original_numbers': ['-4.44', '91.85', '-86.58', '-93.98', '-92.88', '71.69', '25.88', '57.53', '89.65'], 'direction': 'descending', 'sorted_numbers': ['91.85', '89.65', '71.69', '57.53', '25.88', '-4.44', '-86.58', '-92.88', '-93.98']}
+Question: Sort these numbers in descending order: -10.32, 68.71, -89.59, 57.02, 12.29, -75.18, 49.79, -62.58, -58.82
+Answer: ['68.71', '57.02', '49.79', '12.29', '-10.32', '-58.82', '-62.58', '-75.18', '-89.59']
+Metadata: {'original_numbers': ['-10.32', '68.71', '-89.59', '57.02', '12.29', '-75.18', '49.79', '-62.58', '-58.82'], 'direction': 'descending', 'sorted_numbers': ['68.71', '57.02', '49.79', '12.29', '-10.32', '-58.82', '-62.58', '-75.18', '-89.59']}
 
 Example 3:
-Question: Sort these numbers in descending order: -34.19, -85.95, -6.94, -74.52, 5.10, -18.09, -4.41
-Answer: ['5.10', '-4.41', '-6.94', '-18.09', '-34.19', '-74.52', '-85.95']
-Metadata: {'original_numbers': ['-34.19', '-85.95', '-6.94', '-74.52', '5.10', '-18.09', '-4.41'], 'direction': 'descending', 'sorted_numbers': ['5.10', '-4.41', '-6.94', '-18.09', '-34.19', '-74.52', '-85.95']}
+Question: Sort these numbers in descending order: 10.13, 72.60, 72.13, 14.65, 1.16, -26.82, 55.17, 37.38, 76.73, -82.92
+Answer: ['76.73', '72.60', '72.13', '55.17', '37.38', '14.65', '10.13', '1.16', '-26.82', '-82.92']
+Metadata: {'original_numbers': ['10.13', '72.60', '72.13', '14.65', '1.16', '-26.82', '55.17', '37.38', '76.73', '-82.92'], 'direction': 'descending', 'sorted_numbers': ['76.73', '72.60', '72.13', '55.17', '37.38', '14.65', '10.13', '1.16', '-26.82', '-82.92']}
 
 ```
 
-### polynomial_equations {polynomial-equations}
+### polynomial_equations
 Generates random polynomial equations of degree in [min_degree, max_degree].
     - The polynomial is formed by summing random terms of the form: coeff * x^exponent.
     - Then we solve "polynomial_expr = 0" using Sympy.
@@ -803,23 +953,23 @@ size = 500
 Example tasks:
 ```
 Example 1:
-Question: Find the real value(s) of q in the equation: -166*q**2 - 83*q = 0
-Answer: [-0.5, 0.0]
-Metadata: {'polynomial_expr': '-166*q**2 - 83*q', 'variable': 'q', 'degree': 2, 'real_solutions': [-0.5, 0.0]}
+Question: Determine the real value(s) of a that satisfies: -35*a**2 = 0
+Answer: [0.0]
+Metadata: {'polynomial_expr': '-35*a**2', 'variable': 'a', 'degree': 2, 'real_solutions': [0.0]}
 
 Example 2:
-Question: Determine the real value(s) of i tha satisfies: -41*i = 0
-Answer: [0.0]
-Metadata: {'polynomial_expr': '-41*i', 'variable': 'i', 'degree': 1, 'real_solutions': [0.0]}
+Question: Solve for real l: 27*l**2 + 175*l - 1 = 0
+Answer: [-6.487190738158517, 0.005709256677035911]
+Metadata: {'polynomial_expr': '27*l**2 + 175*l - 1', 'variable': 'l', 'degree': 2, 'real_solutions': [-6.487190738158517, 0.005709256677035911]}
 
 Example 3:
-Question: Find the real value(s) of t in the equation: -153*t = 0
-Answer: [0.0]
-Metadata: {'polynomial_expr': '-153*t', 'variable': 't', 'degree': 1, 'real_solutions': [0.0]}
+Question: Find the real value(s) of t in the equation: 94 - 9*t**2 = 0
+Answer: [-3.2317865716108862, 3.2317865716108862]
+Metadata: {'polynomial_expr': '94 - 9*t**2', 'variable': 't', 'degree': 2, 'real_solutions': [-3.2317865716108862, 3.2317865716108862]}
 
 ```
 
-### prime_factorization {prime-factorization}
+### prime_factorization
 Generates prime factorization tasks
 
 Default configuration:
@@ -833,23 +983,23 @@ size = 500
 Example tasks:
 ```
 Example 1:
-Question: Find the prime factorization of 139. Write the factors separated by × (Example: for 12 the answer would be: 2 × 2 × 3)
-Answer: 139
-Metadata: {'number': 139, 'factors': [139]}
+Question: Find the prime factorization of 973. Write the factors separated by × (Example: for 12 the answer would be: 2 × 2 × 3)
+Answer: 7 × 139
+Metadata: {'number': 973, 'factors': [7, 139]}
 
 Example 2:
-Question: Find the prime factorization of 172. Write the factors separated by × (Example: for 12 the answer would be: 2 × 2 × 3)
-Answer: 2 × 2 × 43
-Metadata: {'number': 172, 'factors': [2, 2, 43]}
+Question: Find the prime factorization of 153. Write the factors separated by × (Example: for 12 the answer would be: 2 × 2 × 3)
+Answer: 3 × 3 × 17
+Metadata: {'number': 153, 'factors': [3, 3, 17]}
 
 Example 3:
-Question: Find the prime factorization of 562. Write the factors separated by × (Example: for 12 the answer would be: 2 × 2 × 3)
-Answer: 2 × 281
-Metadata: {'number': 562, 'factors': [2, 281]}
+Question: Find the prime factorization of 390. Write the factors separated by × (Example: for 12 the answer would be: 2 × 2 × 3)
+Answer: 2 × 3 × 5 × 13
+Metadata: {'number': 390, 'factors': [2, 3, 5, 13]}
 
 ```
 
-### propositional_logic {propositional-logic}
+### propositional_logic
 Generates propositional logic reasoning tasks
 
 Default configuration:
@@ -867,33 +1017,34 @@ Example tasks:
 ```
 Example 1:
 Question: Given:
-1. Q
-2. S
-3. P
+1. (Q → P)
+2. (P → P)
+3. ((P ∨ Q) ↔ (P ↔ Q))
+4. (Q ∨ P)
 What can we conclude?
-Answer: (P ∧ S)
-Metadata: {'premises': ['Q', 'S', 'P'], 'variables': ['P', 'Q', 'R', 'S'], 'complexity': 3}
+Answer: (P ∧ P)
+Metadata: {'premises': ['(Q → P)', '(P → P)', '((P ∨ Q) ↔ (P ↔ Q))', '(Q ∨ P)'], 'variables': ['P', 'Q'], 'complexity': 3}
 
 Example 2:
 Question: Given:
-1. (P ∨ Q)
-2. P
+1. P
+2. ¬(P ∧ P)
+3. Q
 What can we conclude?
-Answer: (P ∨ Q)
-Metadata: {'premises': ['(P ∨ Q)', 'P'], 'variables': ['P', 'Q', 'R'], 'complexity': 3}
+Answer: (P ∧ P)
+Metadata: {'premises': ['P', '¬(P ∧ P)', 'Q'], 'variables': ['P', 'Q', 'R'], 'complexity': 3}
 
 Example 3:
 Question: Given:
-1. Q
-2. ((Q ↔ P) → (Q → Q))
-3. ((Q → P) → (P ↔ Q))
+1. ¬(R → P)
+2. ¬P
 What can we conclude?
-Answer: (P → Q)
-Metadata: {'premises': ['Q', '((Q ↔ P) → (Q → Q))', '((Q → P) → (P ↔ Q))'], 'variables': ['P', 'Q'], 'complexity': 3}
+Answer: (Q ↔ Q)
+Metadata: {'premises': ['¬(R → P)', '¬P'], 'variables': ['P', 'Q', 'R'], 'complexity': 3}
 
 ```
 
-### quantum_lock {quantum-lock}
+### quantum_lock
 Generates QuantumLock tasks
 
 Default configuration:
@@ -910,43 +1061,43 @@ Question: In front of you are some buttons, a light, and a number. The light wil
 You must press the shortest correct sequence of buttons to reach the target value.
 
 Start: 0 (red)
-Target: 36
+Target: 38
 Buttons:
-A: Add 3 (when any)
-B: Multiply 3 (when any)
-C: Multiply 3 (when red)
-Answer: A → B → A → B
-Metadata: {'difficulty': 10, 'solution_path': ['A', 'B', 'A', 'B'], 'target_value': 36, 'buttons': [{'name': 'A', 'type': 'add', 'value': 3, 'active_state': 'any'}, {'name': 'B', 'type': 'multiply', 'value': 3, 'active_state': 'any'}, {'name': 'C', 'type': 'multiply', 'value': 3, 'active_state': 'red'}], 'initial_state': 'red', 'initial_value': 0}
+A: Multiply 2 (when any)
+B: Add 2 (when red)
+C: Multiply 3 (when any)
+Answer: B → A → C → C → B
+Metadata: {'difficulty': 10, 'solution_path': ['B', 'A', 'C', 'C', 'B'], 'target_value': 38, 'buttons': [{'name': 'A', 'type': 'multiply', 'value': 2, 'active_state': 'any'}, {'name': 'B', 'type': 'add', 'value': 2, 'active_state': 'red'}, {'name': 'C', 'type': 'multiply', 'value': 3, 'active_state': 'any'}], 'initial_state': 'red', 'initial_value': 0}
 
 Example 2:
 Question: In front of you are some buttons, a light, and a number. The light will toggle between red and green whenever you press a button. Each button performs a mathematical operation to the number, but the operation may depend on the state of the light.
 You must press the shortest correct sequence of buttons to reach the target value.
 
 Start: 0 (red)
-Target: 30
+Target: 42
 Buttons:
-A: Subtract 2 (when red)
-B: Add 3 (when any)
-C: Subtract 3 (when green)
-Answer: B → B → B → B → B → B → B → B → B → B
-Metadata: {'difficulty': 10, 'solution_path': ['B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B'], 'target_value': 30, 'buttons': [{'name': 'A', 'type': 'subtract', 'value': 2, 'active_state': 'red'}, {'name': 'B', 'type': 'add', 'value': 3, 'active_state': 'any'}, {'name': 'C', 'type': 'subtract', 'value': 3, 'active_state': 'green'}], 'initial_state': 'red', 'initial_value': 0}
+A: Multiply 3 (when any)
+B: Add 2 (when any)
+C: Add 3 (when any)
+Answer: B → B → A → B → A
+Metadata: {'difficulty': 10, 'solution_path': ['B', 'B', 'A', 'B', 'A'], 'target_value': 42, 'buttons': [{'name': 'A', 'type': 'multiply', 'value': 3, 'active_state': 'any'}, {'name': 'B', 'type': 'add', 'value': 2, 'active_state': 'any'}, {'name': 'C', 'type': 'add', 'value': 3, 'active_state': 'any'}], 'initial_state': 'red', 'initial_value': 0}
 
 Example 3:
 Question: In front of you are some buttons, a light, and a number. The light will toggle between red and green whenever you press a button. Each button performs a mathematical operation to the number, but the operation may depend on the state of the light.
 You must press the shortest correct sequence of buttons to reach the target value.
 
 Start: 0 (red)
-Target: 38
+Target: 35
 Buttons:
-A: Add 2 (when any)
-B: Add 3 (when any)
-C: Subtract 2 (when any)
-Answer: A → B → B → B → B → B → B → B → B → B → B → B → B
-Metadata: {'difficulty': 10, 'solution_path': ['A', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B'], 'target_value': 38, 'buttons': [{'name': 'A', 'type': 'add', 'value': 2, 'active_state': 'any'}, {'name': 'B', 'type': 'add', 'value': 3, 'active_state': 'any'}, {'name': 'C', 'type': 'subtract', 'value': 2, 'active_state': 'any'}], 'initial_state': 'red', 'initial_value': 0}
+A: Multiply 3 (when red)
+B: Add 2 (when green)
+C: Subtract 3 (when any)
+Answer: A → B → A → C → A → B → A → B
+Metadata: {'difficulty': 10, 'solution_path': ['A', 'B', 'A', 'C', 'A', 'B', 'A', 'B'], 'target_value': 35, 'buttons': [{'name': 'A', 'type': 'multiply', 'value': 3, 'active_state': 'red'}, {'name': 'B', 'type': 'add', 'value': 2, 'active_state': 'green'}, {'name': 'C', 'type': 'subtract', 'value': 3, 'active_state': 'any'}], 'initial_state': 'red', 'initial_value': 0}
 
 ```
 
-### rubiks_cube {rubiks-cube}
+### rubiks_cube
 Generates RubiksCube tasks
 
 Default configuration:
@@ -961,62 +1112,62 @@ size = 500
 Example tasks:
 ```
 Example 1:
-Question: You see a size 3 Rubik's cube. It is arranged this:
+Question: You are given a 3x3x3 Rubik's cube. It looks like this:
 
-          R  R  R
           Y  Y  Y
-          R  R  R
- W  R  W  G  G  G  Y  O  Y  B  B  B
- W  R  W  G  G  G  Y  O  Y  B  B  B
- B  B  B  W  R  W  G  G  G  Y  O  Y
-          O  W  O
-          O  W  O
-          O  W  O
+          Y  Y  Y
+          Y  Y  Y
+ G  G  G  O  O  O  B  B  B  R  R  R
+ R  R  R  G  G  G  O  O  O  B  B  B
+ R  R  R  G  G  G  O  O  O  B  B  B
+          W  W  W
+          W  W  W
+          W  W  W
 
 
-Please provide a solution to solve this cube.
+Please provide a solution to solve this cube using Singmaster notation.
 Answer: None
-Metadata: {'cube_size': 3, 'scramble_steps': 3, 'scramble_moves': "B' F D", 'example_correct_answer': "B F D F' D' F' D B' D' R U' R' L U L' U' R' U R L U' L' U L U' L' U' B' U B U' U' F' U F U R U' R' U' B U' B' U' R' U R U' U' B' U B U L U' L' R U R' U R U U R' U U R U' L' U R' U' L U R U' L' U R' U' L U R' D' R D R' D' R D R' D' R D R' D' R D U R' D' R D R' D' R D R' D' R D R' D' R D U R' D' R D R' D' R D R' D' R D R' D' R D U"}
+Metadata: {'cube_size': 3, 'scramble_steps': 3, 'scramble_moves': "U L L'", 'example_correct_answer': "U'"}
 
 Example 2:
 Question: You see a size 3 Rubik's cube. It is arranged this:
 
-          B  O  G
-          B  Y  G
-          B  Y  G
- Y  Y  Y  O  G  W  O  O  O  Y  B  R
- R  R  R  Y  G  W  O  O  O  Y  B  W
- R  R  R  Y  G  R  W  W  W  O  B  W
-          G  W  B
-          G  W  B
-          G  R  B
+          Y  Y  O
+          Y  Y  O
+          Y  Y  B
+ R  R  R  G  G  Y  O  G  G  W  B  B
+ R  R  Y  O  G  G  W  O  O  B  B  B
+ R  R  Y  O  G  G  W  O  O  B  B  B
+          G  R  R
+          W  W  W
+          W  W  W
 
 
 Please provide a solution to solve this cube.
 Answer: None
-Metadata: {'cube_size': 3, 'scramble_steps': 3, 'scramble_moves': 'B L R', 'example_correct_answer': "R' B' U' L D F' D' U L U' L' U F U' F' U' L' U L U F U' F' U L' U L U F U' F' U' F' U F U R U' R' U' F' U F U R U' R' U F' U F U R U' R' F R U R' U' F' U R U R' U R U U R' L U' R' U L' U' R U L U' R' U L' U' D' R D R' D' R D U R' D' R D R' D' R D R' D' R D R' D' R D U R' D' R D R' D' R D R' D' R D R' D' R D U R' D' R D R' D' R D U"}
+Metadata: {'cube_size': 3, 'scramble_steps': 3, 'scramble_moves': "U F' U'", 'example_correct_answer': "U F U'"}
 
 Example 3:
 Question: You see a size 3 Rubik's cube. It is arranged this:
 
+          R  R  R
+          B  Y  Y
+          O  O  O
+ G  R  Y  G  G  G  W  O  B  W  W  W
+ W  R  Y  G  G  G  W  O  Y  B  B  B
+ W  R  B  Y  Y  Y  G  O  Y  B  B  B
+          R  R  R
+          G  W  W
           O  O  O
-          Y  Y  G
-          Y  Y  G
- G  R  R  G  G  W  O  O  B  Y  Y  Y
- Y  R  R  G  G  W  O  O  W  B  B  B
- B  B  B  Y  R  R  G  G  W  O  O  W
-          R  W  W
-          R  W  W
-          R  B  B
 
 
 Please provide a solution to solve this cube.
 Answer: None
-Metadata: {'cube_size': 3, 'scramble_steps': 3, 'scramble_moves': 'R B D', 'example_correct_answer': "B' F D F' D' R D R' B' D' L' U' L U R U' R' U' L U U L' F U' F' L' U U L U' B' U B U L U' L' L' U L U F U' F' U' F' U F U R U' R' U' U' R' U R U B U' B' U' U' B' U B U L U' L' U F R U R' U' R U R' U' F' U R U R' U R U U R' L U' R' U L' U' R U L U' R' U L' U' R U R' D' R D R' D' R D U R' D' R D R' D' R D U R' D' R D R' D' R D U"}
+Metadata: {'cube_size': 3, 'scramble_steps': 3, 'scramble_moves': "L B' F'", 'example_correct_answer': "B L' F U F U' F' U F R U R' U' F' R U R' U R U U R' U' R U R' U R U U R' U' L U' R' U L' U' R U L U' R' U L' U' D' R D R' D' R D R' D' R D R' D' R D U R' D' R D R' D' R D U'"}
 
 ```
 
-### sentence_reordering {sentence-reordering}
+### sentence_reordering
 Generates sentence reordering tasks from text spans
 
 Default configuration:
@@ -1030,23 +1181,23 @@ size = 500
 Example tasks:
 ```
 Example 1:
-Question: Restore the correct order of words in the following sentence: about We think must it.
-Answer: We must think about it.
+Question: Restore the correct order of words in the following sentence: thing first that Mr. The
+Answer: The first thing that Mr.
 Metadata: {'word_count': 5}
 
 Example 2:
-Question: Restore the correct order of words in the following sentence: 1 through 1.
-Answer: 1 through 1.
-Metadata: {'word_count': 3}
+Question: Restore the correct order of words in the following sentence: shall The to called be the attention of government the matter. Chinese
+Answer: The attention of the the Chinese government shall be called to matter.
+Metadata: {'word_count': 12}
 
 Example 3:
-Question: Restore the correct order of words in the following sentence: lease Smith of great of a has falls obtained Niagara. the
-Answer: Smith has obtained a lease of of the great falls Niagara.
-Metadata: {'word_count': 11}
+Question: Restore the correct order of words in the following sentence: wonderful we are the accumulators. indebted instruments those new for Jackson To
+Answer: To Jackson we are indebted for those wonderful instruments the new accumulators.
+Metadata: {'word_count': 12}
 
 ```
 
-### simple_equations {simple-equations}
+### simple_equations
 Generates simple equations with one variable to solve
 
 Default configuration:
@@ -1063,23 +1214,23 @@ size = 500
 Example tasks:
 ```
 Example 1:
-Question: Find the value of o in the equation: 84*o - 79 = 4625
-Answer: 56
-Metadata: {'equation': '84*o - 79 = 4625', 'variable': 'o'}
+Question: Solve for j: 69 - 47*j = -4020
+Answer: 87
+Metadata: {'equation': '69 - 47*j = -4020', 'variable': 'j'}
 
 Example 2:
-Question: Find the value of e in the equation: 2068*e = 198528
-Answer: 96
-Metadata: {'equation': '2068*e = 198528', 'variable': 'e'}
+Question: Solve for o: 210000*o + 98 = 840098
+Answer: 4
+Metadata: {'equation': '210000*o + 98 = 840098', 'variable': 'o'}
 
 Example 3:
-Question: Determine the value of g that satisfies: 71*g - 80 = 204
-Answer: 4
-Metadata: {'equation': '71*g - 80 = 204', 'variable': 'g'}
+Question: Find the value of a in the equation: 6930*a = 297990
+Answer: 43
+Metadata: {'equation': '6930*a = 297990', 'variable': 'a'}
 
 ```
 
-### spell_backward {spell-backward}
+### spell_backward
 Generates tasks to spell words backward
 
 Default configuration:
@@ -1092,23 +1243,23 @@ size = 500
 Example tasks:
 ```
 Example 1:
-Question: Spell this word backward (example: sun -> nus): made
-Answer: edam
-Metadata: {'word': 'made', 'word_len': 4}
+Question: Spell this word backward (example: sun -> nus): only
+Answer: ylno
+Metadata: {'word': 'only', 'word_len': 4}
 
 Example 2:
-Question: Spell this word backward (example: sun -> nus): then
-Answer: neht
-Metadata: {'word': 'then', 'word_len': 4}
+Question: Spell this word backward (example: sun -> nus): from
+Answer: morf
+Metadata: {'word': 'from', 'word_len': 4}
 
 Example 3:
-Question: Spell this word backward (example: sun -> nus): Europe
-Answer: eporuE
-Metadata: {'word': 'Europe', 'word_len': 6}
+Question: Spell this word backward (example: sun -> nus): anxiously
+Answer: ylsuoixna
+Metadata: {'word': 'anxiously', 'word_len': 9}
 
 ```
 
-### sudoku {sudoku}
+### sudoku
 Generates sudoku puzzles with configurable difficulty
 
 Default configuration:
@@ -1123,73 +1274,73 @@ Example tasks:
 ```
 Example 1:
 Question: Solve this Sudoku puzzle:
-_ _ 2 _ _ _ 6 7 _
-7 _ _ _ _ 9 _ _ _
-3 _ _ _ 8 7 4 _ _
-_ 8 4 _ 7 _ 9 _ _
-_ _ _ _ _ _ 3 _ _
-9 _ 3 1 _ _ _ 8 7
-_ 1 8 4 9 _ _ 5 3
-_ _ _ 8 5 1 2 9 4
-4 5 9 _ _ 2 _ _ 6
-Answer: 8 4 2 5 1 3 6 7 9
-7 6 5 2 4 9 1 3 8
-3 9 1 6 8 7 4 2 5
-1 8 4 3 7 5 9 6 2
-5 7 6 9 2 8 3 4 1
-9 2 3 1 6 4 5 8 7
-2 1 8 4 9 6 7 5 3
-6 3 7 8 5 1 2 9 4
-4 5 9 7 3 2 8 1 6
-Metadata: {'puzzle': [[0, 0, 2, 0, 0, 0, 6, 7, 0], [7, 0, 0, 0, 0, 9, 0, 0, 0], [3, 0, 0, 0, 8, 7, 4, 0, 0], [0, 8, 4, 0, 7, 0, 9, 0, 0], [0, 0, 0, 0, 0, 0, 3, 0, 0], [9, 0, 3, 1, 0, 0, 0, 8, 7], [0, 1, 8, 4, 9, 0, 0, 5, 3], [0, 0, 0, 8, 5, 1, 2, 9, 4], [4, 5, 9, 0, 0, 2, 0, 0, 6]], 'solution': [[8, 4, 2, 5, 1, 3, 6, 7, 9], [7, 6, 5, 2, 4, 9, 1, 3, 8], [3, 9, 1, 6, 8, 7, 4, 2, 5], [1, 8, 4, 3, 7, 5, 9, 6, 2], [5, 7, 6, 9, 2, 8, 3, 4, 1], [9, 2, 3, 1, 6, 4, 5, 8, 7], [2, 1, 8, 4, 9, 6, 7, 5, 3], [6, 3, 7, 8, 5, 1, 2, 9, 4], [4, 5, 9, 7, 3, 2, 8, 1, 6]], 'num_empty': 45}
+_ 8 _ 2 _ _ _ _ 3
+_ _ 4 _ 7 _ _ 8 9
+2 5 6 3 _ _ _ 4 7
+_ _ 8 _ 6 _ 9 5 _
+9 _ 2 7 _ 5 _ _ _
+3 6 _ _ 2 9 8 _ _
+_ 4 3 _ 5 2 7 _ _
+_ _ 1 _ _ _ 4 2 8
+6 2 _ 8 4 1 3 9 5
+Answer: 7 8 9 2 1 4 5 6 3
+1 3 4 5 7 6 2 8 9
+2 5 6 3 9 8 1 4 7
+4 7 8 1 6 3 9 5 2
+9 1 2 7 8 5 6 3 4
+3 6 5 4 2 9 8 7 1
+8 4 3 9 5 2 7 1 6
+5 9 1 6 3 7 4 2 8
+6 2 7 8 4 1 3 9 5
+Metadata: {'puzzle': [[0, 8, 0, 2, 0, 0, 0, 0, 3], [0, 0, 4, 0, 7, 0, 0, 8, 9], [2, 5, 6, 3, 0, 0, 0, 4, 7], [0, 0, 8, 0, 6, 0, 9, 5, 0], [9, 0, 2, 7, 0, 5, 0, 0, 0], [3, 6, 0, 0, 2, 9, 8, 0, 0], [0, 4, 3, 0, 5, 2, 7, 0, 0], [0, 0, 1, 0, 0, 0, 4, 2, 8], [6, 2, 0, 8, 4, 1, 3, 9, 5]], 'solution': [[7, 8, 9, 2, 1, 4, 5, 6, 3], [1, 3, 4, 5, 7, 6, 2, 8, 9], [2, 5, 6, 3, 9, 8, 1, 4, 7], [4, 7, 8, 1, 6, 3, 9, 5, 2], [9, 1, 2, 7, 8, 5, 6, 3, 4], [3, 6, 5, 4, 2, 9, 8, 7, 1], [8, 4, 3, 9, 5, 2, 7, 1, 6], [5, 9, 1, 6, 3, 7, 4, 2, 8], [6, 2, 7, 8, 4, 1, 3, 9, 5]], 'num_empty': 38}
 
 Example 2:
 Question: Solve this Sudoku puzzle:
-3 5 _ _ _ _ _ _ _
-_ 1 _ 3 _ 8 5 4 6
-7 _ 8 9 _ _ _ 3 2
-2 3 7 _ 4 _ _ 8 _
-_ _ 1 8 _ 2 3 _ 4
-_ _ 4 7 9 3 6 _ _
-8 6 _ _ _ _ 2 _ _
-_ 2 _ _ 8 7 _ _ _
-_ _ _ 6 2 _ 8 5 _
-Answer: 3 5 6 2 1 4 7 9 8
-9 1 2 3 7 8 5 4 6
-7 4 8 9 6 5 1 3 2
-2 3 7 1 4 6 9 8 5
-6 9 1 8 5 2 3 7 4
-5 8 4 7 9 3 6 2 1
-8 6 5 4 3 9 2 1 7
-1 2 3 5 8 7 4 6 9
-4 7 9 6 2 1 8 5 3
-Metadata: {'puzzle': [[3, 5, 0, 0, 0, 0, 0, 0, 0], [0, 1, 0, 3, 0, 8, 5, 4, 6], [7, 0, 8, 9, 0, 0, 0, 3, 2], [2, 3, 7, 0, 4, 0, 0, 8, 0], [0, 0, 1, 8, 0, 2, 3, 0, 4], [0, 0, 4, 7, 9, 3, 6, 0, 0], [8, 6, 0, 0, 0, 0, 2, 0, 0], [0, 2, 0, 0, 8, 7, 0, 0, 0], [0, 0, 0, 6, 2, 0, 8, 5, 0]], 'solution': [[3, 5, 6, 2, 1, 4, 7, 9, 8], [9, 1, 2, 3, 7, 8, 5, 4, 6], [7, 4, 8, 9, 6, 5, 1, 3, 2], [2, 3, 7, 1, 4, 6, 9, 8, 5], [6, 9, 1, 8, 5, 2, 3, 7, 4], [5, 8, 4, 7, 9, 3, 6, 2, 1], [8, 6, 5, 4, 3, 9, 2, 1, 7], [1, 2, 3, 5, 8, 7, 4, 6, 9], [4, 7, 9, 6, 2, 1, 8, 5, 3]], 'num_empty': 43}
+5 _ _ _ 3 4 _ 6 _
+_ _ 3 _ _ _ _ _ _
+_ _ 8 5 9 _ _ _ 2
+_ 5 7 6 4 _ _ 8 _
+_ 4 6 _ _ _ _ 5 3
+_ 3 _ _ _ 5 _ _ _
+6 8 1 _ _ 9 _ _ _
+_ 9 5 _ 2 _ _ 4 _
+_ 2 _ _ 8 6 1 9 5
+Answer: 5 7 2 1 3 4 8 6 9
+9 1 3 2 6 8 5 7 4
+4 6 8 5 9 7 3 1 2
+2 5 7 6 4 3 9 8 1
+8 4 6 9 1 2 7 5 3
+1 3 9 8 7 5 4 2 6
+6 8 1 4 5 9 2 3 7
+3 9 5 7 2 1 6 4 8
+7 2 4 3 8 6 1 9 5
+Metadata: {'puzzle': [[5, 0, 0, 0, 3, 4, 0, 6, 0], [0, 0, 3, 0, 0, 0, 0, 0, 0], [0, 0, 8, 5, 9, 0, 0, 0, 2], [0, 5, 7, 6, 4, 0, 0, 8, 0], [0, 4, 6, 0, 0, 0, 0, 5, 3], [0, 3, 0, 0, 0, 5, 0, 0, 0], [6, 8, 1, 0, 0, 9, 0, 0, 0], [0, 9, 5, 0, 2, 0, 0, 4, 0], [0, 2, 0, 0, 8, 6, 1, 9, 5]], 'solution': [[5, 7, 2, 1, 3, 4, 8, 6, 9], [9, 1, 3, 2, 6, 8, 5, 7, 4], [4, 6, 8, 5, 9, 7, 3, 1, 2], [2, 5, 7, 6, 4, 3, 9, 8, 1], [8, 4, 6, 9, 1, 2, 7, 5, 3], [1, 3, 9, 8, 7, 5, 4, 2, 6], [6, 8, 1, 4, 5, 9, 2, 3, 7], [3, 9, 5, 7, 2, 1, 6, 4, 8], [7, 2, 4, 3, 8, 6, 1, 9, 5]], 'num_empty': 47}
 
 Example 3:
 Question: Solve this Sudoku puzzle:
-2 _ 1 4 _ 5 6 _ _
-_ 8 _ 6 _ 1 5 2 9
-_ _ _ _ _ 2 _ 3 _
-1 _ 4 2 _ _ _ _ 5
-_ _ _ _ 4 _ _ 6 _
-_ _ 9 _ _ _ 2 4 _
-8 _ _ 5 1 6 3 _ 7
-9 _ _ 7 _ 3 _ 1 2
-3 _ _ 9 _ 4 _ _ 6
-Answer: 2 9 1 4 3 5 6 7 8
-4 8 3 6 7 1 5 2 9
-7 5 6 8 9 2 1 3 4
-1 3 4 2 6 7 9 8 5
-5 2 8 1 4 9 7 6 3
-6 7 9 3 5 8 2 4 1
-8 4 2 5 1 6 3 9 7
-9 6 5 7 8 3 4 1 2
-3 1 7 9 2 4 8 5 6
-Metadata: {'puzzle': [[2, 0, 1, 4, 0, 5, 6, 0, 0], [0, 8, 0, 6, 0, 1, 5, 2, 9], [0, 0, 0, 0, 0, 2, 0, 3, 0], [1, 0, 4, 2, 0, 0, 0, 0, 5], [0, 0, 0, 0, 4, 0, 0, 6, 0], [0, 0, 9, 0, 0, 0, 2, 4, 0], [8, 0, 0, 5, 1, 6, 3, 0, 7], [9, 0, 0, 7, 0, 3, 0, 1, 2], [3, 0, 0, 9, 0, 4, 0, 0, 6]], 'solution': [[2, 9, 1, 4, 3, 5, 6, 7, 8], [4, 8, 3, 6, 7, 1, 5, 2, 9], [7, 5, 6, 8, 9, 2, 1, 3, 4], [1, 3, 4, 2, 6, 7, 9, 8, 5], [5, 2, 8, 1, 4, 9, 7, 6, 3], [6, 7, 9, 3, 5, 8, 2, 4, 1], [8, 4, 2, 5, 1, 6, 3, 9, 7], [9, 6, 5, 7, 8, 3, 4, 1, 2], [3, 1, 7, 9, 2, 4, 8, 5, 6]], 'num_empty': 44}
+9 8 6 _ _ _ _ _ 3
+4 _ _ _ _ _ _ 6 _
+_ _ 3 6 7 _ _ _ 8
+_ _ 9 _ _ 3 6 _ _
+_ _ _ _ _ _ 7 4 2
+_ _ _ 4 _ _ _ _ _
+_ _ 2 5 _ _ _ 1 _
+_ 3 1 _ 4 6 8 9 7
+7 9 _ 8 _ _ _ _ 6
+Answer: 9 8 6 1 2 4 5 7 3
+4 2 7 3 8 5 1 6 9
+1 5 3 6 7 9 4 2 8
+2 4 9 7 1 3 6 8 5
+3 1 5 9 6 8 7 4 2
+6 7 8 4 5 2 9 3 1
+8 6 2 5 9 7 3 1 4
+5 3 1 2 4 6 8 9 7
+7 9 4 8 3 1 2 5 6
+Metadata: {'puzzle': [[9, 8, 6, 0, 0, 0, 0, 0, 3], [4, 0, 0, 0, 0, 0, 0, 6, 0], [0, 0, 3, 6, 7, 0, 0, 0, 8], [0, 0, 9, 0, 0, 3, 6, 0, 0], [0, 0, 0, 0, 0, 0, 7, 4, 2], [0, 0, 0, 4, 0, 0, 0, 0, 0], [0, 0, 2, 5, 0, 0, 0, 1, 0], [0, 3, 1, 0, 4, 6, 8, 9, 7], [7, 9, 0, 8, 0, 0, 0, 0, 6]], 'solution': [[9, 8, 6, 1, 2, 4, 5, 7, 3], [4, 2, 7, 3, 8, 5, 1, 6, 9], [1, 5, 3, 6, 7, 9, 4, 2, 8], [2, 4, 9, 7, 1, 3, 6, 8, 5], [3, 1, 5, 9, 6, 8, 7, 4, 2], [6, 7, 8, 4, 5, 2, 9, 3, 1], [8, 6, 2, 5, 9, 7, 3, 1, 4], [5, 3, 1, 2, 4, 6, 8, 9, 7], [7, 9, 4, 8, 3, 1, 2, 5, 6]], 'num_empty': 50}
 
 ```
 
-### syllogism {syllogism}
+### syllogism
 Generates syllogism reasoning tasks
 
 Default configuration:
@@ -1209,40 +1360,40 @@ Example tasks:
 ```
 Example 1:
 Question: Consider these statements:
-1. Some programmers are cats
-2. Some ... are not cats are engineers
+1. Some humans are reptiles
+2. Some reptiles are insects
 
 Does it logically follow that:
-No programmers are engineers?
+Some ... are not humans are insects?
 (Answer Yes or No)
-Answer: Yes
-Metadata: {'premise1': 'Some programmers are cats', 'premise2': 'Some ... are not cats are engineers', 'conclusion': 'No programmers are engineers', 'is_valid': True}
+Answer: No
+Metadata: {'premise1': 'Some humans are reptiles', 'premise2': 'Some reptiles are insects', 'conclusion': 'Some ... are not humans are insects', 'is_valid': False}
 
 Example 2:
 Question: Consider these statements:
-1. All parents are cats
-2. Some cats are lawyers
+1. All mortals are teachers
+2. Some teachers are ants
 
 Does it logically follow that:
-Some ... are not parents are lawyers?
+Some ... are not mortals are ants?
 (Answer Yes or No)
 Answer: Yes
-Metadata: {'premise1': 'All parents are cats', 'premise2': 'Some cats are lawyers', 'conclusion': 'Some ... are not parents are lawyers', 'is_valid': True}
+Metadata: {'premise1': 'All mortals are teachers', 'premise2': 'Some teachers are ants', 'conclusion': 'Some ... are not mortals are ants', 'is_valid': True}
 
 Example 3:
 Question: Consider these statements:
-1. No whales are birds
-2. Some birds are teachers
+1. No mortals are whales
+2. No whales are bees
 
 Does it logically follow that:
-All whales are teachers?
+No mortals are bees?
 (Answer Yes or No)
-Answer: Yes
-Metadata: {'premise1': 'No whales are birds', 'premise2': 'Some birds are teachers', 'conclusion': 'All whales are teachers', 'is_valid': True}
+Answer: No
+Metadata: {'premise1': 'No mortals are whales', 'premise2': 'No whales are bees', 'conclusion': 'No mortals are bees', 'is_valid': False}
 
 ```
 
-### word_sequence_reversal {word-sequence-reversal}
+### word_sequence_reversal
 Generates word sequence reversal tasks from text spans
 
 Default configuration:
@@ -1256,23 +1407,23 @@ size = 500
 Example tasks:
 ```
 Example 1:
-Question: Reverse this list of words: upon, bold, what, of, have
-Answer: have, of, what, bold, upon
-Metadata: {'num_words': 5, 'words': ['upon', 'bold', 'what', 'of', 'have']}
+Question: Reverse this list of words: Africa, harmless, moral
+Answer: moral, harmless, Africa
+Metadata: {'num_words': 3, 'words': ['Africa', 'harmless', 'moral']}
 
 Example 2:
-Question: Reverse this list of words: years, WILL, Gutenberg, Nevertheless
-Answer: Nevertheless, Gutenberg, WILL, years
-Metadata: {'num_words': 4, 'words': ['years', 'WILL', 'Gutenberg', 'Nevertheless']}
+Question: Reverse this list of words: efforts, well, set, these, back, Her, for
+Answer: for, Her, back, these, set, well, efforts
+Metadata: {'num_words': 7, 'words': ['efforts', 'well', 'set', 'these', 'back', 'Her', 'for']}
 
 Example 3:
-Question: Reverse this list of words: or, of, With, no
-Answer: no, With, of, or
-Metadata: {'num_words': 4, 'words': ['or', 'of', 'With', 'no']}
+Question: Reverse this list of words: fellow, compliance, few, which, in, famous, Not
+Answer: Not, famous, in, which, few, compliance, fellow
+Metadata: {'num_words': 7, 'words': ['fellow', 'compliance', 'few', 'which', 'in', 'famous', 'Not']}
 
 ```
 
-### word_sorting {word-sorting}
+### word_sorting
 Generates word sorting tasks
 
 Default configuration:
@@ -1290,20 +1441,20 @@ Example tasks:
 ```
 Example 1:
 Question: Sort these words in descending order (using ASCII/Unicode ordering) and return them as a comma-separated list:
-believe, content, How, dedicated, seasons
-Answer: seasons, dedicated, content, believe, How
-Metadata: {'original_words': ['believe', 'content', 'How', 'dedicated', 'seasons'], 'transformed_words': ['believe', 'content', 'How', 'dedicated', 'seasons'], 'direction': 'descending', 'transformation': <TextTransformation.ORIGINAL: 'original'>, 'sorted_words': ['seasons', 'dedicated', 'content', 'believe', 'How']}
+prepare, provide, speak, surplus, after, unlink, change, 000
+Answer: unlink, surplus, speak, provide, prepare, change, after, 000
+Metadata: {'original_words': ['prepare', 'provide', 'speak', 'surplus', 'after', 'unlink', 'change', '000'], 'transformed_words': ['prepare', 'provide', 'speak', 'surplus', 'after', 'unlink', 'change', '000'], 'direction': 'descending', 'transformation': <TextTransformation.ORIGINAL: 'original'>, 'sorted_words': ['unlink', 'surplus', 'speak', 'provide', 'prepare', 'change', 'after', '000']}
 
 Example 2:
-Question: Sort these words in ascending order (using ASCII/Unicode ordering) and return them as a comma-separated list:
-owing, acute, included
-Answer: acute, included, owing
-Metadata: {'original_words': ['owing', 'acute', 'included'], 'transformed_words': ['owing', 'acute', 'included'], 'direction': 'ascending', 'transformation': <TextTransformation.ORIGINAL: 'original'>, 'sorted_words': ['acute', 'included', 'owing']}
+Question: Sort these words in descending order (using ASCII/Unicode ordering) and return them as a comma-separated list:
+501, differences, Thus, cupola, longer, remaining, mummy, Paris, DISTRIBUTE
+Answer: remaining, mummy, longer, differences, cupola, Thus, Paris, DISTRIBUTE, 501
+Metadata: {'original_words': ['501', 'differences', 'Thus', 'cupola', 'longer', 'remaining', 'mummy', 'Paris', 'DISTRIBUTE'], 'transformed_words': ['501', 'differences', 'Thus', 'cupola', 'longer', 'remaining', 'mummy', 'Paris', 'DISTRIBUTE'], 'direction': 'descending', 'transformation': <TextTransformation.ORIGINAL: 'original'>, 'sorted_words': ['remaining', 'mummy', 'longer', 'differences', 'cupola', 'Thus', 'Paris', 'DISTRIBUTE', '501']}
 
 Example 3:
 Question: Sort these words in ascending order (using ASCII/Unicode ordering) and return them as a comma-separated list:
-WARRANTY, tell, territory, Reckon, downloading
-Answer: Reckon, WARRANTY, downloading, tell, territory
-Metadata: {'original_words': ['WARRANTY', 'tell', 'territory', 'Reckon', 'downloading'], 'transformed_words': ['WARRANTY', 'tell', 'territory', 'Reckon', 'downloading'], 'direction': 'ascending', 'transformation': <TextTransformation.ORIGINAL: 'original'>, 'sorted_words': ['Reckon', 'WARRANTY', 'downloading', 'tell', 'territory']}
+discontinue, access, office, luminous, distributing
+Answer: access, discontinue, distributing, luminous, office
+Metadata: {'original_words': ['discontinue', 'access', 'office', 'luminous', 'distributing'], 'transformed_words': ['discontinue', 'access', 'office', 'luminous', 'distributing'], 'direction': 'ascending', 'transformation': <TextTransformation.ORIGINAL: 'original'>, 'sorted_words': ['access', 'discontinue', 'distributing', 'luminous', 'office']}
 
 ```
diff --git a/pyproject.toml b/pyproject.toml
index 3a546f8c..42e66005 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,8 +14,8 @@ requires-python = ">=3.11"
 dependencies = [
   "bfi==1.0.4",
   "cellpylib==2.4.0",
-  "sympy>=1.13.1", 
-  "magiccube==0.3.0", 
+  "sympy>=1.13.1",
+  "magiccube==0.3.0",
   "pyfiglet==1.0.2"
 ]
 classifiers = [
diff --git a/reasoning_gym/code/__init__.py b/reasoning_gym/code/__init__.py
index 680653c6..d250ad6e 100644
--- a/reasoning_gym/code/__init__.py
+++ b/reasoning_gym/code/__init__.py
@@ -7,7 +7,4 @@ Cognition tasks for training reasoning capabilities:
 
 from .bf import BFConfig, BFDataset
 
-__all__ = [
-    "BFConfig",
-    "BFDataset"
-]
+__all__ = ["BFConfig", "BFDataset"]
diff --git a/reasoning_gym/code/bf.py b/reasoning_gym/code/bf.py
index 0e47948c..c2697203 100644
--- a/reasoning_gym/code/bf.py
+++ b/reasoning_gym/code/bf.py
@@ -3,10 +3,10 @@ from random import Random
 from typing import Dict, Optional
 
 import bfi
-from .contrib.bfit.Compiler import Compiler, Minify
 
 from ..data.wordle_words import wordle_words
 from ..factory import ProceduralDataset, register_dataset
+from .contrib.bfit.Compiler import Compiler, Minify
 
 
 @dataclass
@@ -122,10 +122,11 @@ int main() {{
 
         if answer == None:
             return 0.0
-        if answer != entry['answer']:
+        if answer != entry["answer"]:
             return 0.01
         else:
-            return 1.0 # Yay
+            return 1.0  # Yay
+
 
 # Register the dataset
 register_dataset("bf", BFDataset, BFConfig)
diff --git a/reasoning_gym/code/contrib/bfit/BF-it.py b/reasoning_gym/code/contrib/bfit/BF-it.py
index 46545a29..ccdb3fc0 100644
--- a/reasoning_gym/code/contrib/bfit/BF-it.py
+++ b/reasoning_gym/code/contrib/bfit/BF-it.py
@@ -2,9 +2,9 @@
 
 import argparse
 import os
+
 import Interpreter
-from Compiler import Compiler
-from Compiler import Minify
+from Compiler import Compiler, Minify
 
 
 def process_args():
@@ -54,5 +54,5 @@ def compile_file():
         Interpreter.brainfuck(brainfuck_code)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     compile_file()
diff --git a/reasoning_gym/code/contrib/bfit/Compiler/Compiler.py b/reasoning_gym/code/contrib/bfit/Compiler/Compiler.py
index 276fae88..e1f60258 100644
--- a/reasoning_gym/code/contrib/bfit/Compiler/Compiler.py
+++ b/reasoning_gym/code/contrib/bfit/Compiler/Compiler.py
@@ -1,12 +1,18 @@
 #!/usr/bin/env python3
-from .Exceptions import BFSyntaxError, BFSemanticError
+from .Exceptions import BFSemanticError, BFSyntaxError
 from .FunctionCompiler import FunctionCompiler
 from .Functions import check_function_exists, get_function_object, insert_function_object
-from .General import is_token_literal, get_literal_token_code, unpack_literal_tokens_to_array_dimensions
-from .Globals import get_global_variables_size, get_variable_size, get_variable_dimensions, insert_global_variable, create_variable_from_definition
+from .General import get_literal_token_code, is_token_literal, unpack_literal_tokens_to_array_dimensions
+from .Globals import (
+    create_variable_from_definition,
+    get_global_variables_size,
+    get_variable_dimensions,
+    get_variable_size,
+    insert_global_variable,
+)
 from .Lexical_analyzer import analyze
-from .Optimizer import optimize
 from .LibraryFunctionCompiler import insert_library_functions
+from .Optimizer import optimize
 from .Parser import Parser
 from .Token import Token
 
@@ -29,20 +35,24 @@ class Compiler:
         # returns function named tuple
 
         if self.parser.current_token().type not in [Token.VOID, Token.INT]:
-            raise BFSemanticError("Function return type can be either void or int, not '%s'" % str(self.parser.current_token()))
+            raise BFSemanticError(
+                "Function return type can be either void or int, not '%s'" % str(self.parser.current_token())
+            )
 
         self.parser.check_next_tokens_are([Token.ID, Token.LPAREN])
 
         # save all tokens of this function
         function_name = self.parser.next_token(next_amount=1).data
-        RPAREN_index = self.parser.find_matching(starting_index=self.parser.current_token_index+2)  # first find RPAREN
+        RPAREN_index = self.parser.find_matching(
+            starting_index=self.parser.current_token_index + 2
+        )  # first find RPAREN
         self.parser.check_next_token_is(Token.LBRACE, starting_index=RPAREN_index)
-        RBRACE_index = self.parser.find_matching(starting_index=RPAREN_index+1)  # then find RBRACE
+        RBRACE_index = self.parser.find_matching(starting_index=RPAREN_index + 1)  # then find RBRACE
 
         # take all tokens between INT and RBRACE and pass them to function object
-        function_tokens = self.parser.tokens[self.parser.current_token_index:RBRACE_index+1]
+        function_tokens = self.parser.tokens[self.parser.current_token_index : RBRACE_index + 1]
         # skip function definition
-        self.parser.advance_to_token_at_index(RBRACE_index+1)
+        self.parser.advance_to_token_at_index(RBRACE_index + 1)
 
         function = FunctionCompiler(function_name, function_tokens)
         return function
@@ -60,12 +70,12 @@ class Compiler:
         # if this is set to True, then the compiler zeros each cell before using it (may generate a lot of unnecessary BF code)
         ZERO_CELLS_BEFORE_USE = False
 
-        code = '[-]' if ZERO_CELLS_BEFORE_USE else ''
+        code = "[-]" if ZERO_CELLS_BEFORE_USE else ""
         if get_variable_size(variable) > 1:  # its an array
             if self.parser.current_token().type == Token.SEMICOLON:
                 # array definition - INT ID (LBRACK NUM RBRACK)+ SEMICOLON
                 self.parser.advance_token()  # skip SEMICOLON
-                code = (code + '>') * get_variable_size(variable)  # advance to after this variable
+                code = (code + ">") * get_variable_size(variable)  # advance to after this variable
                 return code
             elif self.parser.current_token().type == Token.ASSIGN and self.parser.current_token().data == "=":
                 # array definition and initialization - INT ID (LBRACK NUM RBRACK)+ ASSIGN ((LBRACE ... RBRACE)+|STRING) SEMICOLON
@@ -79,25 +89,34 @@ class Compiler:
                 self.parser.advance_token()  # skip SEMICOLON
 
                 array_dimensions = get_variable_dimensions(variable)
-                unpacked_literals_list = unpack_literal_tokens_to_array_dimensions(ID_token, array_dimensions, literal_tokens_list)
+                unpacked_literals_list = unpack_literal_tokens_to_array_dimensions(
+                    ID_token, array_dimensions, literal_tokens_list
+                )
 
                 for literal in unpacked_literals_list:
                     code += get_literal_token_code(literal)  # evaluate this literal and point to next array element
                 return code
             else:
-                raise BFSyntaxError("Unexpected %s in array definition. Expected SEMICOLON (;) or ASSIGN (=)" % self.parser.current_token())
+                raise BFSyntaxError(
+                    "Unexpected %s in array definition. Expected SEMICOLON (;) or ASSIGN (=)"
+                    % self.parser.current_token()
+                )
 
         elif self.parser.current_token().type == Token.SEMICOLON:  # no need to initialize
             self.parser.advance_token()  # skip SEMICOLON
-            code += '>'  # advance to after this variable
+            code += ">"  # advance to after this variable
         else:
             self.parser.check_current_token_is(Token.ASSIGN)
             if self.parser.current_token().data != "=":
-                raise BFSyntaxError("Unexpected %s when initializing global variable. Expected ASSIGN (=)" % self.parser.current_token())
+                raise BFSyntaxError(
+                    "Unexpected %s when initializing global variable. Expected ASSIGN (=)" % self.parser.current_token()
+                )
             self.parser.advance_token()  # skip ASSIGN
 
             if not is_token_literal(self.parser.current_token()):
-                raise BFSemanticError("Unexpected '%s'. expected literal (NUM | CHAR | TRUE | FALSE )" % str(self.parser.current_token()))
+                raise BFSemanticError(
+                    "Unexpected '%s'. expected literal (NUM | CHAR | TRUE | FALSE )" % str(self.parser.current_token())
+                )
 
             code += get_literal_token_code(self.parser.current_token())
 
@@ -113,7 +132,7 @@ class Compiler:
         When encountering global variable definition - create Variable object
         Returns code that initializes global variables and advances the pointer to after them
         """
-        code = ''
+        code = ""
         token = self.parser.current_token()
         while token is not None and token.type in [Token.VOID, Token.INT, Token.SEMICOLON]:
             if token.type == Token.SEMICOLON:  # can have random semicolons ;)
@@ -125,22 +144,31 @@ class Compiler:
             if self.parser.next_token(next_amount=2).type == Token.LPAREN:
                 function = self.create_function_object()
                 insert_function_object(function)
-            elif token.type is Token.INT and self.parser.next_token(next_amount=2).type in [Token.SEMICOLON, Token.ASSIGN, Token.LBRACK]:
+            elif token.type is Token.INT and self.parser.next_token(next_amount=2).type in [
+                Token.SEMICOLON,
+                Token.ASSIGN,
+                Token.LBRACK,
+            ]:
                 code += self.compile_global_variable_definition()
             else:
-                raise BFSyntaxError("Unexpected '%s' after '%s'. Expected '(' (function definition) or one of: '=', ';', '[' (global variable definition)" % (str(self.parser.next_token(next_amount=2)), str(self.parser.next_token())))
+                raise BFSyntaxError(
+                    "Unexpected '%s' after '%s'. Expected '(' (function definition) or one of: '=', ';', '[' (global variable definition)"
+                    % (str(self.parser.next_token(next_amount=2)), str(self.parser.next_token()))
+                )
 
             token = self.parser.current_token()
 
         if self.parser.current_token() is not None:  # we have not reached the last token
-            untouched_tokens = [str(t) for t in self.parser.tokens[self.parser.current_token_index:]]
+            untouched_tokens = [str(t) for t in self.parser.tokens[self.parser.current_token_index :]]
             raise BFSyntaxError("Did not reach the end of the code. Untouched tokens:\n%s" % untouched_tokens)
 
         return code
 
     def compile(self):
         insert_library_functions()
-        code = self.process_global_definitions()  # code that initializes global variables and advances pointer to after them
+        code = (
+            self.process_global_definitions()
+        )  # code that initializes global variables and advances pointer to after them
 
         check_function_exists(Token(Token.ID, 0, 0, "main"), 0)
         code += get_function_object("main").get_code(get_global_variables_size())
@@ -159,7 +187,7 @@ def compile(code, optimize_code=False):
     return brainfuck_code
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     print("This file cannot be directly run")
     print("Please import it and use the 'compile' function")
     print("Which receives a C-like code (string) and returns Brainfuck code (string)")
diff --git a/reasoning_gym/code/contrib/bfit/Compiler/FunctionCompiler.py b/reasoning_gym/code/contrib/bfit/Compiler/FunctionCompiler.py
index 7eaa9877..1603ac64 100644
--- a/reasoning_gym/code/contrib/bfit/Compiler/FunctionCompiler.py
+++ b/reasoning_gym/code/contrib/bfit/Compiler/FunctionCompiler.py
@@ -1,11 +1,28 @@
 from collections import namedtuple
 from functools import reduce
-from .Exceptions import BFSyntaxError, BFSemanticError
+
+from .Exceptions import BFSemanticError, BFSyntaxError
 from .Functions import check_function_exists, get_function_object
-from .General import get_variable_dimensions_from_token, get_move_to_return_value_cell_code, get_print_string_code, get_variable_from_ID_token
-from .General import get_literal_token_value, process_switch_cases, is_token_literal
+from .General import (
+    get_literal_token_value,
+    get_move_to_return_value_cell_code,
+    get_print_string_code,
+    get_variable_dimensions_from_token,
+    get_variable_from_ID_token,
+    is_token_literal,
+    process_switch_cases,
+)
 from .Globals import create_variable_from_definition, get_global_variables, get_variable_size, is_variable_array
-from .Node import NodeToken, NodeTernary, NodeArraySetElement, NodeUnaryPrefix, NodeUnaryPostfix, NodeArrayGetElement, NodeFunctionCall, NodeArrayAssignment
+from .Node import (
+    NodeArrayAssignment,
+    NodeArrayGetElement,
+    NodeArraySetElement,
+    NodeFunctionCall,
+    NodeTernary,
+    NodeToken,
+    NodeUnaryPostfix,
+    NodeUnaryPrefix,
+)
 from .Parser import Parser
 from .Token import Token
 
@@ -83,7 +100,9 @@ class FunctionCompiler:
         # new stack pointer should be at least that size
         assert self.current_stack_pointer() <= current_stack_pointer
         self.return_value_cell = current_stack_pointer
-        self.set_stack_pointer(current_stack_pointer+1)  # make room for return_value cell. next available cell is the next one after it.
+        self.set_stack_pointer(
+            current_stack_pointer + 1
+        )  # make room for return_value cell. next available cell is the next one after it.
         function_code = self.compile_function_scope(self.parameters)
         self.remove_ids_map()  # Global variables
         return function_code
@@ -123,8 +142,12 @@ class FunctionCompiler:
 
             # multiply by next dimensions sizes
             multiply_amount = reduce(lambda x, y: x * y, dimensions[1:])  # size of the following dimensions
-            node_token_multiply_amount = NodeToken(self.ids_map_list, token=Token(Token.NUM, ID_token.line, ID_token.column, data=str(multiply_amount)))
-            index_expression = NodeToken(self.ids_map_list, token=multiply_token, left=first_index_expression, right=node_token_multiply_amount)
+            node_token_multiply_amount = NodeToken(
+                self.ids_map_list, token=Token(Token.NUM, ID_token.line, ID_token.column, data=str(multiply_amount))
+            )
+            index_expression = NodeToken(
+                self.ids_map_list, token=multiply_token, left=first_index_expression, right=node_token_multiply_amount
+            )
 
             # handle next dimensions
             dimension = 1
@@ -132,8 +155,10 @@ class FunctionCompiler:
                 if self.parser.current_token().type != Token.LBRACK:  # too few indexes given...
                     if dimension == 1:
                         return first_index_expression  # allow use of only one dimension for multi-dimensional array
-                    raise BFSemanticError("%s is a %s-dimensional array, but only %s dimension(s) given as index" %
-                                          (str(ID_token), len(dimensions), dimension))
+                    raise BFSemanticError(
+                        "%s is a %s-dimensional array, but only %s dimension(s) given as index"
+                        % (str(ID_token), len(dimensions), dimension)
+                    )
                 self.parser.check_current_token_is(Token.LBRACK)
                 self.parser.advance_token()  # skip LBRACK
                 exp = self.expression()
@@ -143,19 +168,30 @@ class FunctionCompiler:
 
                 # current_dimension_index *= size_of_following_dimensions
                 if dimension + 1 < len(dimensions):  # not last dimension - need to multiply and add
-                    multiply_amount = reduce(lambda x, y: x * y, dimensions[dimension + 1:])  # size of the following dimensions
-                    node_token_multiply_amount = NodeToken(self.ids_map_list, token=Token(Token.NUM, ID_token.line, ID_token.column, data=str(multiply_amount)))
-                    multiply_node = NodeToken(self.ids_map_list, token=multiply_token, left=exp, right=node_token_multiply_amount)
+                    multiply_amount = reduce(
+                        lambda x, y: x * y, dimensions[dimension + 1 :]
+                    )  # size of the following dimensions
+                    node_token_multiply_amount = NodeToken(
+                        self.ids_map_list,
+                        token=Token(Token.NUM, ID_token.line, ID_token.column, data=str(multiply_amount)),
+                    )
+                    multiply_node = NodeToken(
+                        self.ids_map_list, token=multiply_token, left=exp, right=node_token_multiply_amount
+                    )
 
                     # prev_dimensions_index += current_dimension_index
-                    index_expression = NodeToken(self.ids_map_list, token=add_token, left=index_expression, right=multiply_node)
+                    index_expression = NodeToken(
+                        self.ids_map_list, token=add_token, left=index_expression, right=multiply_node
+                    )
                 else:  # last dimension - no need to multiply, just add
                     index_expression = NodeToken(self.ids_map_list, token=add_token, left=index_expression, right=exp)
                 dimension += 1
 
         if self.parser.current_token().type == Token.LBRACK:  # too many indexes given...
-            raise BFSemanticError("%s is a %s-dimensional array. Unexpected %s" %
-                                  (str(ID_token), len(dimensions), self.parser.current_token()))
+            raise BFSemanticError(
+                "%s is a %s-dimensional array. Unexpected %s"
+                % (str(ID_token), len(dimensions), self.parser.current_token())
+            )
         return index_expression
 
     def get_token_after_array_access(self, offset=0):
@@ -193,12 +229,18 @@ class FunctionCompiler:
 
         if self.parser.next_token().type == Token.SEMICOLON:  # INT ID SEMICOLON
             self.parser.advance_token(2)  # skip ID SEMICOLON
-            return ''  # no code is generated here. code was generated for defining this variable when we entered the scope
+            return (
+                ""  # no code is generated here. code was generated for defining this variable when we entered the scope
+            )
 
-        elif self.parser.next_token().type == Token.ASSIGN and self.parser.next_token().data == "=":  # INT ID = EXPRESSION SEMICOLON
+        elif (
+            self.parser.next_token().type == Token.ASSIGN and self.parser.next_token().data == "="
+        ):  # INT ID = EXPRESSION SEMICOLON
             return self.compile_expression_as_statement()  # compile_expression_as_statement skips the SEMICOLON
 
-        elif self.parser.next_token().type == Token.LBRACK:  # INT ID (LBRACK NUM RBRACK)+ (= ARRAY_INITIALIZATION)? SEMICOLON
+        elif (
+            self.parser.next_token().type == Token.LBRACK
+        ):  # INT ID (LBRACK NUM RBRACK)+ (= ARRAY_INITIALIZATION)? SEMICOLON
             # array definition (int arr[2][3]...[];) or array definition and initialization (arr[2][3]...[] = {...};)
             token_id = self.parser.current_token()
             self.parser.advance_token()  # skip ID
@@ -210,7 +252,7 @@ class FunctionCompiler:
                 initialization_node = self.compile_array_assignment(token_id)
                 code = initialization_node.get_code(self.current_stack_pointer()) + "<"  # discard expression value
             else:
-                code = ''  # just array definition
+                code = ""  # just array definition
                 # no code is generated here. code was generated for defining this variable when we entered the scope
             self.parser.check_current_token_is(Token.SEMICOLON)
             self.parser.advance_token()  # skip SEMICOLON
@@ -297,7 +339,9 @@ class FunctionCompiler:
             token = self.tokens[i]
 
             if token.type == Token.INT:
-                if self.tokens[i-2].type != Token.FOR:  # if it is not a definition inside a FOR statement (for (int i = 0...))
+                if (
+                    self.tokens[i - 2].type != Token.FOR
+                ):  # if it is not a definition inside a FOR statement (for (int i = 0...))
                     variable = create_variable_from_definition(self.parser, index=i)
                     self.insert_to_ids_map(variable)
 
@@ -333,7 +377,7 @@ class FunctionCompiler:
         for parameter in parameters:
             self.insert_to_ids_map(parameter)
 
-        code = '>'  # skip return_value_cell
+        code = ">"  # skip return_value_cell
         code += self.insert_scope_variables_into_ids_map()
         # this inserts scope variables AND moves pointer right, with the amount of BOTH parameters and scope variables
 
@@ -377,7 +421,9 @@ class FunctionCompiler:
         if token.type == Token.ID and self.parser.next_token().type == Token.LPAREN:
             return self.function_call()
 
-        if token.type == Token.ID and self.parser.next_token().type == Token.LBRACK:  # array - ID(LBRACK expression RBRACK)+
+        if (
+            token.type == Token.ID and self.parser.next_token().type == Token.LBRACK
+        ):  # array - ID(LBRACK expression RBRACK)+
             index_expression = self.get_array_index_expression()
             return NodeArrayGetElement(self.ids_map_list, token, index_expression)
 
@@ -386,7 +432,10 @@ class FunctionCompiler:
             return NodeToken(self.ids_map_list, token=token)
 
         if token.type != Token.LPAREN:
-            raise BFSyntaxError("Unexpected '%s'. expected literal (NUM | ID | ID(LBRACK expression RBRACK)+ | TRUE | FALSE | function_call | ( expression ))" % str(token))
+            raise BFSyntaxError(
+                "Unexpected '%s'. expected literal (NUM | ID | ID(LBRACK expression RBRACK)+ | TRUE | FALSE | function_call | ( expression ))"
+                % str(token)
+            )
 
         # ( expression )
         self.parser.check_current_token_is(Token.LPAREN)
@@ -417,7 +466,9 @@ class FunctionCompiler:
 
         if token.type in [Token.NOT, Token.BITWISE_NOT, Token.BINOP]:
             if token.type == Token.BINOP and token.data not in ["+", "-"]:
-                    raise BFSyntaxError("Expected either + or - as unary prefix instead of token %s" % self.parser.current_token())
+                raise BFSyntaxError(
+                    "Expected either + or - as unary prefix instead of token %s" % self.parser.current_token()
+                )
             self.parser.advance_token()
             unary_prefix = self.unary_prefix()
 
@@ -618,11 +669,19 @@ class FunctionCompiler:
 
             expression_node = self.expression()
 
-            new_node = NodeToken(self.ids_map_list, left=NodeToken(self.ids_map_list, token=id_token), token=assign_token, right=expression_node)
+            new_node = NodeToken(
+                self.ids_map_list,
+                left=NodeToken(self.ids_map_list, token=id_token),
+                token=assign_token,
+                right=expression_node,
+            )
             return new_node
 
-        elif self.parser.current_token().type == Token.ID and self.parser.next_token().type == Token.LBRACK and \
-                self.get_token_after_array_access().type == Token.ASSIGN:
+        elif (
+            self.parser.current_token().type == Token.ID
+            and self.parser.next_token().type == Token.LBRACK
+            and self.get_token_after_array_access().type == Token.ASSIGN
+        ):
             # ID (LBRACK expression RBRACK)+ ASSIGN value_expression
             id_token = self.parser.current_token()
             index_expression = self.get_array_index_expression()
@@ -744,7 +803,7 @@ class FunctionCompiler:
         if self.parser.current_token().type == Token.SEMICOLON:
             # return;
             self.parser.advance_token()  # skip ;
-            return ''  # nothing to do
+            return ""  # nothing to do
 
         # return exp;
         expression_code = self.compile_expression()
@@ -763,7 +822,12 @@ class FunctionCompiler:
         # this expression can be used as a statement.
         # e.g: x+=5;  or  x++ or ++x;
 
-        assert self.parser.current_token().type in [Token.ID, Token.INCREMENT, Token.DECREMENT, Token.UNARY_MULTIPLICATIVE]
+        assert self.parser.current_token().type in [
+            Token.ID,
+            Token.INCREMENT,
+            Token.DECREMENT,
+            Token.UNARY_MULTIPLICATIVE,
+        ]
 
         code = self.compile_expression()
         self.parser.check_current_token_is(Token.SEMICOLON)
@@ -901,7 +965,10 @@ class FunctionCompiler:
         self.increase_stack_pointer()  # use 1 additional temp cell for indicating we need to execute a case
         cases = list()  # list of tuples: (value/"default" (int or string), case_code (string), has_break(bool))
 
-        while self.parser.current_token().type in [Token.CASE, Token.DEFAULT]:  # (default | CASE literal) COLON statement* break;? statements*
+        while self.parser.current_token().type in [
+            Token.CASE,
+            Token.DEFAULT,
+        ]:  # (default | CASE literal) COLON statement* break;? statements*
             if self.parser.current_token().type == Token.CASE:
                 self.parser.advance_token()  # skip CASE
                 constant_value_token = self.parser.current_token()
@@ -922,7 +989,9 @@ class FunctionCompiler:
 
             inner_case_code = ""
             while self.parser.current_token().type not in [Token.CASE, Token.DEFAULT, Token.RBRACE, Token.BREAK]:
-                inner_case_code += self.compile_statement(allow_declaration=False)  # not allowed to declare variables directly inside case
+                inner_case_code += self.compile_statement(
+                    allow_declaration=False
+                )  # not allowed to declare variables directly inside case
 
             has_break = False
             if self.parser.current_token().type == Token.BREAK:  # ignore all statements after break
@@ -934,7 +1003,9 @@ class FunctionCompiler:
             cases.append((value, inner_case_code, has_break))
 
         if self.parser.current_token().type not in [Token.CASE, Token.DEFAULT, Token.RBRACE]:
-            raise BFSyntaxError("Expected case / default / RBRACE (}) instead of token %s" % self.parser.current_token())
+            raise BFSyntaxError(
+                "Expected case / default / RBRACE (}) instead of token %s" % self.parser.current_token()
+            )
         self.parser.check_current_token_is(Token.RBRACE)
         self.parser.advance_token()
         self.decrease_stack_pointer(amount=2)
@@ -943,7 +1014,10 @@ class FunctionCompiler:
 
     def compile_break(self):
         # TODO: Make the break statement in scopes inside switch-case (including if/else), and for/do/while
-        raise NotImplementedError("Break statement found outside of switch case first scope.\nBreak is not currently implemented for while/for/do statements.\nToken is %s" % self.parser.current_token())
+        raise NotImplementedError(
+            "Break statement found outside of switch case first scope.\nBreak is not currently implemented for while/for/do statements.\nToken is %s"
+            % self.parser.current_token()
+        )
 
     def compile_for(self):
         # for (statement expression; expression) inner_scope_code   note: statement contains ;, and inner_scope_code can be scope { }
@@ -951,17 +1025,17 @@ class FunctionCompiler:
         # (the statement cannot contain scope - { and } )
 
         """
-            <for> is a special case of scope
-            the initial code (int i = 0;) is executed INSIDE the scope, but BEFORE the LBRACE
-            so we manually compile the scope instead of using self.compile_scope():
+        <for> is a special case of scope
+        the initial code (int i = 0;) is executed INSIDE the scope, but BEFORE the LBRACE
+        so we manually compile the scope instead of using self.compile_scope():
 
-            we first create an ids map, and in the case that there is a variable definition inside the <for> definition:
-            we manually insert the ID into the ids map, and move the pointer to the right once, to make room for it
-            (this needs to be done before the <for> definition's statement)
-            next, inside the for's scope {}:
-            after calling insert_scope_variables_into_ids_map, we move the pointer to the left once, since it counts the ID we entered manually as well
-            after calling exit_scope, we move the pointer to the right, since it counts the ID we entered manually, and we don't want it to be discarded after every iteration
-            finally, at the end of the <for> loop, we move the pointer once to the left, to discard the variable we defined manually
+        we first create an ids map, and in the case that there is a variable definition inside the <for> definition:
+        we manually insert the ID into the ids map, and move the pointer to the right once, to make room for it
+        (this needs to be done before the <for> definition's statement)
+        next, inside the for's scope {}:
+        after calling insert_scope_variables_into_ids_map, we move the pointer to the left once, since it counts the ID we entered manually as well
+        after calling exit_scope, we move the pointer to the right, since it counts the ID we entered manually, and we don't want it to be discarded after every iteration
+        finally, at the end of the <for> loop, we move the pointer once to the left, to discard the variable we defined manually
         """
 
         self.parser.check_current_tokens_are([Token.FOR, Token.LPAREN])
@@ -969,7 +1043,7 @@ class FunctionCompiler:
 
         manually_inserted_variable_in_for_definition = False
         variable = None
-        code = ''
+        code = ""
 
         # =============== enter FOR scope ===============
         self.add_ids_map()
@@ -987,7 +1061,10 @@ class FunctionCompiler:
                 show_side_effect_warning = self.get_token_after_array_access(offset=1).type != Token.ASSIGN
 
             if show_side_effect_warning:
-                print("[Warning] For loop variable '%s' isn't assigned to anything and may cause side effects" % self.parser.next_token())
+                print(
+                    "[Warning] For loop variable '%s' isn't assigned to anything and may cause side effects"
+                    % self.parser.next_token()
+                )
 
         if self.parser.current_token().type == Token.LBRACE:  # statement is a scope
             raise BFSyntaxError("Unexpected scope inside for loop statement - %s" % self.parser.current_token())
@@ -1042,20 +1119,31 @@ class FunctionCompiler:
         token = self.parser.current_token()
         if token.type == Token.INT:  # INT ID ((= EXPRESSION) | ([NUM])+ (= ARRAY_INITIALIZATION)?)? SEMICOLON
             if not allow_declaration:
-                raise BFSemanticError("Cannot define variable (%s) directly inside case. "
-                                      "Can define inside new scope {} or outside the switch statement" % token)
+                raise BFSemanticError(
+                    "Cannot define variable (%s) directly inside case. "
+                    "Can define inside new scope {} or outside the switch statement" % token
+                )
             return self.compile_variable_declaration()
 
         elif token.type in [Token.INCREMENT, Token.DECREMENT, Token.UNARY_MULTIPLICATIVE]:  # ++ID;
             return self.compile_expression_as_statement()
 
         elif token.type == Token.ID:
-            if self.parser.next_token().type in [Token.ASSIGN, Token.LBRACK, Token.INCREMENT, Token.DECREMENT, Token.UNARY_MULTIPLICATIVE]:
+            if self.parser.next_token().type in [
+                Token.ASSIGN,
+                Token.LBRACK,
+                Token.INCREMENT,
+                Token.DECREMENT,
+                Token.UNARY_MULTIPLICATIVE,
+            ]:
                 # ID ASSIGN expression; or ID([expression])+ ASSIGN expression; or ID++;
                 return self.compile_expression_as_statement()
             elif self.parser.next_token().type == Token.LPAREN:  # ID(...);  (function call)
                 return self.compile_function_call_statement()
-            raise BFSyntaxError("Unexpected '%s' after '%s'. Expected '=|+=|-=|*=|/=|%%=|<<=|>>=|&=|(|=)|^=' (assignment), '++|--' (modification) or '(' (function call)" % (str(self.parser.next_token()), str(token)))
+            raise BFSyntaxError(
+                "Unexpected '%s' after '%s'. Expected '=|+=|-=|*=|/=|%%=|<<=|>>=|&=|(|=)|^=' (assignment), '++|--' (modification) or '(' (function call)"
+                % (str(self.parser.next_token()), str(token))
+            )
 
         elif token.type == Token.PRINT:
             return self.compile_print_string()
@@ -1097,7 +1185,7 @@ class FunctionCompiler:
     def compile_scope_statements(self):
         tokens = self.tokens
 
-        code = ''
+        code = ""
         while self.parser.current_token() is not None:
             if self.parser.current_token().type == Token.RBRACE:
                 # we reached the end of our scope
@@ -1124,29 +1212,29 @@ class FunctionCompiler:
         # will be inserted into the new scope prior to the scope's compilation
 
         """
-            example layout:
-                int global_var1;
-                int global_var2;
-                int foo(int a, int b) {
-                    int x;
-                    int y;
-                    return 5;
-                }
+        example layout:
+            int global_var1;
+            int global_var2;
+            int foo(int a, int b) {
+                int x;
+                int y;
+                return 5;
+            }
 
-                int main() {
-                    int n;
-                    foo(1, 2);
-                }
+            int main() {
+                int n;
+                foo(1, 2);
+            }
 
-                global_var1 global_var2 main_return_value n foo_return_value a=1 b=2 x y
+            global_var1 global_var2 main_return_value n foo_return_value a=1 b=2 x y
 
-                calling convention:
-                caller responsibility: make room for return_value (and zero its cell), place parameters, point to return_value cell
-                callee responsibility: put return value in return_value cell and point to it (thus "cleaning" parameters)
-                    can assume that there is a zeroed cell at current_stack_pointer (return_value_cell) (therefore ids_map starts at index current_stack_pointer+1)
-                    can assume that the next cells match your parameters
-                    assumes that initially, the pointer points to the first cell (return_value_cell).
-                    therefore begin with '>' * (1 + parameters + scope variables)
+            calling convention:
+            caller responsibility: make room for return_value (and zero its cell), place parameters, point to return_value cell
+            callee responsibility: put return value in return_value cell and point to it (thus "cleaning" parameters)
+                can assume that there is a zeroed cell at current_stack_pointer (return_value_cell) (therefore ids_map starts at index current_stack_pointer+1)
+                can assume that the next cells match your parameters
+                assumes that initially, the pointer points to the first cell (return_value_cell).
+                therefore begin with '>' * (1 + parameters + scope variables)
         """
 
         assert self.parser.current_token().type == Token.LBRACE
diff --git a/reasoning_gym/code/contrib/bfit/Compiler/Functions.py b/reasoning_gym/code/contrib/bfit/Compiler/Functions.py
index 837e3339..a4ed4b9a 100644
--- a/reasoning_gym/code/contrib/bfit/Compiler/Functions.py
+++ b/reasoning_gym/code/contrib/bfit/Compiler/Functions.py
@@ -1,4 +1,5 @@
 from copy import deepcopy
+
 from .Exceptions import BFSemanticError
 
 functions = dict()  # Global dictionary of function_name --> FunctionCompiler objects
@@ -30,4 +31,7 @@ def check_function_exists(function_token, parameters_amount):
 
     function = functions[function_name]
     if len(function.parameters) != parameters_amount:
-        raise BFSemanticError("Function '%s' has %s parameters (called it with %s parameters)" % (str(function_token), len(function.parameters), parameters_amount))
+        raise BFSemanticError(
+            "Function '%s' has %s parameters (called it with %s parameters)"
+            % (str(function_token), len(function.parameters), parameters_amount)
+        )
diff --git a/reasoning_gym/code/contrib/bfit/Compiler/General.py b/reasoning_gym/code/contrib/bfit/Compiler/General.py
index 2a182b8a..6abab5cf 100644
--- a/reasoning_gym/code/contrib/bfit/Compiler/General.py
+++ b/reasoning_gym/code/contrib/bfit/Compiler/General.py
@@ -1,7 +1,8 @@
-from .Exceptions import BFSyntaxError, BFSemanticError
-from .Token import Token
 from functools import reduce
 
+from .Exceptions import BFSemanticError, BFSyntaxError
+from .Token import Token
+
 """
 This file holds functions that generate general Brainfuck code
 And general functions that are not dependent on other objects
@@ -126,23 +127,29 @@ def unpack_multidimensional_literal_tokens_to_array_dimensions(ID_token, array_d
     if len(array_dimensions) == 0:
         raise BFSemanticError("Tried to initialize array %s with too many nested sub-arrays" % ID_token)
     if len(literal_tokens_list) > array_dimensions[0]:
-        raise BFSemanticError("Tried to initialize array %s dimension %s with too many elements (%s)"
-                              % (ID_token, str(array_dimensions), str(len(literal_tokens_list))))
+        raise BFSemanticError(
+            "Tried to initialize array %s dimension %s with too many elements (%s)"
+            % (ID_token, str(array_dimensions), str(len(literal_tokens_list)))
+        )
 
     result = []
     for element in literal_tokens_list:
         if isinstance(element, list):
             # recursively unpack the list with the sub-dimension of the sub-array
             # E.g if we have arr[3][3][3] and then this call will fill [3][3]=9 elements
-            result.extend(unpack_multidimensional_literal_tokens_to_array_dimensions(ID_token, array_dimensions[1:], element))
+            result.extend(
+                unpack_multidimensional_literal_tokens_to_array_dimensions(ID_token, array_dimensions[1:], element)
+            )
         else:
             result.append(element)
             if len(array_dimensions) > 1:
                 dimension_size = dimensions_to_size(array_dimensions[1:])  # current size we need to fill
-                result.extend([Token(Token.NUM, 0, 0, "0")] * (dimension_size - 1))  # fill missing elements in this dimension with zeros
+                result.extend(
+                    [Token(Token.NUM, 0, 0, "0")] * (dimension_size - 1)
+                )  # fill missing elements in this dimension with zeros
 
     dimension_size = dimensions_to_size(array_dimensions)  # current size we need to fill
-    result.extend([Token(Token.NUM, 0, 0, "0")] * (dimension_size-len(result)))  # fill the result with zeros
+    result.extend([Token(Token.NUM, 0, 0, "0")] * (dimension_size - len(result)))  # fill the result with zeros
     return result
 
 
@@ -157,13 +164,20 @@ def unpack_literal_tokens_to_array_dimensions(ID_token, array_dimensions, litera
     if all(not isinstance(element, list) for element in literal_tokens_list):
         # special case - if all elements are literals, then we allow assigning them as-is and not care about dimensions
         # E.g if we have arr[3][3][3] = {1,2,3,4} then return [1,2,3,4,0,0,0,0,0]
-        unpacked_literals_list = literal_tokens_list + [Token(Token.NUM, 0, 0, "0")] * (array_size - len(literal_tokens_list))  # fill missing with zeros
+        unpacked_literals_list = literal_tokens_list + [Token(Token.NUM, 0, 0, "0")] * (
+            array_size - len(literal_tokens_list)
+        )  # fill missing with zeros
     else:
-        unpacked_literals_list = unpack_multidimensional_literal_tokens_to_array_dimensions(ID_token, array_dimensions, literal_tokens_list)
+        unpacked_literals_list = unpack_multidimensional_literal_tokens_to_array_dimensions(
+            ID_token, array_dimensions, literal_tokens_list
+        )
 
     if len(unpacked_literals_list) > array_size:
-        raise BFSemanticError("Tried to initialize array %s with incompatible amount of literals."
-                              " (array size is %s and literals size is %s)" % (ID_token, str(array_size), str(len(unpacked_literals_list))))
+        raise BFSemanticError(
+            "Tried to initialize array %s with incompatible amount of literals."
+            " (array size is %s and literals size is %s)"
+            % (ID_token, str(array_size), str(len(unpacked_literals_list)))
+        )
     assert len(unpacked_literals_list) == array_size
     return unpacked_literals_list
 
@@ -208,17 +222,19 @@ def process_switch_cases(expression_code, cases):
     code += "<"  # point to expression
 
     if all_cases_have_break:  # small optimization for evaluating the expression
-        cases = [case for case in cases if case[0] != "default"]  # remove default to be able to sort. it is handled differently
+        cases = [
+            case for case in cases if case[0] != "default"
+        ]  # remove default to be able to sort. it is handled differently
         cases.sort(key=lambda x: x[0], reverse=True)  # Can sort since correct flow is not needed
 
     """
         This loop compares the expression value to each case in the switch-case statement, in reverse order
         It does so by increasing and decreasing expression, and comparing result to 0
-        E.G. if we have 
+        E.G. if we have
             switch(x) {
                 case 2:
                 case 0:
-                case 5: 
+                case 5:
                 case 1:
             }
         x will be put in <expression> cell, then:
@@ -244,7 +260,7 @@ def process_switch_cases(expression_code, cases):
     <need_to_execute=1>
     <compare_with_1>    [
     <compare_with_5>        [
-    <compare_with_0>            [ 
+    <compare_with_0>            [
     <compare_with_2>                [
                                         <default_code> <expression_value=0> <need_to_execute=0>
                                     ]   <if need_to_execute> <code_for_2> <need_to_execute=0>
@@ -487,22 +503,22 @@ def get_bitwise_code(code_logic):
     code += "<<"  # point to a
 
     code += "["  # while a != 0:
-    code +=     "-"  # a -= 1
-    code +=     ">>-"  # c -= 1
-    code +=     "[>+>>+<<<-]>[<+>-]"  # copy c to y (using w)
-    code +=     ">>"  # point to y
-    code +=     ">>+<<"  # bit1 += 1
+    code += "-"  # a -= 1
+    code += ">>-"  # c -= 1
+    code += "[>+>>+<<<-]>[<+>-]"  # copy c to y (using w)
+    code += ">>"  # point to y
+    code += ">>+<<"  # bit1 += 1
 
-    code +=     "-["  # if y != 1:
-    code +=         "<+"  # x += 1
-    code +=         "<<++"  # c += 2 (c was 0)
-    code +=         ">" * 5  # point to bit1
-    code +=         "--"  # bit1 -= 2 (bit1 was 2)
-    code +=         "<<"  # point to y
-    code +=         "+"  # set y to 0
-    code +=     "]"  # end if
+    code += "-["  # if y != 1:
+    code += "<+"  # x += 1
+    code += "<<++"  # c += 2 (c was 0)
+    code += ">" * 5  # point to bit1
+    code += "--"  # bit1 -= 2 (bit1 was 2)
+    code += "<<"  # point to y
+    code += "+"  # set y to 0
+    code += "]"  # end if
 
-    code +=     "<<<<<"  # point to a
+    code += "<<<<<"  # point to a
     code += "]"  # end while
 
     code += ">>>>[<<<<+>>>>-]"  # move x to a (x is a/2)
@@ -510,21 +526,21 @@ def get_bitwise_code(code_logic):
     code += "<"  # point to b
 
     code += "["  # while b != 0:
-    code +=     "-"  # b -= 1
-    code +=     ">-"  # c -= 1
-    code +=     "[>+>>+<<<-]>[<+>-]"  # copy c to y (using w)
-    code +=     ">>"  # point to y
-    code +=     ">+<"  # z += 1
+    code += "-"  # b -= 1
+    code += ">-"  # c -= 1
+    code += "[>+>>+<<<-]>[<+>-]"  # copy c to y (using w)
+    code += ">>"  # point to y
+    code += ">+<"  # z += 1
 
-    code +=     "-["  # if y != 1:
-    code +=         ">--<"  # z -= 2 (z was 2)
-    code +=         "<+"  # x += 1
-    code +=         "<<++"  # c += 2 (c was 0)
-    code +=         ">>>"  # point to y
-    code +=         "+"  # set y to 0
-    code +=     "]"
+    code += "-["  # if y != 1:
+    code += ">--<"  # z -= 2 (z was 2)
+    code += "<+"  # x += 1
+    code += "<<++"  # c += 2 (c was 0)
+    code += ">>>"  # point to y
+    code += "+"  # set y to 0
+    code += "]"
 
-    code +=     "<<<<"  # point to b
+    code += "<<<<"  # point to b
     code += "]"  # end while
 
     # w is a % 2
@@ -658,14 +674,14 @@ def get_unary_prefix_op_code(token, offset_to_variable=None):
         assert token.data in ["+", "-"]
         if token.data == "+":
             # keep value as-is
-            return '>'
+            return ">"
         elif token.data == "-":
             # a temp
-            code = ">[-]" # zero temp
-            code += "<" # point to a
-            code += "[->-<]" # sub a from temp
-            code += ">" # point to temp
-            code += "[<+>-]" # copy temp to a
+            code = ">[-]"  # zero temp
+            code += "<"  # point to a
+            code += "[->-<]"  # sub a from temp
+            code += ">"  # point to temp
+            code += "[<+>-]"  # copy temp to a
             return code
     raise NotImplementedError
 
@@ -1127,7 +1143,6 @@ def get_op_boolean_operator_code(node, current_pointer):
     raise NotImplementedError
 
 
-
 def get_print_string_code(string):
     code = "[-]"  # zero the current cell
     code += ">[-]"  # zero the next cell (will be used for loop counts)
@@ -1200,6 +1215,7 @@ def get_move_left_index_cell_code():
 #     General
 # =================
 
+
 def get_literal_token_value(token):
     # known at compilation time
     assert is_token_literal(token)
diff --git a/reasoning_gym/code/contrib/bfit/Compiler/Globals.py b/reasoning_gym/code/contrib/bfit/Compiler/Globals.py
index 5c37c59e..0eaaac80 100644
--- a/reasoning_gym/code/contrib/bfit/Compiler/Globals.py
+++ b/reasoning_gym/code/contrib/bfit/Compiler/Globals.py
@@ -1,6 +1,7 @@
 from collections import namedtuple
-from .Token import Token
+
 from .General import dimensions_to_size, get_NUM_token_value
+from .Token import Token
 
 """
 This file holds the program's functions and global variables
@@ -55,7 +56,7 @@ def create_variable_from_definition(parser, index=None, advance_tokens=False):
     if index is None, then assumes we start at the current_token_index
     if advance_tokens is True, then modifies current_token_index accordingly using parser.advance_token()
     """
-    
+
     if index is None:
         index = parser.current_token_index
 
diff --git a/reasoning_gym/code/contrib/bfit/Compiler/Lexical_analyzer.py b/reasoning_gym/code/contrib/bfit/Compiler/Lexical_analyzer.py
index 1c3e5e0a..091bbcf7 100644
--- a/reasoning_gym/code/contrib/bfit/Compiler/Lexical_analyzer.py
+++ b/reasoning_gym/code/contrib/bfit/Compiler/Lexical_analyzer.py
@@ -1,6 +1,7 @@
 import re
-from .Token import Token
+
 from .Optimizer import optimize
+from .Token import Token
 
 
 class LexicalErrorException(Exception):
@@ -14,64 +15,59 @@ def analyze(text):
     """
 
     rules = [
-        ('\s+', Token.WHITESPACE),
-        ('void',    Token.VOID),
-        ('int',     Token.INT),
-        ('bool', Token.INT),  # treat bool as int
-        ('char', Token.INT),  # treat char as int
-
-        ('true', Token.TRUE),
-        ('false', Token.FALSE),
-        ('&&', Token.AND),
-        ('\|\|', Token.OR),
-        ('\!', Token.NOT),
-        ('return', Token.RETURN),
-        ('if', Token.IF),
-        ('else', Token.ELSE),
-        ('while', Token.WHILE),
-        ('for', Token.FOR),
-        ('do', Token.DO),
-        ('print', Token.PRINT),
-        ('switch', Token.SWITCH),
-        ('case', Token.CASE),
-        ('default', Token.DEFAULT),
-        ('break', Token.BREAK),
-        ('continue', Token.CONTINUE),  # todo
-        (':', Token.COLON),
-        (';', Token.SEMICOLON),
-        (',', Token.COMMA),
-
-        ('\(', Token.LPAREN),
-        ('\)', Token.RPAREN),
-        ('\{', Token.LBRACE),
-        ('\}', Token.RBRACE),
-        ('\[', Token.LBRACK),
-        ('\]', Token.RBRACK),
-        ('=|\+=|-=|\*=|/=|%=|<<=|>>=|&=|\|=|\^=', Token.ASSIGN),
-        ('\?', Token.TERNARY),
-
-        ('<=|>=|==|!=|<|>', Token.RELOP),
-        ('\+\+', Token.INCREMENT),
-        ('--', Token.DECREMENT),
-        ('\+|-|\*|/|%', Token.BINOP),
-        ('\*\*|//|%%', Token.UNARY_MULTIPLICATIVE),
-
-        ('<<|>>', Token.BITWISE_SHIFT),
-        ('~', Token.BITWISE_NOT),
-        ('&', Token.BITWISE_AND),
-        ('\|', Token.BITWISE_OR),
-        ('\^', Token.BITWISE_XOR),
-
-        ('([a-zA-Z_][a-zA-Z0-9_]*)',    Token.ID),
-        ('(\d+)',     Token.NUM),
-        ('(0x[A-Fa-f\d]+)',     Token.NUM),  # hexadecimal number
-        ('(0o[0-7]+)',     Token.NUM),  # octal number
-        ('(0b[01]+)',     Token.NUM),  # binary number
-        (r'\"(\\\"|[^"])*"',   Token.STRING),
-        (r'\'(\\\'|(\\)?[^\'])\'', Token.CHAR),
-        ('//.*(\\n|$)', Token.COMMENT),
-        (r'/\*[\s\S]*?\*/', Token.COMMENT),  # multiline comments
-        ('.',       Token.UNIDENTIFIED)
+        (r"\s+", Token.WHITESPACE),
+        ("void", Token.VOID),
+        ("int", Token.INT),
+        ("bool", Token.INT),  # treat bool as int
+        ("char", Token.INT),  # treat char as int
+        ("true", Token.TRUE),
+        ("false", Token.FALSE),
+        ("&&", Token.AND),
+        (r"\|\|", Token.OR),
+        (r"\!", Token.NOT),
+        ("return", Token.RETURN),
+        ("if", Token.IF),
+        ("else", Token.ELSE),
+        ("while", Token.WHILE),
+        ("for", Token.FOR),
+        ("do", Token.DO),
+        ("print", Token.PRINT),
+        ("switch", Token.SWITCH),
+        ("case", Token.CASE),
+        ("default", Token.DEFAULT),
+        ("break", Token.BREAK),
+        ("continue", Token.CONTINUE),  # todo
+        (":", Token.COLON),
+        (";", Token.SEMICOLON),
+        (",", Token.COMMA),
+        (r"\(", Token.LPAREN),
+        (r"\)", Token.RPAREN),
+        (r"\{", Token.LBRACE),
+        (r"\}", Token.RBRACE),
+        (r"\[", Token.LBRACK),
+        (r"\]", Token.RBRACK),
+        (r"=|\+=|-=|\*=|/=|%=|<<=|>>=|&=|\|=|\^=", Token.ASSIGN),
+        (r"\?", Token.TERNARY),
+        (r"<=|>=|==|!=|<|>", Token.RELOP),
+        (r"\+\+", Token.INCREMENT),
+        ("--", Token.DECREMENT),
+        (r"\+|-|\*|/|%", Token.BINOP),
+        (r"\*\*|//|%%", Token.UNARY_MULTIPLICATIVE),
+        ("<<|>>", Token.BITWISE_SHIFT),
+        ("~", Token.BITWISE_NOT),
+        ("&", Token.BITWISE_AND),
+        (r"\|", Token.BITWISE_OR),
+        (r"\^", Token.BITWISE_XOR),
+        ("([a-zA-Z_][a-zA-Z0-9_]*)", Token.ID),
+        (r"(\d+)", Token.NUM),
+        (r"(0x[A-Fa-f\d]+)", Token.NUM),  # hexadecimal number
+        ("(0o[0-7]+)", Token.NUM),  # octal number
+        ("(0b[01]+)", Token.NUM),  # binary number
+        (r'\"(\\\"|[^"])*"', Token.STRING),
+        (r"\'(\\\'|(\\)?[^\'])\'", Token.CHAR),
+        ("//.*(\\n|$)", Token.COMMENT),
+        (r"/\*[\s\S]*?\*/", Token.COMMENT),  # multiline comments
+        (".", Token.UNIDENTIFIED),
     ]
 
     rules = [(re.compile(r), t) for r, t in rules]
@@ -79,7 +75,7 @@ def analyze(text):
     tokens = []
 
     # create a mapping of [line number] to [offset of that line from the beginning of the text]
-    newline = re.compile('\n')
+    newline = re.compile("\n")
     lines = [0] + [m.end() for m in re.finditer(newline, text)]
 
     i = 0
@@ -99,12 +95,12 @@ def analyze(text):
 
         # calculate line and column
         line, column = None, None
-        for line_idx in range(len(lines)-1):
-            if lines[line_idx] <= longest_match.start() < lines[line_idx+1]:
-                line, column = line_idx+1, (longest_match.start() - lines[line_idx])+1  # humans count from 1 :)
+        for line_idx in range(len(lines) - 1):
+            if lines[line_idx] <= longest_match.start() < lines[line_idx + 1]:
+                line, column = line_idx + 1, (longest_match.start() - lines[line_idx]) + 1  # humans count from 1 :)
                 break
         if not line:
-            line, column = len(lines), (longest_match.start() - lines[-1])+1
+            line, column = len(lines), (longest_match.start() - lines[-1]) + 1
 
         if matched_token in [Token.COMMENT, Token.WHITESPACE]:
             pass  # do nothing
@@ -112,8 +108,18 @@ def analyze(text):
             raise LexicalErrorException("Unidentified Character '%s' (line %s column %s)" % (text[i], line, column))
         elif matched_token in [Token.STRING, Token.CHAR]:
             # remove quotes at beginning and end, un-escape characters
-            tokens.append(Token(matched_token, line, column, longest_match.group()[1:-1].encode("utf8").decode("unicode_escape")))
-        elif matched_token in [Token.NUM, Token.ID, Token.BINOP, Token.RELOP, Token.ASSIGN, Token.UNARY_MULTIPLICATIVE, Token.BITWISE_SHIFT]:
+            tokens.append(
+                Token(matched_token, line, column, longest_match.group()[1:-1].encode("utf8").decode("unicode_escape"))
+            )
+        elif matched_token in [
+            Token.NUM,
+            Token.ID,
+            Token.BINOP,
+            Token.RELOP,
+            Token.ASSIGN,
+            Token.UNARY_MULTIPLICATIVE,
+            Token.BITWISE_SHIFT,
+        ]:
             tokens.append(Token(matched_token, line, column, longest_match.group()))
         else:
             tokens.append(Token(matched_token, line, column))
@@ -128,16 +134,40 @@ def tests():
         text = "my international int ; int; pints; international;"
         res = analyze(text)
 
-        expected = [Token.ID, Token.ID, Token.INT, Token.SEMICOLON, Token.INT, Token.SEMICOLON, Token.ID,
-                    Token.SEMICOLON, Token.ID, Token.SEMICOLON]
+        expected = [
+            Token.ID,
+            Token.ID,
+            Token.INT,
+            Token.SEMICOLON,
+            Token.INT,
+            Token.SEMICOLON,
+            Token.ID,
+            Token.SEMICOLON,
+            Token.ID,
+            Token.SEMICOLON,
+        ]
         assert len(res) == len(expected) and all(res[i].type == expected[i] for i in range(len(res)))
 
     def test2():
         text = "true !||!false falsek  k||y+-a&&x"
         res = analyze(text)
 
-        expected = [Token.TRUE, Token.NOT, Token.OR, Token.NOT, Token.FALSE, Token.ID, Token.ID, Token.OR, Token.ID,
-                    Token.BINOP, Token.BINOP, Token.ID, Token.AND, Token.ID]
+        expected = [
+            Token.TRUE,
+            Token.NOT,
+            Token.OR,
+            Token.NOT,
+            Token.FALSE,
+            Token.ID,
+            Token.ID,
+            Token.OR,
+            Token.ID,
+            Token.BINOP,
+            Token.BINOP,
+            Token.ID,
+            Token.AND,
+            Token.ID,
+        ]
         assert len(res) == len(expected) and all(res[i].type == expected[i] for i in range(len(res)))
 
     def test3():
@@ -166,9 +196,29 @@ def tests():
         # test all arithmetic operations
         text = "(1+2*3/6)+(1%3)*(6-1)"
         tokens = analyze(text)
-        expected = [Token.LPAREN, Token.NUM, Token.BINOP, Token.NUM, Token.BINOP, Token.NUM, Token.BINOP, Token.NUM,
-                    Token.RPAREN, Token.BINOP, Token.LPAREN, Token.NUM, Token.BINOP, Token.NUM, Token.RPAREN,
-                    Token.BINOP, Token.LPAREN, Token.NUM, Token.BINOP, Token.NUM, Token.RPAREN]
+        expected = [
+            Token.LPAREN,
+            Token.NUM,
+            Token.BINOP,
+            Token.NUM,
+            Token.BINOP,
+            Token.NUM,
+            Token.BINOP,
+            Token.NUM,
+            Token.RPAREN,
+            Token.BINOP,
+            Token.LPAREN,
+            Token.NUM,
+            Token.BINOP,
+            Token.NUM,
+            Token.RPAREN,
+            Token.BINOP,
+            Token.LPAREN,
+            Token.NUM,
+            Token.BINOP,
+            Token.NUM,
+            Token.RPAREN,
+        ]
         assert len(tokens) == len(expected) and all(tokens[i].type == expected[i] for i in range(len(tokens)))
         optimize(tokens)
         assert tokens[1].data == "2" and tokens[5].data == "1" and tokens[9].data == "5"
@@ -179,5 +229,5 @@ def tests():
     test3()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     tests()
diff --git a/reasoning_gym/code/contrib/bfit/Compiler/LibraryFunctionCompiler.py b/reasoning_gym/code/contrib/bfit/Compiler/LibraryFunctionCompiler.py
index 5b6567d6..feb2497a 100644
--- a/reasoning_gym/code/contrib/bfit/Compiler/LibraryFunctionCompiler.py
+++ b/reasoning_gym/code/contrib/bfit/Compiler/LibraryFunctionCompiler.py
@@ -48,7 +48,9 @@ def get_readint_code():
     code += ">"  # point to tmp
     code += "[<++++++++++>-]"  # res = tmp * 10, tmp = 0
     code += ">"  # point to input
-    code += "-" * (0x30 - 10)  # convert character to a digit by subtracting 0x30 from it (we already subtracted 10 before)
+    code += "-" * (
+        0x30 - 10
+    )  # convert character to a digit by subtracting 0x30 from it (we already subtracted 10 before)
     code += "[<<+>>-]"  # res += input
     code += "]"  # end if
 
diff --git a/reasoning_gym/code/contrib/bfit/Compiler/Node.py b/reasoning_gym/code/contrib/bfit/Compiler/Node.py
index fadeeff8..581ace92 100644
--- a/reasoning_gym/code/contrib/bfit/Compiler/Node.py
+++ b/reasoning_gym/code/contrib/bfit/Compiler/Node.py
@@ -1,10 +1,20 @@
 from .Exceptions import BFSemanticError
-from .General import get_copy_from_variable_code, get_copy_to_variable_code
-from .General import get_move_left_index_cell_code, get_move_right_index_cells_code
-from .General import get_offset_to_variable, get_variable_dimensions_from_token
-from .General import get_op_between_literals_code, get_literal_token_code, get_token_ID_code
-from .General import get_unary_prefix_op_code, get_unary_postfix_op_code, is_token_literal
-from .General import unpack_literal_tokens_to_array_dimensions, get_op_boolean_operator_code
+from .General import (
+    get_copy_from_variable_code,
+    get_copy_to_variable_code,
+    get_literal_token_code,
+    get_move_left_index_cell_code,
+    get_move_right_index_cells_code,
+    get_offset_to_variable,
+    get_op_between_literals_code,
+    get_op_boolean_operator_code,
+    get_token_ID_code,
+    get_unary_postfix_op_code,
+    get_unary_prefix_op_code,
+    get_variable_dimensions_from_token,
+    is_token_literal,
+    unpack_literal_tokens_to_array_dimensions,
+)
 from .Token import Token
 
 """
@@ -60,7 +70,14 @@ class NodeToken(Node):
             else:
                 return get_literal_token_code(self.token)
 
-        elif self.token.type in [Token.BINOP, Token.RELOP, Token.BITWISE_SHIFT, Token.BITWISE_AND, Token.BITWISE_OR, Token.BITWISE_XOR]:
+        elif self.token.type in [
+            Token.BINOP,
+            Token.RELOP,
+            Token.BITWISE_SHIFT,
+            Token.BITWISE_AND,
+            Token.BITWISE_OR,
+            Token.BITWISE_XOR,
+        ]:
             code = self.left.get_code(current_pointer)
             code += self.right.get_code(current_pointer + 1)
             code += "<<"  # point to the first operand
@@ -78,7 +95,7 @@ class NodeToken(Node):
         elif self.token.type == Token.ASSIGN:
             assert self.left.token.type == Token.ID
 
-            if self.token.data == '=':
+            if self.token.data == "=":
                 # id = expression
                 code = self.right.get_code(current_pointer)
 
@@ -119,7 +136,7 @@ class NodeTernary(Node):
         code = ">"  # point to bool_evaluate_node_false
         code += "[-]+"  # bool_evaluate_node_false=1
         code += ">"  # point to condition
-        code += self.condition.get_code(current_pointer+2)  # evaluate condition
+        code += self.condition.get_code(current_pointer + 2)  # evaluate condition
         code += "<"  # point to condition
 
         code += "["  # if condition is non-zero
@@ -150,7 +167,14 @@ class NodeUnaryPrefix(Node):
 
     def get_code(self, current_pointer, *args, **kwargs):
         # unary prefix (!x or ++x or ~x or -x)
-        assert self.token_operation.type in [Token.NOT, Token.INCREMENT, Token.DECREMENT, Token.UNARY_MULTIPLICATIVE, Token.BITWISE_NOT, Token.BINOP]
+        assert self.token_operation.type in [
+            Token.NOT,
+            Token.INCREMENT,
+            Token.DECREMENT,
+            Token.UNARY_MULTIPLICATIVE,
+            Token.BITWISE_NOT,
+            Token.BINOP,
+        ]
 
         if self.token_operation.type in [Token.NOT, Token.BITWISE_NOT, Token.BINOP]:
             code = self.node_literal.get_code(current_pointer)
@@ -178,10 +202,15 @@ class NodeUnaryPrefix(Node):
 
             # the token to apply on must be an ID
             if isinstance(self.node_literal, NodeToken) is False:
-                raise BFSemanticError("Prefix operator %s can only be applied to a variable" % str(self.token_operation))
+                raise BFSemanticError(
+                    "Prefix operator %s can only be applied to a variable" % str(self.token_operation)
+                )
 
             if self.node_literal.token.type != Token.ID:
-                raise BFSemanticError("Prefix operator %s cannot be applied to %s, but only to a variable" % (str(self.token_operation), str(self.node_literal.token)))
+                raise BFSemanticError(
+                    "Prefix operator %s cannot be applied to %s, but only to a variable"
+                    % (str(self.token_operation), str(self.node_literal.token))
+                )
 
             offset_to_ID = get_offset_to_variable(self.ids_map_list, self.node_literal.token, current_pointer)
             return get_unary_prefix_op_code(self.token_operation, offset_to_ID)
@@ -218,7 +247,10 @@ class NodeUnaryPostfix(Node):
             raise BFSemanticError("Postfix operator %s can only be applied to a variable" % str(self.token_operation))
 
         if self.node_literal.token.type != Token.ID:
-            raise BFSemanticError("Postfix operator %s cannot be applied to %s, but only to a variable" % (str(self.token_operation), str(self.node_literal.token)))
+            raise BFSemanticError(
+                "Postfix operator %s cannot be applied to %s, but only to a variable"
+                % (str(self.token_operation), str(self.node_literal.token))
+            )
 
         offset_to_ID = get_offset_to_variable(self.ids_map_list, self.node_literal.token, current_pointer)
         return get_unary_postfix_op_code(self.token_operation, offset_to_ID)
@@ -227,27 +259,31 @@ class NodeUnaryPostfix(Node):
 class NodeFunctionCall(Node):
     def __init__(self, ids_map_list, function_to_call, parameters):
         """
-            receives a FunctionCompiler object
-                that implements get_code() which gets a stack pointer and returns code
-            receives a list of parameters - Node objects
-                each one gets a stack pointer and returns code that evaluates the parameter
+        receives a FunctionCompiler object
+            that implements get_code() which gets a stack pointer and returns code
+        receives a list of parameters - Node objects
+            each one gets a stack pointer and returns code that evaluates the parameter
         """
         Node.__init__(self, ids_map_list)
         self.function_to_call = function_to_call
         self.parameters = parameters
 
     def get_code(self, current_pointer, *args, **kwargs):
-        code = '[-]>'  # return_value_cell=0
+        code = "[-]>"  # return_value_cell=0
 
         # evaluate parameters from left to right, and put them on the "stack" in that order
         # after each parameter code, the pointer points to the next available cell (one after the parameter)
         for i, parameter in enumerate(self.parameters):
-            code += parameter.get_code(current_pointer+1+i)  # evaluate each parameter at its cell offset (starting at one after return_value_cell)
+            code += parameter.get_code(
+                current_pointer + 1 + i
+            )  # evaluate each parameter at its cell offset (starting at one after return_value_cell)
 
         # at this point we point to one after the last parameter
         code += "<" * len(self.parameters)  # point back to first parameter
         code += "<"  # point to return_value_cell
-        code += self.function_to_call.get_code(current_stack_pointer=current_pointer)  # after this we point to return value cell
+        code += self.function_to_call.get_code(
+            current_stack_pointer=current_pointer
+        )  # after this we point to return value cell
         code += ">"  # point to next available cell (one after return value)
         return code
 
@@ -377,9 +413,10 @@ class NodeArraySetElement(NodeArrayElement):
 
 class NodeArrayAssignment(Node):
     """
-        Used for array assignment
-        E.g arr = = { 1, 2, 3... }
+    Used for array assignment
+    E.g. arr = { 1, 2, 3... }
     """
+
     def __init__(self, ids_map_list, token_id, literal_tokens_list):
         Node.__init__(self, ids_map_list)
         self.token_id = token_id
@@ -387,7 +424,9 @@ class NodeArrayAssignment(Node):
 
     def get_code(self, current_pointer, *args, **kwargs):
         array_dimensions = get_variable_dimensions_from_token(self.ids_map_list, self.token_id)
-        unpacked_literals_list = unpack_literal_tokens_to_array_dimensions(self.token_id, array_dimensions, self.literal_tokens_list)
+        unpacked_literals_list = unpack_literal_tokens_to_array_dimensions(
+            self.token_id, array_dimensions, self.literal_tokens_list
+        )
 
         offset = get_offset_to_variable(self.ids_map_list, self.token_id, current_pointer)
         code = "<" * offset  # point to first array element
diff --git a/reasoning_gym/code/contrib/bfit/Compiler/Optimizer.py b/reasoning_gym/code/contrib/bfit/Compiler/Optimizer.py
index c2bc5413..992bfe49 100644
--- a/reasoning_gym/code/contrib/bfit/Compiler/Optimizer.py
+++ b/reasoning_gym/code/contrib/bfit/Compiler/Optimizer.py
@@ -15,9 +15,13 @@ def optimize_once(tokens):
         # optimize arithmetic operations. E.g replace 1+2 with 3
 
         # need to be careful not to optimize (1+2*3) to (3*3)
-        if tokens[start_index+1].data in ["*", "/", "%"] or (start_index+3 >= len(tokens)) or (tokens[start_index+3].data not in ["*", "/", "%"]):
-            num1, num2 = get_NUM_token_value(tokens[start_index]), get_NUM_token_value(tokens[start_index+2])
-            op = tokens[start_index+1].data
+        if (
+            tokens[start_index + 1].data in ["*", "/", "%"]
+            or (start_index + 3 >= len(tokens))
+            or (tokens[start_index + 3].data not in ["*", "/", "%"])
+        ):
+            num1, num2 = get_NUM_token_value(tokens[start_index]), get_NUM_token_value(tokens[start_index + 2])
+            op = tokens[start_index + 1].data
             if op == "+":
                 val = num1 + num2
             elif op == "-":
@@ -38,8 +42,13 @@ def optimize_once(tokens):
                 raise NotImplementedError(op)
 
             # remove the 3 old tokens and replace them with new one
-            new_token = Token(Token.NUM, tokens[start_index].line, tokens[start_index].column, data=str(val),
-                              original_tokens=tokens[start_index:start_index+3])
+            new_token = Token(
+                Token.NUM,
+                tokens[start_index].line,
+                tokens[start_index].column,
+                data=str(val),
+                original_tokens=tokens[start_index : start_index + 3],
+            )
 
             for _ in range(3):
                 tokens.pop(start_index)
@@ -52,16 +61,24 @@ def optimize_once(tokens):
         # replace printint(50) with print("50")
         # since printing strings compiles into less Brainfuck code than printing ints
         if tokens[start_index].data == "printint":
-            tokens[start_index] = Token(Token.PRINT, tokens[start_index].line, tokens[start_index].column, original_tokens=[tokens[start_index]])
-            tokens[start_index+2] = Token(Token.STRING, tokens[start_index].line, tokens[start_index].column,
-                                          data=str(tokens[start_index+2].data), original_tokens=[tokens[start_index+2]])
+            tokens[start_index] = Token(
+                Token.PRINT, tokens[start_index].line, tokens[start_index].column, original_tokens=[tokens[start_index]]
+            )
+            tokens[start_index + 2] = Token(
+                Token.STRING,
+                tokens[start_index].line,
+                tokens[start_index].column,
+                data=str(tokens[start_index + 2].data),
+                original_tokens=[tokens[start_index + 2]],
+            )
             return True
 
         return False
 
-    rules = [([Token.NUM, Token.BINOP, Token.NUM], optimize_binop),  # arithmetic operations
-             ([Token.ID, Token.LPAREN, Token.NUM, Token.RPAREN], optimize_printint),  # printint(50) to print("50")
-             ]
+    rules = [
+        ([Token.NUM, Token.BINOP, Token.NUM], optimize_binop),  # arithmetic operations
+        ([Token.ID, Token.LPAREN, Token.NUM, Token.RPAREN], optimize_printint),  # printint(50) to print("50")
+    ]
 
     # try to match one of the rules to the tokens in a "sliding window" style
     i = 0
@@ -69,7 +86,7 @@ def optimize_once(tokens):
         optimized = False
         for tokens_sequence, optimization_function in rules:
             if i + len(tokens_sequence) <= len(tokens):
-                if all(tokens_sequence[n] == tokens[i+n].type for n in range(len(tokens_sequence))):
+                if all(tokens_sequence[n] == tokens[i + n].type for n in range(len(tokens_sequence))):
                     if optimization_function(tokens, i):
                         optimized = True
         if optimized:
@@ -82,7 +99,7 @@ def optimize(tokens):
     prev_tokens = [token.type for token in tokens]
     while True:
         optimize_once(tokens)
-        print(".", end='')
+        print(".", end="")
         current_tokens = [token.type for token in tokens]
         if current_tokens == prev_tokens:
             break
diff --git a/reasoning_gym/code/contrib/bfit/Compiler/Parser.py b/reasoning_gym/code/contrib/bfit/Compiler/Parser.py
index a658e04a..900ae41d 100644
--- a/reasoning_gym/code/contrib/bfit/Compiler/Parser.py
+++ b/reasoning_gym/code/contrib/bfit/Compiler/Parser.py
@@ -1,12 +1,13 @@
-from .Exceptions import BFSyntaxError, BFSemanticError
-from .Token import Token
+from .Exceptions import BFSemanticError, BFSyntaxError
 from .General import is_token_literal
+from .Token import Token
 
 
 class Parser:
     """
     Used to easily iterate tokens
     """
+
     def __init__(self, tokens):
         self.tokens = tokens
         self.current_token_index = 0
@@ -80,7 +81,10 @@ class Parser:
             raise BFSyntaxError("Expected %s after %s" % (str(tokens_list), str(self.tokens[starting_index])))
         for i in range(0, len(tokens_list)):
             if self.tokens[starting_index + 1 + i].type != tokens_list[i]:
-                raise BFSyntaxError("Expected %s after %s" % (str(tokens_list[i]), [str(t) for t in self.tokens[starting_index: starting_index+1+i]]))
+                raise BFSyntaxError(
+                    "Expected %s after %s"
+                    % (str(tokens_list[i]), [str(t) for t in self.tokens[starting_index : starting_index + 1 + i]])
+                )
 
     def check_next_token_is(self, token, starting_index=None):
         self.check_next_tokens_are([token], starting_index=starting_index)
diff --git a/reasoning_gym/code/contrib/bfit/Interpreter.py b/reasoning_gym/code/contrib/bfit/Interpreter.py
index 02e0520f..9281772b 100644
--- a/reasoning_gym/code/contrib/bfit/Interpreter.py
+++ b/reasoning_gym/code/contrib/bfit/Interpreter.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
-import sys
 import argparse
+import sys
 
 
 def create_jumps_dictionary(program):
@@ -9,9 +9,9 @@ def create_jumps_dictionary(program):
     res = dict()
 
     for index, command in enumerate(program):
-        if command == '[':
+        if command == "[":
             lbraces.append(index)
-        elif command == ']':
+        elif command == "]":
             if len(lbraces) == 0:
                 raise SyntaxError("Brainfuck: mismatched parentheses (at index: %s)" % index)
 
@@ -35,26 +35,26 @@ def brainfuck(program, bits=8):
     while instruction_pointer < len(program):
         command = program[instruction_pointer]
 
-        if command == '>':
+        if command == ">":
             data_pointer += 1
-        elif command == '<':
+        elif command == "<":
             data_pointer -= 1
-        elif command == '+':
-            data[data_pointer] = (data.get(data_pointer, 0) + 1)
-            if data[data_pointer] == 2 ** bits:
+        elif command == "+":
+            data[data_pointer] = data.get(data_pointer, 0) + 1
+            if data[data_pointer] == 2**bits:
                 data[data_pointer] = 0
-        elif command == '-':
-            data[data_pointer] = (data.get(data_pointer, 0) - 1)
+        elif command == "-":
+            data[data_pointer] = data.get(data_pointer, 0) - 1
             if data[data_pointer] == -1:
-                data[data_pointer] = 2 ** bits - 1
-        elif command == ',':
+                data[data_pointer] = 2**bits - 1
+        elif command == ",":
             data[data_pointer] = ord(sys.stdin.read(1)) % 256
-        elif command == '.':
-            print(chr(data.get(data_pointer, 0)), end='', flush=True)
-        elif command == '[':
+        elif command == ".":
+            print(chr(data.get(data_pointer, 0)), end="", flush=True)
+        elif command == "[":
             if data.get(data_pointer, 0) == 0:
                 instruction_pointer = jumps[instruction_pointer]
-        elif command == ']':
+        elif command == "]":
             if data.get(data_pointer, 0) != 0:
                 instruction_pointer = jumps[instruction_pointer]
         else:  # everything else is comment
@@ -63,16 +63,19 @@ def brainfuck(program, bits=8):
         instruction_pointer += 1
 
     if data_pointer != 0:
-        print("WARNING (interpreter) - at the end of the execution the data pointer is %s instead of 0 (possibly a compiler issue)" % str(data_pointer))
+        print(
+            "WARNING (interpreter) - at the end of the execution the data pointer is %s instead of 0 (possibly a compiler issue)"
+            % str(data_pointer)
+        )
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("filepath")
     parser.add_argument("--bits", "-b", "--interpreter-bits", type=int, default=8, help="Amount of bits each cell uses")
 
     args = parser.parse_args()
-    with open(args.filepath, 'r') as f:
+    with open(args.filepath, "r") as f:
         code = f.read()
 
     brainfuck(code, args.bits)
diff --git a/reasoning_gym/code/contrib/bfit/README.md b/reasoning_gym/code/contrib/bfit/README.md
index 4ad60e98..2b503c05 100644
--- a/reasoning_gym/code/contrib/bfit/README.md
+++ b/reasoning_gym/code/contrib/bfit/README.md
@@ -57,7 +57,7 @@ int main()
 $ ./BF-it.py helloworld.code
 Compiling file 'helloworld.code'...
 Compiled successfully to 'helloworld.bf'
-$ cat helloworld.bf 
+$ cat helloworld.bf
 >[-]>[-]<>++++++++[-<+++++++++>]<.>++++[-<+++++++>]
 <+.+++++++..+++.>++++++[-<------------->]<-.>+++++[
 -<+++++++++++>]<.>++++[-<++++++>]<.+++.------.-----
@@ -98,4 +98,3 @@ If you found a bug, or have an idea for a feature, open an issue
 * https://introcs.cs.princeton.edu/java/11precedence/ for operator precedence
 * https://logomakr.com/ for creating a logo
 * https://www.youtube.com/ for setting the mood
-
diff --git a/reasoning_gym/cognition/__init__.py b/reasoning_gym/cognition/__init__.py
index e1f01947..fddd97b1 100644
--- a/reasoning_gym/cognition/__init__.py
+++ b/reasoning_gym/cognition/__init__.py
@@ -7,9 +7,9 @@ Cognition tasks for training reasoning capabilities:
 """
 
 from .color_cube_rotation import ColorCubeRotationConfig, ColorCubeRotationDataset
+from .figlet_fonts import FigletFontConfig, FigletFontDataset
 from .number_sequences import NumberSequenceConfig, NumberSequenceDataset
 from .rubiks_cube import RubiksCubeConfig, RubiksCubeDataset
-from .figlet_fonts import FigletFontConfig, FigletFontDataset
 
 __all__ = [
     "NumberSequenceConfig",
@@ -19,5 +19,5 @@ __all__ = [
     "RubiksCubeConfig",
     "RubiksCubeDataset",
     "FigletFontConfig",
-    "FigletFontDataset"
+    "FigletFontDataset",
 ]
diff --git a/reasoning_gym/games/__init__.py b/reasoning_gym/games/__init__.py
index 0826dea6..a801c6e4 100644
--- a/reasoning_gym/games/__init__.py
+++ b/reasoning_gym/games/__init__.py
@@ -7,10 +7,10 @@ Game tasks for training reasoning capabilities:
 """
 
 from .countdown import CountdownConfig, CountdownDataset
+from .game_of_life import GameOfLifeConfig, GameOfLifeDataset
 from .maze import MazeConfig, MazeDataset
 from .mini_sudoku import MiniSudokuConfig, MiniSudokuDataset
 from .sudoku import SudokuConfig, SudokuDataset
-from .game_of_life import GameOfLifeConfig, GameOfLifeDataset
 
 __all__ = [
     "CountdownConfig",
diff --git a/reasoning_gym/games/game_of_life.py b/reasoning_gym/games/game_of_life.py
index cc4dc6a8..c8cdc0d1 100644
--- a/reasoning_gym/games/game_of_life.py
+++ b/reasoning_gym/games/game_of_life.py
@@ -1,18 +1,19 @@
 from dataclasses import dataclass
 from random import Random
-from typing import List, Optional, Tuple, Dict
+from typing import Dict, List, Optional, Tuple
 
 import cellpylib as cpl
 
 from ..factory import ProceduralDataset, register_dataset
 
+
 @dataclass
 class GameOfLifeConfig:
-    """Configuration for sudoku puzzle generation"""
+    """Configuration for Game of Life puzzle generation"""
 
-    grid_size_x: int = 20 
+    grid_size_x: int = 20
     grid_size_y: int = 20
-    filled_cells: int = 100 # actually a max
+    filled_cells: int = 100  # upper bound, not an exact count
     simulation_steps: int = 1
     seed: Optional[int] = None
     size: int = 500
@@ -25,11 +26,12 @@ class GameOfLifeConfig:
         assert self.filled_cells <= self.grid_size_x * self.grid_size_y, "filled_cells must fit in x times y"
 
 
-class GameOfLifeConfigDataset(ProceduralDataset):
+class GameOfLifeDataset(ProceduralDataset):
     """Generates Game of Life games with configurable parameters"""
 
     def __init__(self, config: GameOfLifeConfig):
-        self._prompt_templates = ["What will this Game of Life board look like after {simulation_steps} steps of simulation?\n\n{board}"
+        self._prompt_templates = [
+            "What will this Game of Life board look like after {simulation_steps} steps of simulation?\n\n{board}"
         ]
 
         super().__init__(config=config, seed=config.seed, size=config.size)
@@ -46,7 +48,7 @@ class GameOfLifeConfigDataset(ProceduralDataset):
         rng = Random(self.seed + idx)
 
         # Make the board
-        board  = cpl.init_simple2d(self.config.grid_size_x, self.config.grid_size_y)
+        board = cpl.init_simple2d(self.config.grid_size_x, self.config.grid_size_y)
         board[:, :, :] = 0
 
         # Add the cells
@@ -56,13 +58,17 @@ class GameOfLifeConfigDataset(ProceduralDataset):
             board[:, rx, ry] = 1
 
         # Simulate the result to get the answer
-        evolved = cpl.evolve2d(board, timesteps=self.config.simulation_steps + 1, apply_rule=cpl.game_of_life_rule, memoize='recursive')
+        evolved = cpl.evolve2d(
+            board, timesteps=self.config.simulation_steps + 1, apply_rule=cpl.game_of_life_rule, memoize="recursive"
+        )
 
         board_str = str(board[0])
         result_str = str(evolved[-1])
 
         return {
-            "question": rng.choice(self._prompt_templates).format(simulation_steps=self.config.simulation_steps, board=board_str),
+            "question": rng.choice(self._prompt_templates).format(
+                simulation_steps=self.config.simulation_steps, board=board_str
+            ),
             "answer": result_str,
             "metadata": {
                 "grid_size_x": self.config.grid_size_x,
@@ -87,10 +93,10 @@ class GameOfLifeConfigDataset(ProceduralDataset):
 
         if answer == None:
             return 0.0
-        if answer.replace('\n', '') != entry['answer'].replace('\n', ''):
+        if answer.replace("\n", "") != entry["answer"].replace("\n", ""):
             return 0.01
         else:
-            return 1.0 # Yay
+            return 1.0  # Yay
 
 
-register_dataset("game_of_life", GameOfLifeConfigDataset, GameOfLifeConfig)
+register_dataset("game_of_life", GameOfLifeDataset, GameOfLifeConfig)
diff --git a/reasoning_gym/graphs/__init__.py b/reasoning_gym/graphs/__init__.py
index 409e954f..6bbe7d67 100644
--- a/reasoning_gym/graphs/__init__.py
+++ b/reasoning_gym/graphs/__init__.py
@@ -3,7 +3,7 @@ from .quantum_lock import QuantumLockConfig, QuantumLockDataset
 
 __all__ = [
     "FamilyRelationshipsConfig",
-    "FamilyRelationshipsDataset", 
+    "FamilyRelationshipsDataset",
     "QuantumLockConfig",
     "QuantumLockDataset",
 ]
diff --git a/scripts/generate_gallery.py b/scripts/generate_gallery.py
index b9cf4630..06d841d4 100755
--- a/scripts/generate_gallery.py
+++ b/scripts/generate_gallery.py
@@ -32,7 +32,7 @@ def generate_gallery() -> str:
 
-        # Add dataset header with anchor
+        # Add dataset header (heading text only, no explicit anchor suffix)
         anchor = name.replace("_", "-").lower()
-        content.append(f"### {name} {{{anchor}}}\n")
+        content.append(f"### {name}\n")
 
         # Get dataset class docstring if available
         if dataset.__class__.__doc__:
diff --git a/tests/test_bf.py b/tests/test_bf.py
index cefac4c7..86d2619a 100644
--- a/tests/test_bf.py
+++ b/tests/test_bf.py
@@ -2,6 +2,7 @@ import pytest
 
 from reasoning_gym.code.bf import BFConfig, BFDataset
 
+
 def test_bf():
     """Test basic properties and solution of generated items"""
 
@@ -34,4 +35,4 @@ def test_bf():
     config = BFConfig(seed=44, size=20, difficulty=3)
     dataset = BFDataset(config)
     for item in dataset:
-        assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0
\ No newline at end of file
+        assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0
diff --git a/tests/test_game_of_life.py b/tests/test_game_of_life.py
index 288a1fe4..df0f133d 100644
--- a/tests/test_game_of_life.py
+++ b/tests/test_game_of_life.py
@@ -1,20 +1,14 @@
 import pytest
 
-from reasoning_gym.games.game_of_life import GameOfLifeConfig, GameOfLifeConfigDataset
+from reasoning_gym.games.game_of_life import GameOfLifeConfig, GameOfLifeDataset
+
 
 def test_game_of_life():
     """Test basic properties and solution of generated items"""
 
     # Easy
-    config = GameOfLifeConfig(
-        seed=42, 
-        size=1, 
-        grid_size_x=20,
-        grid_size_y=20,
-        filled_cells=10,
-        simulation_steps=1
-    )
-    dataset = GameOfLifeConfigDataset(config)
+    config = GameOfLifeConfig(seed=42, size=1, grid_size_x=20, grid_size_y=20, filled_cells=10, simulation_steps=1)
+    dataset = GameOfLifeDataset(config)
 
     for item in dataset:
         assert isinstance(item, dict)