diff --git a/GALLERY.md b/GALLERY.md
index 07642aa7..4ead3457 100644
--- a/GALLERY.md
+++ b/GALLERY.md
@@ -5,6 +5,7 @@ This gallery shows examples from all available datasets using their default conf
 - [advanced_geometry](#advanced_geometry)
 - [aiw](#aiw)
 - [arc_1d](#arc_1d)
+- [arc_agi](#arc_agi)
 - [base_conversion](#base_conversion)
 - [basic_arithmetic](#basic_arithmetic)
 - [bf](#bf)
@@ -230,6 +231,421 @@ Metadata: {'task_name': 'two_points_and_fill_inv', 'size': 26, 'train_examples':
 
 ````
 
+### arc_agi
+Default configuration:
+```python
+use_train = True
+use_eval = True
+board_format_opts = BoardFormattingOptions(alphabet=['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'], col_delimiter=' ', row_delimiter='\n', array_brackets=False)
+rotations = ['90', '180', '270']
+mirrors = ['horizontal', 'vertical', 'diagonal', 'counterdiagonal']
+use_color_permutation = True
+seed = 42
+size = 500
+```
+
+Example tasks:
+````
+Example 1:
+Question: Find the common rule that maps an input grid to an output grid, given the examples below.
+
+Example 1:
+
+Input:
+7 7 7 7 7 7 7 7 7 7 7 7 7 7
+7 6 3 6 7 7 7 7 7 7 7 7 7 7
+7 6 6 3 7 6 6 6 7 7 6 3 7 7
+7 7 7 7 7 6 3 6 7 7 6 6 7 7
+7 7 7 7 7 6 6 3 7 7 7 7 7 7
+7 7 7 7 7 3 6 6 7 7 7 6 6 6
+7 7 7 7 7 7 7 7 7 7 7 6 3 6
+7 6 6 3 7 7 7 7 7 7 7 6 6 6
+7 3 6 6 7 7 7 7 7 7 7 7 7 7
+7 6 6 6 7 7 7 6 6 6 7 7 7 7
+7 7 7 7 7 7 7 6 6 6 7 7 7 7
+7 7 7 7 7 7 7 3 6 6 7 7 7 7
+7 7 7 7 7 7 7 6 6 6 7 7 7 7
+7 7 7 7 7 7 7 7 7 7 7 7 7 7
+Output:
+7 7 7 7 7 7 7 7 7 7 7 7 7 7
+7 7 7 7 7 7 7 7 7 7 7 7 7 7
+7 7 7 7 7 7 7 7 7 7 6 3 7 7
+7 7 7 7 7 7 7 7 7 7 6 6 7 7
+7 7 7 7 7 7 7 7 7 7 7 7 7 7
+7 7 7 7 7 7 7 7 7 7 7 6 6 6
+7 7 7 7 7 7 7 7 7 7 7 6 3 6
+7 7 7 7 7 7 7 7 7 7 7 6 6 6
+7 7 7 7 7 7 7 7 7 7 7 7 7 7
+7 7 7 7 7 7 7 6 6 6 7 7 7 7
+7 7 7 7 7 7 7 6 6 6 7 7 7 7
+7 7 7 7 7 7 7 3 6 6 7 7 7 7
+7 7 7 7 7 7 7 6 6 6 7 7 7 7
+7 7 7 7 7 7 7 7 7 7 7 7 7 7
+
+Example 2:
+
+Input:
+7 7 7 7 7 6 3 6 7 7 7 6 6 7
+7 7 7 7 7 6 6 6 7 7 7 6 6 7
+6 6 6 6 7 6 6 6 7 7 7 6 6 7
+6 3 6 6 7 7 7 7 7 7 7 7 7 7
+6 6 6 6 7 7 7 7 7 7 7 7 7 7
+7 7 7 7 7 7 7 7 7 6 6 3 6 7
+7 7 7 7 7 7 7 7 7 6 3 6 6 7
+7 7 7 6 6 6 6 7 7 6 6 6 3 7
+7 7 7 6 6 3 6 7 7 7 7 7 7 7
+7 7 7 6 3 6 6 7 7 7 7 7 7 7
+7 7 7 6 6 6 6 7 7 7 6 3 6 6
+7 7 7 7 7 7 7 7 7 7 6 6 6 3
+7 7 7 7 7 7 7 7 7 7 6 3 3 6
+7 7 7 7 7 7 7 7 7 7 6 6 6 6
+Output:
+7 7 7 7 7 6 3 6 7 7 7 6 6 7
+7 7 7 7 7 6 6 6 7 7 7 6 6 7
+6 6 6 6 7 6 6 6 7 7 7 6 6 7
+6 3 6 6 7 7 7 7 7 7 7 7 7 7
+6 6 6 6 7 7 7 7 7 7 7 7 7 7
+7 7 7 7 7 7 7 7 7 7 7 7 7 7
+7 7 7 7 7 7 7 7 7 7 7 7 7 7
+7 7 7 7 7 7 7 7 7 7 7 7 7 7
+7 7 7 7 7 7 7 7 7 7 7 7 7 7
+7 7 7 7 7 7 7 7 7 7 7 7 7 7
+7 7 7 7 7 7 7 7 7 7 7 7 7 7
+7 7 7 7 7 7 7 7 7 7 7 7 7 7
+7 7 7 7 7 7 7 7 7 7 7 7 7 7
+7 7 7 7 7 7 7 7 7 7 7 7 7 7
+
+Example 3:
+
+Input:
+7 7 7 7 7 6 6 6 6 7 7 3 6 7 7
+6 6 6 6 7 3 6 6 3 7 7 6 3 7 7
+6 3 6 6 7 6 6 6 6 7 7 7 7 7 7
+6 6 6 6 7 6 6 3 6 7 7 6 6 6 6
+7 7 7 7 7 7 7 7 7 7 7 6 3 6 6
+7 7 7 7 7 7 7 7 7 7 7 6 6 6 6
+7 7 6 6 3 6 6 7 7 7 7 7 7 7 7
+7 7 6 6 6 3 6 7 7 7 7 7 7 7 7
+7 7 6 3 6 6 6 7 7 7 7 7 7 7 7
+7 7 7 7 7 7 7 7 7 6 6 3 7 7 7
+7 7 6 6 6 6 7 7 7 6 3 6 7 7 7
+7 7 6 6 6 6 7 7 7 6 6 6 7 7 7
+7 7 6 6 6 6 7 7 7 3 6 3 7 7 7
+Output:
+7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
+6 6 6 6 7 7 7 7 7 7 7 7 7 7 7
+6 3 6 6 7 7 7 7 7 7 7 7 7 7 7
+6 6 6 6 7 7 7 7 7 7 7 6 6 6 6
+7 7 7 7 7 7 7 7 7 7 7 6 3 6 6
+7 7 7 7 7 7 7 7 7 7 7 6 6 6 6
+7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
+7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
+7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
+7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
+7 7 6 6 6 6 7 7 7 7 7 7 7 7 7
+7 7 6 6 6 6 7 7 7 7 7 7 7 7 7
+7 7 6 6 6 6 7 7 7 7 7 7 7 7 7
+
+
+Below is a test input grid. Predict the corresponding output grid by applying the rule you found.
+Your final answer should just be the text output grid itself.
+
+Input:
+7 7 7 7 7 7 7 7 6 3 6 6
+6 6 6 7 7 7 7 7 6 6 6 6
+3 6 6 7 7 7 7 7 6 3 6 3
+6 6 6 7 3 6 6 7 7 7 7 7
+7 7 7 7 6 6 6 7 7 7 7 7
+7 7 7 7 6 6 3 7 7 7 7 7
+7 7 7 7 6 6 6 7 6 6 6 6
+7 7 7 7 7 7 7 7 6 6 3 6
+7 6 6 6 6 6 6 7 6 6 6 6
+7 6 6 6 6 3 6 7 6 6 6 6
+7 6 3 6 6 6 6 7 7 7 7 7
+7 6 6 6 6 6 6 7 6 6 6 7
+7 7 7 7 7 7 7 7 6 6 6 7
+
+Answer: 7 7 7 7 7 7 7 7 7 7 7 7
+6 6 6 7 7 7 7 7 7 7 7 7
+3 6 6 7 7 7 7 7 7 7 7 7
+6 6 6 7 7 7 7 7 7 7 7 7
+7 7 7 7 7 7 7 7 7 7 7 7
+7 7 7 7 7 7 7 7 7 7 7 7
+7 7 7 7 7 7 7 7 6 6 6 6
+7 7 7 7 7 7 7 7 6 6 3 6
+7 7 7 7 7 7 7 7 6 6 6 6
+7 7 7 7 7 7 7 7 6 6 6 6
+7 7 7 7 7 7 7 7 7 7 7 7
+7 7 7 7 7 7 7 7 6 6 6 7
+7 7 7 7 7 7 7 7 6 6 6 7
+Metadata: {'input': ((7, 7, 7, 7, 7, 7, 7, 7, 6, 3, 6, 6), (6, 6, 6, 7, 7, 7, 7, 7, 6, 6, 6, 6), (3, 6, 6, 7, 7, 7, 7, 7, 6, 3, 6, 3), (6, 6, 6, 7, 3, 6, 6, 7, 7, 7, 7, 7), (7, 7, 7, 7, 6, 6, 6, 7, 7, 7, 7, 7), (7, 7, 7, 7, 6, 6, 3, 7, 7, 7, 7, 7), (7, 7, 7, 7, 6, 6, 6, 7, 6, 6, 6, 6), (7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 3, 6), (7, 6, 6, 6, 6, 6, 6, 7, 6, 6, 6, 6), (7, 6, 6, 6, 6, 3, 6, 7, 6, 6, 6, 6), (7, 6, 3, 6, 6, 6, 6, 7, 7, 7, 7, 7), (7, 6, 6, 6, 6, 6, 6, 7, 6, 6, 6, 7), (7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 7)), 'output': ((7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7), (6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7), (3, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7), (6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6), (7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 3, 6), (7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6), (7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6), (7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 7), (7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 7)), 'task_id': 'a934301b'}
+
+Example 2:
+Question: Find the common rule that maps an input grid to an output grid, given the examples below.
+
+Example 1:
+
+Input:
+2 8 8 8 8 8 8 8 8 9
+2 8 8 0 8 8 8 8 8 9
+2 8 8 8 8 8 8 8 8 9
+2 8 8 8 8 8 8 8 8 9
+2 8 8 8 8 0 8 8 8 9
+2 8 8 8 8 8 8 8 8 9
+2 8 8 8 8 8 8 0 8 9
+2 8 8 8 8 8 8 8 8 9
+2 8 8 8 8 8 8 8 8 9
+2 8 8 8 8 8 8 8 8 9
+Output:
+2 8 8 8 8 8 8 8 8 9
+2 8 8 2 8 8 8 8 8 9
+2 8 8 8 8 8 8 8 8 9
+2 8 8 8 8 8 8 8 8 9
+2 8 8 8 8 9 8 8 8 9
+2 8 8 8 8 8 8 8 8 9
+2 8 8 8 8 8 8 9 8 9
+2 8 8 8 8 8 8 8 8 9
+2 8 8 8 8 8 8 8 8 9
+2 8 8 8 8 8 8 8 8 9
+
+Example 2:
+
+Input:
+6 6 6 6 6 6 6 6 6 6
+8 8 8 8 8 8 8 8 8 8
+8 8 0 8 8 8 8 8 0 8
+8 8 8 8 8 8 0 8 8 8
+8 8 8 8 8 8 8 8 8 8
+8 8 8 8 8 8 8 8 8 8
+8 8 8 8 8 0 8 8 8 8
+8 0 8 8 8 8 8 8 8 8
+8 8 8 8 8 8 8 8 8 8
+1 1 1 1 1 1 1 1 1 1
+Output:
+6 6 6 6 6 6 6 6 6 6
+8 8 8 8 8 8 8 8 8 8
+8 8 6 8 8 8 8 8 6 8
+8 8 8 8 8 8 6 8 8 8
+8 8 8 8 8 8 8 8 8 8
+8 8 8 8 8 8 8 8 8 8
+8 8 8 8 8 1 8 8 8 8
+8 1 8 8 8 8 8 8 8 8
+8 8 8 8 8 8 8 8 8 8
+1 1 1 1 1 1 1 1 1 1
+
+Example 3:
+
+Input:
+5 5 5 5 5 5 5 5 5 5
+8 8 8 8 8 8 8 8 8 8
+8 8 8 8 8 0 8 8 8 8
+8 8 0 8 8 8 8 8 0 8
+8 8 8 8 8 8 8 8 8 8
+8 8 8 8 8 8 8 8 8 8
+8 8 8 0 8 8 8 8 0 8
+8 8 8 8 8 8 0 8 8 8
+8 8 8 8 8 8 8 8 8 8
+7 7 7 7 7 7 7 7 7 7
+Output:
+5 5 5 5 5 5 5 5 5 5
+8 8 8 8 8 8 8 8 8 8
+8 8 8 8 8 5 8 8 8 8
+8 8 5 8 8 8 8 8 5 8
+8 8 8 8 8 8 8 8 8 8
+8 8 8 8 8 8 8 8 8 8
+8 8 8 7 8 8 8 8 7 8
+8 8 8 8 8 8 7 8 8 8
+8 8 8 8 8 8 8 8 8 8
+7 7 7 7 7 7 7 7 7 7
+
+
+Below is a test input grid. Predict the corresponding output grid by applying the rule you found.
+Your final answer should just be the text output grid itself.
+
+Input:
+6 8 8 8 8 8 8 8 0 4
+6 0 8 8 0 8 8 8 8 4
+6 8 8 8 8 8 8 8 8 4
+6 8 8 8 8 8 0 8 8 4
+6 8 8 0 8 8 8 8 8 4
+6 8 8 8 8 8 0 8 8 4
+6 8 8 8 8 8 8 8 8 4
+6 8 8 8 8 0 8 8 8 4
+6 8 8 0 8 8 8 0 8 4
+6 8 8 8 8 8 8 8 8 4
+
+Answer: 6 8 8 8 8 8 8 8 4 4
+6 6 8 8 6 8 8 8 8 4
+6 8 8 8 8 8 8 8 8 4
+6 8 8 8 8 8 4 8 8 4
+6 8 8 6 8 8 8 8 8 4
+6 8 8 8 8 8 4 8 8 4
+6 8 8 8 8 8 8 8 8 4
+6 8 8 8 8 4 8 8 8 4
+6 8 8 6 8 8 8 4 8 4
+6 8 8 8 8 8 8 8 8 4
+Metadata: {'input': ((6, 8, 8, 8, 8, 8, 8, 8, 0, 4), (6, 0, 8, 8, 0, 8, 8, 8, 8, 4), (6, 8, 8, 8, 8, 8, 8, 8, 8, 4), (6, 8, 8, 8, 8, 8, 0, 8, 8, 4), (6, 8, 8, 0, 8, 8, 8, 8, 8, 4), (6, 8, 8, 8, 8, 8, 0, 8, 8, 4), (6, 8, 8, 8, 8, 8, 8, 8, 8, 4), (6, 8, 8, 8, 8, 0, 8, 8, 8, 4), (6, 8, 8, 0, 8, 8, 8, 0, 8, 4), (6, 8, 8, 8, 8, 8, 8, 8, 8, 4)), 'output': ((6, 8, 8, 8, 8, 8, 8, 8, 4, 4), (6, 6, 8, 8, 6, 8, 8, 8, 8, 4), (6, 8, 8, 8, 8, 8, 8, 8, 8, 4), (6, 8, 8, 8, 8, 8, 4, 8, 8, 4), (6, 8, 8, 6, 8, 8, 8, 8, 8, 4), (6, 8, 8, 8, 8, 8, 4, 8, 8, 4), (6, 8, 8, 8, 8, 8, 8, 8, 8, 4), (6, 8, 8, 8, 8, 4, 8, 8, 8, 4), (6, 8, 8, 6, 8, 8, 8, 4, 8, 4), (6, 8, 8, 8, 8, 8, 8, 8, 8, 4)), 'task_id': '2204b7a8'}
+
+Example 3:
+Question: Find the common rule that maps an input grid to an output grid, given the examples below.
+
+Example 1:
+
+Input:
+5 5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 5 5 5 5 5 5 5 5 8 8 8 8 8 5
+5 5 5 5 5 5 5 5 5 5 5 5 5 5 8 8 8 8 8 5
+5 5 8 8 8 8 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 8 8 8 8 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 8 8 8 8 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 5 5 5 5 5 8 8 8 8 5 5 5 5 5
+5 5 8 8 8 8 5 5 5 5 5 8 8 8 8 5 5 5 5 5
+2 5 8 8 8 8 5 5 5 5 5 8 8 8 8 5 5 5 5 2
+5 5 8 8 8 8 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 8 8 8 8 8 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 8 8 8 8 8 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 8 8 8 8 8 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 8 8 8 8 8 5 5 5 8 8 8 8 5 5
+5 5 5 5 5 5 5 5 5 5 5 5 5 5 8 8 8 8 5 5
+5 5 5 5 5 5 5 2 5 5 5 5 5 5 8 8 8 8 5 5
+Output:
+5 5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 5 2 5 5 5 5 5 5 8 8 8 8 8 5
+5 5 5 5 5 5 5 2 5 5 5 5 5 5 8 8 8 8 8 5
+5 5 8 8 8 8 5 2 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 8 8 8 8 5 2 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 8 8 8 8 5 2 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 5 2 5 5 5 2 2 2 2 5 5 5 5 5
+5 5 2 2 2 2 5 2 5 5 5 2 2 2 2 5 5 5 5 5
+2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
+5 5 2 2 2 2 5 2 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 2 2 2 2 2 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 2 2 2 2 2 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 2 2 2 2 2 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 2 2 2 2 2 5 5 5 8 8 8 8 5 5
+5 5 5 5 5 5 5 2 5 5 5 5 5 5 8 8 8 8 5 5
+5 5 5 5 5 5 5 2 5 5 5 5 5 5 8 8 8 8 5 5
+
+Example 2:
+
+Input:
+5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 8 8 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 8 8 5 5 5 5 5 8 8 8 5 5 5 5 8 8 8 8
+5 5 5 5 5 5 5 5 5 8 8 8 5 5 5 5 8 8 8 8
+5 5 5 8 8 8 8 8 5 8 8 8 5 5 5 5 8 8 8 8
+5 5 5 8 8 8 8 8 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 8 8 8 8 8 5 5 5 5 5 5 8 8 8 8 5 5
+5 5 5 8 8 8 8 8 5 5 5 5 5 5 8 8 8 8 5 5
+5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5
+Output:
+5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 8 8 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 8 8 5 5 2 5 5 8 8 8 5 5 5 5 8 8 8 8
+5 5 5 5 5 5 2 5 5 8 8 8 5 5 5 5 8 8 8 8
+5 5 5 2 2 2 2 2 5 8 8 8 5 5 5 5 8 8 8 8
+5 5 5 2 2 2 2 2 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 2 2 2 2 2 5 5 5 5 5 5 8 8 8 8 5 5
+5 5 5 2 2 2 2 2 5 5 5 5 5 5 8 8 8 8 5 5
+5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5
+
+Example 3:
+
+Input:
+5 8 8 8 8 8 5 2 5 5 5 5 5 5
+5 8 8 8 8 8 5 5 5 5 5 8 8 8
+5 5 5 5 5 5 5 5 5 5 5 8 8 8
+5 5 5 5 8 8 8 8 8 8 5 8 8 8
+5 5 5 5 8 8 8 8 8 8 5 8 8 8
+5 5 5 5 8 8 8 8 8 8 5 8 8 8
+8 8 5 5 8 8 8 8 8 8 5 5 5 5
+8 8 5 5 8 8 8 8 8 8 5 5 5 5
+5 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 8 8 8 5 5 8 8 8 5 5 5 5
+2 5 8 8 8 5 5 8 8 8 5 5 5 2
+5 5 8 8 8 5 5 5 5 5 5 5 5 5
+5 5 8 8 8 5 5 2 5 5 5 5 5 5
+Output:
+5 8 8 8 8 8 5 2 5 5 5 5 5 5
+5 8 8 8 8 8 5 2 5 5 5 8 8 8
+5 5 5 5 5 5 5 2 5 5 5 8 8 8
+5 5 5 5 2 2 2 2 2 2 5 8 8 8
+5 5 5 5 2 2 2 2 2 2 5 8 8 8
+5 5 5 5 2 2 2 2 2 2 5 8 8 8
+8 8 5 5 2 2 2 2 2 2 5 5 5 5
+8 8 5 5 2 2 2 2 2 2 5 5 5 5
+5 5 5 5 5 5 5 2 5 5 5 5 5 5
+5 5 2 2 2 5 5 2 2 2 5 5 5 5
+2 2 2 2 2 2 2 2 2 2 2 2 2 2
+5 5 2 2 2 5 5 2 5 5 5 5 5 5
+5 5 2 2 2 5 5 2 5 5 5 5 5 5
+
+
+Below is a test input grid. Predict the corresponding output grid by applying the rule you found.
+Your final answer should just be the text output grid itself.
+
+Input:
+5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 8 8 8 8 8 8 8 8 8 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 8 8 8 8 8 8 8 8 8 5 5 5 8 8 5 5 5 5 5 5
+5 5 5 5 5 8 8 8 8 8 8 8 8 8 5 5 5 8 8 5 5 8 8 8 5
+5 5 5 5 5 8 8 8 8 8 8 8 8 8 5 5 5 5 5 5 5 8 8 8 5
+5 5 5 5 5 8 8 8 8 8 8 8 8 8 5 5 5 5 5 5 5 8 8 8 5
+5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 8 8 8 8 8 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 8 8 8 8 8 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+2 8 8 8 8 8 5 5 5 8 8 5 5 5 5 5 5 5 5 5 5 5 5 5 2
+5 8 8 8 8 8 5 5 5 8 8 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 5 5 5 8 8 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 8 8 5
+5 5 5 5 5 5 5 5 5 5 5 5 5 8 8 8 8 8 5 5 5 5 8 8 5
+5 5 5 5 5 5 5 5 5 5 5 5 5 8 8 8 8 8 5 5 5 5 8 8 5
+5 5 5 5 8 8 8 5 5 5 5 5 5 8 8 8 8 8 5 5 5 5 5 5 5
+2 5 5 5 8 8 8 5 5 5 5 5 5 8 8 8 8 8 5 8 8 8 8 5 2
+5 5 5 5 8 8 8 5 5 8 8 8 5 8 8 8 8 8 5 8 8 8 8 5 5
+5 5 5 5 5 5 5 5 5 8 8 8 5 5 5 5 5 5 5 8 8 8 8 5 5
+5 5 5 5 5 5 5 5 5 8 8 8 5 5 5 5 5 5 5 8 8 8 8 5 5
+5 5 5 5 5 2 5 5 5 8 8 8 5 5 5 5 5 5 5 5 5 5 5 5 5
+
+Answer: 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 2 2 2 2 2 2 2 2 2 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 2 2 2 2 2 2 2 2 2 5 5 5 8 8 5 5 5 5 5 5
+5 5 5 5 5 2 2 2 2 2 2 2 2 2 5 5 5 8 8 5 5 8 8 8 5
+5 5 5 5 5 2 2 2 2 2 2 2 2 2 5 5 5 5 5 5 5 8 8 8 5
+5 5 5 5 5 2 2 2 2 2 2 2 2 2 5 5 5 5 5 5 5 8 8 8 5
+5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 2 2 2 2 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 2 2 2 2 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
+5 2 2 2 2 2 5 5 5 2 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 2 5 5 5 2 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5
+5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 8 8 5
+5 5 5 5 5 2 5 5 5 5 5 5 5 2 2 2 2 2 5 5 5 5 8 8 5
+5 5 5 5 5 2 5 5 5 5 5 5 5 2 2 2 2 2 5 5 5 5 8 8 5
+5 5 5 5 2 2 2 5 5 5 5 5 5 2 2 2 2 2 5 5 5 5 5 5 5
+2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
+5 5 5 5 2 2 2 5 5 2 2 2 5 2 2 2 2 2 5 2 2 2 2 5 5
+5 5 5 5 5 2 5 5 5 2 2 2 5 5 5 5 5 5 5 2 2 2 2 5 5
+5 5 5 5 5 2 5 5 5 2 2 2 5 5 5 5 5 5 5 2 2 2 2 5 5
+5 5 5 5 5 2 5 5 5 2 2 2 5 5 5 5 5 5 5 5 5 5 5 5 5
+Metadata: {'input': ((5, 5, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 5, 5, 8, 8, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 5, 5, 8, 8, 5, 5, 8, 8, 8, 5), (5, 5, 5, 5, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 5), (5, 5, 5, 5, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 5), (5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 8, 8, 8, 8, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 8, 8, 8, 8, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (2, 8, 8, 8, 8, 8, 5, 5, 5, 8, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2), (5, 8, 8, 8, 8, 8, 5, 5, 5, 8, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 5), (5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 8, 5, 5, 5, 5, 8, 8, 5), (5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 8, 5, 5, 5, 5, 8, 8, 5), (5, 5, 5, 5, 8, 8, 8, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 8, 5, 5, 5, 5, 5, 5, 5), (2, 5, 5, 5, 8, 8, 8, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 8, 5, 8, 8, 8, 8, 5, 2), (5, 5, 5, 5, 8, 8, 8, 5, 5, 8, 8, 8, 5, 8, 8, 8, 8, 8, 5, 8, 8, 8, 8, 5, 5), (5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 5, 5), (5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 5, 5), (5, 5, 5, 5, 5, 2, 5, 5, 5, 8, 8, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5)), 'output': ((5, 5, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 5, 8, 8, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 5, 8, 8, 5, 5, 8, 8, 8, 5), (5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 5), (5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 5), (5, 5, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 2, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 2, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), (5, 2, 2, 2, 2, 2, 5, 5, 5, 2, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 2, 5, 5, 5, 2, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 5), (5, 5, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 5, 5, 5, 5, 8, 8, 5), (5, 5, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 5, 5, 5, 5, 8, 8, 5), (5, 5, 5, 5, 2, 2, 2, 5, 5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5), (2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), (5, 5, 5, 5, 2, 2, 2, 5, 5, 2, 2, 2, 5, 2, 2, 2, 2, 2, 5, 2, 2, 2, 2, 5, 5), (5, 5, 5, 5, 5, 2, 5, 5, 5, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 2, 2, 2, 2, 5, 5), (5, 5, 5, 5, 5, 2, 5, 5, 5, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 2, 2, 2, 2, 5, 5), (5, 5, 5, 5, 5, 2, 5, 5, 5, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5)), 'task_id': '0d87d2a6'}
+
+````
+
 ### base_conversion
 Generates base conversion tasks
 
@@ -2097,7 +2513,11 @@ Input:
 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3
 
-Answer: ((3, 9, 3, 9, 3, 9, 3, 9), (3, 9, 3, 9, 3, 9, 3, 3), (3, 9, 3, 9, 3, 9, 9, 9), (3, 9, 3, 9, 3, 3, 3, 3), (3, 9, 3, 9, 9, 9, 9, 9))
+Answer: 3 9 3 9 3 9 3 9
+3 9 3 9 3 9 3 3
+3 9 3 9 3 9 9 9
+3 9 3 9 3 3 3 3
+3 9 3 9 9 9 9 9
 Metadata: {'input': ((3, 3, 3, 3, 3, 3, 3, 9), (3, 3, 3, 3, 3, 3, 3, 3), (3, 3, 3, 3, 3, 3, 3, 3), (3, 3, 3, 3, 3, 3, 3, 3), (3, 3, 3, 3, 3, 3, 3, 3)), 'output': ((3, 9, 3, 9, 3, 9, 3, 9), (3, 9, 3, 9, 3, 9, 3, 3), (3, 9, 3, 9, 3, 9, 9, 9), (3, 9, 3, 9, 3, 3, 3, 3), (3, 9, 3, 9, 9, 9, 9, 9)), 'task_id': 'd22278a0', 'difficulty': {'rng': 0.07173948707162241, 'pso': 0.12314814814814816}}
 
 Example 2:
@@ -2232,7 +2652,14 @@ Input:
 7 7 7 7 7 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 7 7
 
-Answer: ((7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 8, 7, 7, 7), (7, 7, 7, 7, 7, 7, 8, 7, 8, 7, 7), (7, 7, 7, 7, 7, 8, 7, 8, 7, 8, 7), (7, 7, 7, 7, 7, 7, 8, 7, 8, 7, 7), (7, 7, 7, 7, 7, 7, 7, 8, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7))
+Answer: 7 7 7 7 7 7 7 7 7 7 7
+7 7 7 7 7 7 7 8 7 7 7
+7 7 7 7 7 7 8 7 8 7 7
+7 7 7 7 7 8 7 8 7 8 7
+7 7 7 7 7 7 8 7 8 7 7
+7 7 7 7 7 7 7 8 7 7 7
+7 7 7 7 7 7 7 7 7 7 7
+7 7 7 7 7 7 7 7 7 7 7
 Metadata: {'input': ((7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 8, 7, 7, 7), (7, 7, 7, 7, 7, 7, 8, 7, 8, 7, 7), (7, 7, 7, 7, 7, 8, 7, 8, 7, 8, 7), (7, 7, 7, 7, 7, 7, 8, 7, 8, 7, 7), (7, 7, 7, 7, 7, 7, 7, 8, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7)), 'output': ((7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 8, 7, 7, 7), (7, 7, 7, 7, 7, 7, 8, 7, 8, 7, 7), (7, 7, 7, 7, 7, 8, 7, 8, 7, 8, 7), (7, 7, 7, 7, 7, 7, 8, 7, 8, 7, 7), (7, 7, 7, 7, 7, 7, 7, 8, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7)), 'task_id': '11852cab', 'difficulty': {'rng': 0.09651305327452808, 'pso': 0.15228956228956228}}
 
 Example 3:
@@ -2276,7 +2703,10 @@ Input:
 1 1 1 1 1
 1 1 1 1 1
 
-Answer: ((1, 1, 1, 1, 1), (1, 1, 1, 1, 1), (1, 1, 1, 1, 1), (1, 1, 1, 1, 1))
+Answer: 1 1 1 1 1
+1 1 1 1 1
+1 1 1 1 1
+1 1 1 1 1
 Metadata: {'input': ((1, 1, 1, 1, 1), (1, 1, 1, 1, 1)), 'output': ((1, 1, 1, 1, 1), (1, 1, 1, 1, 1), (1, 1, 1, 1, 1), (1, 1, 1, 1, 1)), 'task_id': '8be77c9e', 'difficulty': {'rng': 0.09322002370336528, 'pso': 0.0638888888888889}}
 
 ````
@@ -2993,7 +3423,7 @@ Metadata: {'task_type': 'datetime_tz', 'start_time': datetime.datetime(2964, 6,
 Example 2:
 Question: A video call started at 09:44 and ended at 12:22. How long was the call? Answer in HH:MM.
 Answer: 02:38
-Metadata: {'task_type': 'time', 'start_time': datetime.datetime(2025, 2, 8, 9, 44), 'end_time': datetime.datetime(2025, 2, 8, 12, 22), 'format': '%H:%M', 'expected_format': 'HH:MM'}
+Metadata: {'task_type': 'time', 'start_time': datetime.datetime(2025, 2, 9, 9, 44), 'end_time': datetime.datetime(2025, 2, 9, 12, 22), 'format': '%H:%M', 'expected_format': 'HH:MM'}
 
 Example 3:
 Question: Calculate the time difference between Sat Dec 22 2677 and Thu Mar 21 2678. Express the result in D days.
diff --git a/reasoning_gym/arc/arc_agi.py b/reasoning_gym/arc/arc_agi.py
index 92d7ec0d..b96698bb 100644
--- a/reasoning_gym/arc/arc_agi.py
+++ b/reasoning_gym/arc/arc_agi.py
@@ -22,15 +22,23 @@ class ArcAgiConfig:
     board_format_opts: BoardFormattingOptions = field(default_factory=lambda: BoardFormattingOptions())
 
     # Augmentation options
-    use_rotations: bool = True
-    use_mirrors: bool = True
+    rotations: list[str] = field(default_factory=lambda: ["90", "180", "270"])  # empty list for no rotations
+    mirrors: list[str] = field(
+        default_factory=lambda: ["horizontal", "vertical", "diagonal", "counterdiagonal"]
+    )  # empty list for no mirrors
     use_color_permutation: bool = True
-    
+
     seed: Optional[int] = None
     size: int = 500
 
     def validate(self):
         assert self.size > 0, "Size of dataset must be positive."
+        valid_rotations = ["90", "180", "270"]
+        valid_mirrors = ["horizontal", "vertical", "diagonal", "counterdiagonal"]
+        for rot in self.rotations:
+            assert rot in valid_rotations, f"Invalid rotation option: {rot}"
+        for mirror in self.mirrors:
+            assert mirror in valid_mirrors, f"Invalid mirror option: {mirror}"
 
 
 Board = list[list[int]]
@@ -103,11 +111,17 @@ class ArcAgiDataset(ProceduralDataset):
         """Create a composite augmentation function from enabled options"""
         fns = []
 
-        if self.config.use_rotations:
-            fns.append(rng.choice(ROTATION_AUGMENTATIONS))
+        # Map rotation strings to functions
+        rotation_map = {"90": rot90, "180": rot180, "270": rot270}
+        if self.config.rotations:
+            chosen_rot = rng.choice([identity] + [rotation_map[r] for r in self.config.rotations])
+            fns.append(chosen_rot)
 
-        if self.config.use_mirrors:
-            fns.append(rng.choice(MIRROR_AUGMENTATIONS))
+        # Map mirror strings to functions
+        mirror_map = {"horizontal": hmirror, "vertical": vmirror, "diagonal": dmirror, "counterdiagonal": cmirror}
+        if self.config.mirrors:
+            chosen_mirror = rng.choice([identity] + [mirror_map[m] for m in self.config.mirrors])
+            fns.append(chosen_mirror)
 
         if self.config.use_color_permutation:
             color_table = list(range(10))
diff --git a/tests/test_arc_agi.py b/tests/test_arc_agi.py
index bb42e9d8..da43e6ab 100644
--- a/tests/test_arc_agi.py
+++ b/tests/test_arc_agi.py
@@ -8,10 +8,23 @@ def test_arc_agi_config_validation():
     with pytest.raises(AssertionError):
         ArcAgiConfig(size=0).validate()
 
-    # Valid config should not raise
+    with pytest.raises(AssertionError):
+        ArcAgiConfig(rotations=["invalid"]).validate()
+
+    with pytest.raises(AssertionError):
+        ArcAgiConfig(mirrors=["invalid"]).validate()
+
+    # Valid configs should not raise
     config = ArcAgiConfig(size=10, seed=42)
     config.validate()
 
+    config = ArcAgiConfig(rotations=["90", "180"], mirrors=["horizontal", "diagonal"])
+    config.validate()
+
+    # Empty lists should be valid (no augmentations)
+    config = ArcAgiConfig(rotations=[], mirrors=[])
+    config.validate()
+
 
 def test_arc_agi_deterministic():
     """Test dataset reproducibility with fixed seed"""
@@ -52,26 +65,36 @@ def test_arc_agi_items():
 def test_arc_agi_augmentations():
     """Test that augmentations can be selectively enabled/disabled"""
     # Test with all augmentations disabled
-    config = ArcAgiConfig(seed=42, size=10, use_rotations=False, use_mirrors=False, use_color_permutation=False)
+    config = ArcAgiConfig(seed=42, size=10, rotations=[], mirrors=[], use_color_permutation=False)
     base_dataset = ArcAgiDataset(config)
     base_items = list(base_dataset)
 
-    # Test with rotations only
-    rot_config = ArcAgiConfig(seed=42, size=10, use_rotations=True, use_mirrors=False, use_color_permutation=False)
+    # Test with specific rotation only
+    rot_config = ArcAgiConfig(seed=42, size=10, rotations=["90"], mirrors=[], use_color_permutation=False)
     rot_dataset = ArcAgiDataset(rot_config)
     rot_items = list(rot_dataset)
 
-    # Items should differ when rotations are enabled
+    # Items should differ with rotation enabled
     assert any(
         base_items[i]["metadata"]["input"] != rot_items[i]["metadata"]["input"] for i in range(len(base_items))
-    ), "Rotation augmentation had no effect"
+    ), "90-degree rotation augmentation had no effect"
+
+    # Test with specific mirror only
+    mirror_config = ArcAgiConfig(seed=42, size=10, rotations=[], mirrors=["horizontal"], use_color_permutation=False)
+    mirror_dataset = ArcAgiDataset(mirror_config)
+    mirror_items = list(mirror_dataset)
+
+    # Items should differ with mirror enabled
+    assert any(
+        base_items[i]["metadata"]["input"] != mirror_items[i]["metadata"]["input"] for i in range(len(base_items))
+    ), "Horizontal mirror augmentation had no effect"
 
     # Test with color permutation only
-    color_config = ArcAgiConfig(seed=42, size=10, use_rotations=False, use_mirrors=False, use_color_permutation=True)
+    color_config = ArcAgiConfig(seed=42, size=10, rotations=[], mirrors=[], use_color_permutation=True)
     color_dataset = ArcAgiDataset(color_config)
     color_items = list(color_dataset)
 
-    # Items should differ when color permutation is enabled
+    # Items should differ with color permutation enabled
     assert any(
         base_items[i]["metadata"]["input"] != color_items[i]["metadata"]["input"] for i in range(len(base_items))
     ), "Color permutation had no effect"