diff --git a/GALLERY.md b/GALLERY.md index 07642aa7..4ead3457 100644 --- a/GALLERY.md +++ b/GALLERY.md @@ -5,6 +5,7 @@ This gallery shows examples from all available datasets using their default conf - [advanced_geometry](#advanced_geometry) - [aiw](#aiw) - [arc_1d](#arc_1d) +- [arc_agi](#arc_agi) - [base_conversion](#base_conversion) - [basic_arithmetic](#basic_arithmetic) - [bf](#bf) @@ -230,6 +231,421 @@ Metadata: {'task_name': 'two_points_and_fill_inv', 'size': 26, 'train_examples': ```` +### arc_agi +Default configuration: +```python +use_train = True +use_eval = True +board_format_opts = BoardFormattingOptions(alphabet=['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'], col_delimiter=' ', row_delimiter='\n', array_brackets=False) +rotations = ['90', '180', '270'] +mirrors = ['horizontal', 'vertical', 'diagonal', 'counterdiagonal'] +use_color_permutation = True +seed = 42 +size = 500 +``` + +Example tasks: +```` +Example 1: +Question: Find the common rule that maps an input grid to an output grid, given the examples below. + +Example 1: + +Input: +7 7 7 7 7 7 7 7 7 7 7 7 7 7 +7 6 3 6 7 7 7 7 7 7 7 7 7 7 +7 6 6 3 7 6 6 6 7 7 6 3 7 7 +7 7 7 7 7 6 3 6 7 7 6 6 7 7 +7 7 7 7 7 6 6 3 7 7 7 7 7 7 +7 7 7 7 7 3 6 6 7 7 7 6 6 6 +7 7 7 7 7 7 7 7 7 7 7 6 3 6 +7 6 6 3 7 7 7 7 7 7 7 6 6 6 +7 3 6 6 7 7 7 7 7 7 7 7 7 7 +7 6 6 6 7 7 7 6 6 6 7 7 7 7 +7 7 7 7 7 7 7 6 6 6 7 7 7 7 +7 7 7 7 7 7 7 3 6 6 7 7 7 7 +7 7 7 7 7 7 7 6 6 6 7 7 7 7 +7 7 7 7 7 7 7 7 7 7 7 7 7 7 +Output: +7 7 7 7 7 7 7 7 7 7 7 7 7 7 +7 7 7 7 7 7 7 7 7 7 7 7 7 7 +7 7 7 7 7 7 7 7 7 7 6 3 7 7 +7 7 7 7 7 7 7 7 7 7 6 6 7 7 +7 7 7 7 7 7 7 7 7 7 7 7 7 7 +7 7 7 7 7 7 7 7 7 7 7 6 6 6 +7 7 7 7 7 7 7 7 7 7 7 6 3 6 +7 7 7 7 7 7 7 7 7 7 7 6 6 6 +7 7 7 7 7 7 7 7 7 7 7 7 7 7 +7 7 7 7 7 7 7 6 6 6 7 7 7 7 +7 7 7 7 7 7 7 6 6 6 7 7 7 7 +7 7 7 7 7 7 7 3 6 6 7 7 7 7 +7 7 7 7 7 7 7 6 6 6 7 7 7 7 +7 7 7 7 7 7 7 7 7 7 7 7 7 7 + +Example 2: + +Input: +7 7 7 7 7 6 3 6 7 7 7 6 6 7 +7 7 7 7 7 6 6 6 7 7 7 6 6 7 +6 6 6 6 7 6 6 6 7 7 7 6 6 7 +6 3 6 6 7 7 7 7 7 7 7 7 7 7 +6 6 6 6 7 7 7 7 7 7 7 7 7 7 +7 7 7 7 7 7 7 7 7 6 6 3 6 7 +7 7 7 7 7 7 7 7 7 6 3 6 6 7 +7 7 7 6 6 6 6 7 7 6 6 6 3 7 +7 7 7 6 6 3 6 7 7 7 7 7 7 7 +7 7 7 6 3 6 6 7 7 7 7 7 7 7 +7 7 7 6 6 6 6 7 7 7 6 3 6 6 +7 7 7 7 7 7 7 7 7 7 6 6 6 3 +7 7 7 7 7 7 7 7 7 7 6 3 3 6 +7 7 7 7 7 7 7 7 7 7 6 6 6 6 +Output: +7 7 7 7 7 6 3 6 7 7 7 6 6 7 +7 7 7 7 7 6 6 6 7 7 7 6 6 7 +6 6 6 6 7 6 6 6 7 7 7 6 6 7 +6 3 6 6 7 7 7 7 7 7 7 7 7 7 +6 6 6 6 7 7 7 7 7 7 7 7 7 7 +7 7 7 7 7 7 7 7 7 7 7 7 7 7 +7 7 7 7 7 7 7 7 7 7 7 7 7 7 +7 7 7 7 7 7 7 7 7 7 7 7 7 7 +7 7 7 7 7 7 7 7 7 7 7 7 7 7 +7 7 7 7 7 7 7 7 7 7 7 7 7 7 +7 7 7 7 7 7 7 7 7 7 7 7 7 7 +7 7 7 7 7 7 7 7 7 7 7 7 7 7 +7 7 7 7 7 7 7 7 7 7 7 7 7 7 +7 7 7 7 7 7 7 7 7 7 7 7 7 7 + +Example 3: + +Input: +7 7 7 7 7 6 6 6 6 7 7 3 6 7 7 +6 6 6 6 7 3 6 6 3 7 7 6 3 7 7 +6 3 6 6 7 6 6 6 6 7 7 7 7 7 7 +6 6 6 6 7 6 6 3 6 7 7 6 6 6 6 +7 7 7 7 7 7 7 7 7 7 7 6 3 6 6 +7 7 7 7 7 7 7 7 7 7 7 6 6 6 6 +7 7 6 6 3 6 6 7 7 7 7 7 7 7 7 +7 7 6 6 6 3 6 7 7 7 7 7 7 7 7 +7 7 6 3 6 6 6 7 7 7 7 7 7 7 7 +7 7 7 7 7 7 7 7 7 6 6 3 7 7 7 +7 7 6 6 6 6 7 7 7 6 3 6 7 7 7 +7 7 6 6 6 6 7 7 7 6 6 6 7 7 7 +7 7 6 6 6 6 7 7 7 3 6 3 7 7 7 +Output: +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 +6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 +6 3 6 6 7 7 7 7 7 7 7 7 7 7 7 +6 6 6 6 7 7 7 7 7 7 7 6 6 6 6 +7 7 7 7 7 7 7 7 7 7 7 6 3 6 6 +7 7 7 7 7 7 7 7 7 7 7 6 6 6 6 +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 +7 7 6 6 6 6 7 7 7 7 7 7 7 7 7 +7 7 6 6 6 6 7 7 7 7 7 7 7 7 7 +7 7 6 6 6 6 7 7 7 7 7 7 7 7 7 + + +Below is a test input grid. Predict the corresponding output grid by applying the rule you found. +Your final answer should just be the text output grid itself. + +Input: +7 7 7 7 7 7 7 7 6 3 6 6 +6 6 6 7 7 7 7 7 6 6 6 6 +3 6 6 7 7 7 7 7 6 3 6 3 +6 6 6 7 3 6 6 7 7 7 7 7 +7 7 7 7 6 6 6 7 7 7 7 7 +7 7 7 7 6 6 3 7 7 7 7 7 +7 7 7 7 6 6 6 7 6 6 6 6 +7 7 7 7 7 7 7 7 6 6 3 6 +7 6 6 6 6 6 6 7 6 6 6 6 +7 6 6 6 6 3 6 7 6 6 6 6 +7 6 3 6 6 6 6 7 7 7 7 7 +7 6 6 6 6 6 6 7 6 6 6 7 +7 7 7 7 7 7 7 7 6 6 6 7 + +Answer: 7 7 7 7 7 7 7 7 7 7 7 7 +6 6 6 7 7 7 7 7 7 7 7 7 +3 6 6 7 7 7 7 7 7 7 7 7 +6 6 6 7 7 7 7 7 7 7 7 7 +7 7 7 7 7 7 7 7 7 7 7 7 +7 7 7 7 7 7 7 7 7 7 7 7 +7 7 7 7 7 7 7 7 6 6 6 6 +7 7 7 7 7 7 7 7 6 6 3 6 +7 7 7 7 7 7 7 7 6 6 6 6 +7 7 7 7 7 7 7 7 6 6 6 6 +7 7 7 7 7 7 7 7 7 7 7 7 +7 7 7 7 7 7 7 7 6 6 6 7 +7 7 7 7 7 7 7 7 6 6 6 7 +Metadata: {'input': ((7, 7, 7, 7, 7, 7, 7, 7, 6, 3, 6, 6), (6, 6, 6, 7, 7, 7, 7, 7, 6, 6, 6, 6), (3, 6, 6, 7, 7, 7, 7, 7, 6, 3, 6, 3), (6, 6, 6, 7, 3, 6, 6, 7, 7, 7, 7, 7), (7, 7, 7, 7, 6, 6, 6, 7, 7, 7, 7, 7), (7, 7, 7, 7, 6, 6, 3, 7, 7, 7, 7, 7), (7, 7, 7, 7, 6, 6, 6, 7, 6, 6, 6, 6), (7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 3, 6), (7, 6, 6, 6, 6, 6, 6, 7, 6, 6, 6, 6), (7, 6, 6, 6, 6, 3, 6, 7, 6, 6, 6, 6), (7, 6, 3, 6, 6, 6, 6, 7, 7, 7, 7, 7), (7, 6, 6, 6, 6, 6, 6, 7, 6, 6, 6, 7), (7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 7)), 'output': ((7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7), (6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7), (3, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7), (6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6), (7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 3, 6), (7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6), (7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6), (7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 7), (7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 7)), 'task_id': 'a934301b'} + +Example 2: +Question: Find the common rule that maps an input grid to an output grid, given the examples below. + +Example 1: + +Input: +2 8 8 8 8 8 8 8 8 9 +2 8 8 0 8 8 8 8 8 9 +2 8 8 8 8 8 8 8 8 9 +2 8 8 8 8 8 8 8 8 9 +2 8 8 8 8 0 8 8 8 9 +2 8 8 8 8 8 8 8 8 9 +2 8 8 8 8 8 8 0 8 9 +2 8 8 8 8 8 8 8 8 9 +2 8 8 8 8 8 8 8 8 9 +2 8 8 8 8 8 8 8 8 9 +Output: +2 8 8 8 8 8 8 8 8 9 +2 8 8 2 8 8 8 8 8 9 +2 8 8 8 8 8 8 8 8 9 +2 8 8 8 8 8 8 8 8 9 +2 8 8 8 8 9 8 8 8 9 +2 8 8 8 8 8 8 8 8 9 +2 8 8 8 8 8 8 9 8 9 +2 8 8 8 8 8 8 8 8 9 +2 8 8 8 8 8 8 8 8 9 +2 8 8 8 8 8 8 8 8 9 + +Example 2: + +Input: +6 6 6 6 6 6 6 6 6 6 +8 8 8 8 8 8 8 8 8 8 +8 8 0 8 8 8 8 8 0 8 +8 8 8 8 8 8 0 8 8 8 +8 8 8 8 8 8 8 8 8 8 +8 8 8 8 8 8 8 8 8 8 +8 8 8 8 8 0 8 8 8 8 +8 0 8 8 8 8 8 8 8 8 +8 8 8 8 8 8 8 8 8 8 +1 1 1 1 1 1 1 1 1 1 +Output: +6 6 6 6 6 6 6 6 6 6 +8 8 8 8 8 8 8 8 8 8 +8 8 6 8 8 8 8 8 6 8 +8 8 8 8 8 8 6 8 8 8 +8 8 8 8 8 8 8 8 8 8 +8 8 8 8 8 8 8 8 8 8 +8 8 8 8 8 1 8 8 8 8 +8 1 8 8 8 8 8 8 8 8 +8 8 8 8 8 8 8 8 8 8 +1 1 1 1 1 1 1 1 1 1 + +Example 3: + +Input: +5 5 5 5 5 5 5 5 5 5 +8 8 8 8 8 8 8 8 8 8 +8 8 8 8 8 0 8 8 8 8 +8 8 0 8 8 8 8 8 0 8 +8 8 8 8 8 8 8 8 8 8 +8 8 8 8 8 8 8 8 8 8 +8 8 8 0 8 8 8 8 0 8 +8 8 8 8 8 8 0 8 8 8 +8 8 8 8 8 8 8 8 8 8 +7 7 7 7 7 7 7 7 7 7 +Output: +5 5 5 5 5 5 5 5 5 5 +8 8 8 8 8 8 8 8 8 8 +8 8 8 8 8 5 8 8 8 8 +8 8 5 8 8 8 8 8 5 8 +8 8 8 8 8 8 8 8 8 8 +8 8 8 8 8 8 8 8 8 8 +8 8 8 7 8 8 8 8 7 8 +8 8 8 8 8 8 7 8 8 8 +8 8 8 8 8 8 8 8 8 8 +7 7 7 7 7 7 7 7 7 7 + + +Below is a test input grid. Predict the corresponding output grid by applying the rule you found. +Your final answer should just be the text output grid itself. + +Input: +6 8 8 8 8 8 8 8 0 4 +6 0 8 8 0 8 8 8 8 4 +6 8 8 8 8 8 8 8 8 4 +6 8 8 8 8 8 0 8 8 4 +6 8 8 0 8 8 8 8 8 4 +6 8 8 8 8 8 0 8 8 4 +6 8 8 8 8 8 8 8 8 4 +6 8 8 8 8 0 8 8 8 4 +6 8 8 0 8 8 8 0 8 4 +6 8 8 8 8 8 8 8 8 4 + +Answer: 6 8 8 8 8 8 8 8 4 4 +6 6 8 8 6 8 8 8 8 4 +6 8 8 8 8 8 8 8 8 4 +6 8 8 8 8 8 4 8 8 4 +6 8 8 6 8 8 8 8 8 4 +6 8 8 8 8 8 4 8 8 4 +6 8 8 8 8 8 8 8 8 4 +6 8 8 8 8 4 8 8 8 4 +6 8 8 6 8 8 8 4 8 4 +6 8 8 8 8 8 8 8 8 4 +Metadata: {'input': ((6, 8, 8, 8, 8, 8, 8, 8, 0, 4), (6, 0, 8, 8, 0, 8, 8, 8, 8, 4), (6, 8, 8, 8, 8, 8, 8, 8, 8, 4), (6, 8, 8, 8, 8, 8, 0, 8, 8, 4), (6, 8, 8, 0, 8, 8, 8, 8, 8, 4), (6, 8, 8, 8, 8, 8, 0, 8, 8, 4), (6, 8, 8, 8, 8, 8, 8, 8, 8, 4), (6, 8, 8, 8, 8, 0, 8, 8, 8, 4), (6, 8, 8, 0, 8, 8, 8, 0, 8, 4), (6, 8, 8, 8, 8, 8, 8, 8, 8, 4)), 'output': ((6, 8, 8, 8, 8, 8, 8, 8, 4, 4), (6, 6, 8, 8, 6, 8, 8, 8, 8, 4), (6, 8, 8, 8, 8, 8, 8, 8, 8, 4), (6, 8, 8, 8, 8, 8, 4, 8, 8, 4), (6, 8, 8, 6, 8, 8, 8, 8, 8, 4), (6, 8, 8, 8, 8, 8, 4, 8, 8, 4), (6, 8, 8, 8, 8, 8, 8, 8, 8, 4), (6, 8, 8, 8, 8, 4, 8, 8, 8, 4), (6, 8, 8, 6, 8, 8, 8, 4, 8, 4), (6, 8, 8, 8, 8, 8, 8, 8, 8, 4)), 'task_id': '2204b7a8'} + +Example 3: +Question: Find the common rule that maps an input grid to an output grid, given the examples below. + +Example 1: + +Input: +5 5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 5 5 5 5 5 5 5 5 8 8 8 8 8 5 +5 5 5 5 5 5 5 5 5 5 5 5 5 5 8 8 8 8 8 5 +5 5 8 8 8 8 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 8 8 8 8 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 8 8 8 8 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 5 5 5 5 5 8 8 8 8 5 5 5 5 5 +5 5 8 8 8 8 5 5 5 5 5 8 8 8 8 5 5 5 5 5 +2 5 8 8 8 8 5 5 5 5 5 8 8 8 8 5 5 5 5 2 +5 5 8 8 8 8 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 8 8 8 8 8 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 8 8 8 8 8 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 8 8 8 8 8 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 8 8 8 8 8 5 5 5 8 8 8 8 5 5 +5 5 5 5 5 5 5 5 5 5 5 5 5 5 8 8 8 8 5 5 +5 5 5 5 5 5 5 2 5 5 5 5 5 5 8 8 8 8 5 5 +Output: +5 5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 5 2 5 5 5 5 5 5 8 8 8 8 8 5 +5 5 5 5 5 5 5 2 5 5 5 5 5 5 8 8 8 8 8 5 +5 5 8 8 8 8 5 2 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 8 8 8 8 5 2 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 8 8 8 8 5 2 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 5 2 5 5 5 2 2 2 2 5 5 5 5 5 +5 5 2 2 2 2 5 2 5 5 5 2 2 2 2 5 5 5 5 5 +2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 +5 5 2 2 2 2 5 2 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 2 2 2 2 2 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 2 2 2 2 2 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 2 2 2 2 2 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 2 2 2 2 2 5 5 5 8 8 8 8 5 5 +5 5 5 5 5 5 5 2 5 5 5 5 5 5 8 8 8 8 5 5 +5 5 5 5 5 5 5 2 5 5 5 5 5 5 8 8 8 8 5 5 + +Example 2: + +Input: +5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 8 8 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 8 8 5 5 5 5 5 8 8 8 5 5 5 5 8 8 8 8 +5 5 5 5 5 5 5 5 5 8 8 8 5 5 5 5 8 8 8 8 +5 5 5 8 8 8 8 8 5 8 8 8 5 5 5 5 8 8 8 8 +5 5 5 8 8 8 8 8 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 8 8 8 8 8 5 5 5 5 5 5 8 8 8 8 5 5 +5 5 5 8 8 8 8 8 5 5 5 5 5 5 8 8 8 8 5 5 +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 +Output: +5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 8 8 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 8 8 5 5 2 5 5 8 8 8 5 5 5 5 8 8 8 8 +5 5 5 5 5 5 2 5 5 8 8 8 5 5 5 5 8 8 8 8 +5 5 5 2 2 2 2 2 5 8 8 8 5 5 5 5 8 8 8 8 +5 5 5 2 2 2 2 2 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 2 2 2 2 2 5 5 5 5 5 5 8 8 8 8 5 5 +5 5 5 2 2 2 2 2 5 5 5 5 5 5 8 8 8 8 5 5 +5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 + +Example 3: + +Input: +5 8 8 8 8 8 5 2 5 5 5 5 5 5 +5 8 8 8 8 8 5 5 5 5 5 8 8 8 +5 5 5 5 5 5 5 5 5 5 5 8 8 8 +5 5 5 5 8 8 8 8 8 8 5 8 8 8 +5 5 5 5 8 8 8 8 8 8 5 8 8 8 +5 5 5 5 8 8 8 8 8 8 5 8 8 8 +8 8 5 5 8 8 8 8 8 8 5 5 5 5 +8 8 5 5 8 8 8 8 8 8 5 5 5 5 +5 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 8 8 8 5 5 8 8 8 5 5 5 5 +2 5 8 8 8 5 5 8 8 8 5 5 5 2 +5 5 8 8 8 5 5 5 5 5 5 5 5 5 +5 5 8 8 8 5 5 2 5 5 5 5 5 5 +Output: +5 8 8 8 8 8 5 2 5 5 5 5 5 5 +5 8 8 8 8 8 5 2 5 5 5 8 8 8 +5 5 5 5 5 5 5 2 5 5 5 8 8 8 +5 5 5 5 2 2 2 2 2 2 5 8 8 8 +5 5 5 5 2 2 2 2 2 2 5 8 8 8 +5 5 5 5 2 2 2 2 2 2 5 8 8 8 +8 8 5 5 2 2 2 2 2 2 5 5 5 5 +8 8 5 5 2 2 2 2 2 2 5 5 5 5 +5 5 5 5 5 5 5 2 5 5 5 5 5 5 +5 5 2 2 2 5 5 2 2 2 5 5 5 5 +2 2 2 2 2 2 2 2 2 2 2 2 2 2 +5 5 2 2 2 5 5 2 5 5 5 5 5 5 +5 5 2 2 2 5 5 2 5 5 5 5 5 5 + + +Below is a test input grid. Predict the corresponding output grid by applying the rule you found. +Your final answer should just be the text output grid itself. + +Input: +5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 8 8 8 8 8 8 8 8 8 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 8 8 8 8 8 8 8 8 8 5 5 5 8 8 5 5 5 5 5 5 +5 5 5 5 5 8 8 8 8 8 8 8 8 8 5 5 5 8 8 5 5 8 8 8 5 +5 5 5 5 5 8 8 8 8 8 8 8 8 8 5 5 5 5 5 5 5 8 8 8 5 +5 5 5 5 5 8 8 8 8 8 8 8 8 8 5 5 5 5 5 5 5 8 8 8 5 +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 8 8 8 8 8 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 8 8 8 8 8 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +2 8 8 8 8 8 5 5 5 8 8 5 5 5 5 5 5 5 5 5 5 5 5 5 2 +5 8 8 8 8 8 5 5 5 8 8 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 5 5 5 8 8 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 8 8 5 +5 5 5 5 5 5 5 5 5 5 5 5 5 8 8 8 8 8 5 5 5 5 8 8 5 +5 5 5 5 5 5 5 5 5 5 5 5 5 8 8 8 8 8 5 5 5 5 8 8 5 +5 5 5 5 8 8 8 5 5 5 5 5 5 8 8 8 8 8 5 5 5 5 5 5 5 +2 5 5 5 8 8 8 5 5 5 5 5 5 8 8 8 8 8 5 8 8 8 8 5 2 +5 5 5 5 8 8 8 5 5 8 8 8 5 8 8 8 8 8 5 8 8 8 8 5 5 +5 5 5 5 5 5 5 5 5 8 8 8 5 5 5 5 5 5 5 8 8 8 8 5 5 +5 5 5 5 5 5 5 5 5 8 8 8 5 5 5 5 5 5 5 8 8 8 8 5 5 +5 5 5 5 5 2 5 5 5 8 8 8 5 5 5 5 5 5 5 5 5 5 5 5 5 + +Answer: 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 2 2 2 2 2 2 2 2 2 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 2 2 2 2 2 2 2 2 2 5 5 5 8 8 5 5 5 5 5 5 +5 5 5 5 5 2 2 2 2 2 2 2 2 2 5 5 5 8 8 5 5 8 8 8 5 +5 5 5 5 5 2 2 2 2 2 2 2 2 2 5 5 5 5 5 5 5 8 8 8 5 +5 5 5 5 5 2 2 2 2 2 2 2 2 2 5 5 5 5 5 5 5 8 8 8 5 +5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 2 2 2 2 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 2 2 2 2 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 +5 2 2 2 2 2 5 5 5 2 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 2 5 5 5 2 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 +5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 8 8 5 +5 5 5 5 5 2 5 5 5 5 5 5 5 2 2 2 2 2 5 5 5 5 8 8 5 +5 5 5 5 5 2 5 5 5 5 5 5 5 2 2 2 2 2 5 5 5 5 8 8 5 +5 5 5 5 2 2 2 5 5 5 5 5 5 2 2 2 2 2 5 5 5 5 5 5 5 +2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 +5 5 5 5 2 2 2 5 5 2 2 2 5 2 2 2 2 2 5 2 2 2 2 5 5 +5 5 5 5 5 2 5 5 5 2 2 2 5 5 5 5 5 5 5 2 2 2 2 5 5 +5 5 5 5 5 2 5 5 5 2 2 2 5 5 5 5 5 5 5 2 2 2 2 5 5 +5 5 5 5 5 2 5 5 5 2 2 2 5 5 5 5 5 5 5 5 5 5 5 5 5 +Metadata: {'input': ((5, 5, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 5, 5, 8, 8, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 5, 5, 8, 8, 5, 5, 8, 8, 8, 5), (5, 5, 5, 5, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 5), (5, 5, 5, 5, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 5), (5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 8, 8, 8, 8, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 8, 8, 8, 8, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (2, 8, 8, 8, 8, 8, 5, 5, 5, 8, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2), (5, 8, 8, 8, 8, 8, 5, 5, 5, 8, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 5), (5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 8, 5, 5, 5, 5, 8, 8, 5), (5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 8, 5, 5, 5, 5, 8, 8, 5), (5, 5, 5, 5, 8, 8, 8, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 8, 5, 5, 5, 5, 5, 5, 5), (2, 5, 5, 5, 8, 8, 8, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 8, 5, 8, 8, 8, 8, 5, 2), (5, 5, 5, 5, 8, 8, 8, 5, 5, 8, 8, 8, 5, 8, 8, 8, 8, 8, 5, 8, 8, 8, 8, 5, 5), (5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 5, 5), (5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 5, 5), (5, 5, 5, 5, 5, 2, 5, 5, 5, 8, 8, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5)), 'output': ((5, 5, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 5, 8, 8, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 5, 8, 8, 5, 5, 8, 8, 8, 5), (5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 5), (5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 5), (5, 5, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 2, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 2, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), (5, 2, 2, 2, 2, 2, 5, 5, 5, 2, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 2, 5, 5, 5, 2, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 5), (5, 5, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 5, 5, 5, 5, 8, 8, 5), (5, 5, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 5, 5, 5, 5, 8, 8, 5), (5, 5, 5, 5, 2, 2, 2, 5, 5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5), (2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), (5, 5, 5, 5, 2, 2, 2, 5, 5, 2, 2, 2, 5, 2, 2, 2, 2, 2, 5, 2, 2, 2, 2, 5, 5), (5, 5, 5, 5, 5, 2, 5, 5, 5, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 2, 2, 2, 2, 5, 5), (5, 5, 5, 5, 5, 2, 5, 5, 5, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 2, 2, 2, 2, 5, 5), (5, 5, 5, 5, 5, 2, 5, 5, 5, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5)), 'task_id': '0d87d2a6'} + +```` + ### base_conversion Generates base conversion tasks @@ -2097,7 +2513,11 @@ Input: 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 -Answer: ((3, 9, 3, 9, 3, 9, 3, 9), (3, 9, 3, 9, 3, 9, 3, 3), (3, 9, 3, 9, 3, 9, 9, 9), (3, 9, 3, 9, 3, 3, 3, 3), (3, 9, 3, 9, 9, 9, 9, 9)) +Answer: 3 9 3 9 3 9 3 9 +3 9 3 9 3 9 3 3 +3 9 3 9 3 9 9 9 +3 9 3 9 3 3 3 3 +3 9 3 9 9 9 9 9 Metadata: {'input': ((3, 3, 3, 3, 3, 3, 3, 9), (3, 3, 3, 3, 3, 3, 3, 3), (3, 3, 3, 3, 3, 3, 3, 3), (3, 3, 3, 3, 3, 3, 3, 3), (3, 3, 3, 3, 3, 3, 3, 3)), 'output': ((3, 9, 3, 9, 3, 9, 3, 9), (3, 9, 3, 9, 3, 9, 3, 3), (3, 9, 3, 9, 3, 9, 9, 9), (3, 9, 3, 9, 3, 3, 3, 3), (3, 9, 3, 9, 9, 9, 9, 9)), 'task_id': 'd22278a0', 'difficulty': {'rng': 0.07173948707162241, 'pso': 0.12314814814814816}} Example 2: @@ -2232,7 +2652,14 @@ Input: 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 -Answer: ((7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 8, 7, 7, 7), (7, 7, 7, 7, 7, 7, 8, 7, 8, 7, 7), (7, 7, 7, 7, 7, 8, 7, 8, 7, 8, 7), (7, 7, 7, 7, 7, 7, 8, 7, 8, 7, 7), (7, 7, 7, 7, 7, 7, 7, 8, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7)) +Answer: 7 7 7 7 7 7 7 7 7 7 7 +7 7 7 7 7 7 7 8 7 7 7 +7 7 7 7 7 7 8 7 8 7 7 +7 7 7 7 7 8 7 8 7 8 7 +7 7 7 7 7 7 8 7 8 7 7 +7 7 7 7 7 7 7 8 7 7 7 +7 7 7 7 7 7 7 7 7 7 7 +7 7 7 7 7 7 7 7 7 7 7 Metadata: {'input': ((7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 8, 7, 7, 7), (7, 7, 7, 7, 7, 7, 8, 7, 8, 7, 7), (7, 7, 7, 7, 7, 8, 7, 8, 7, 8, 7), (7, 7, 7, 7, 7, 7, 8, 7, 8, 7, 7), (7, 7, 7, 7, 7, 7, 7, 8, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7)), 'output': ((7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 8, 7, 7, 7), (7, 7, 7, 7, 7, 7, 8, 7, 8, 7, 7), (7, 7, 7, 7, 7, 8, 7, 8, 7, 8, 7), (7, 7, 7, 7, 7, 7, 8, 7, 8, 7, 7), (7, 7, 7, 7, 7, 7, 7, 8, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7), (7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7)), 'task_id': '11852cab', 'difficulty': {'rng': 0.09651305327452808, 'pso': 0.15228956228956228}} Example 3: @@ -2276,7 +2703,10 @@ Input: 1 1 1 1 1 1 1 1 1 1 -Answer: ((1, 1, 1, 1, 1), (1, 1, 1, 1, 1), (1, 1, 1, 1, 1), (1, 1, 1, 1, 1)) +Answer: 1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 Metadata: {'input': ((1, 1, 1, 1, 1), (1, 1, 1, 1, 1)), 'output': ((1, 1, 1, 1, 1), (1, 1, 1, 1, 1), (1, 1, 1, 1, 1), (1, 1, 1, 1, 1)), 'task_id': '8be77c9e', 'difficulty': {'rng': 0.09322002370336528, 'pso': 0.0638888888888889}} ```` @@ -2993,7 +3423,7 @@ Metadata: {'task_type': 'datetime_tz', 'start_time': datetime.datetime(2964, 6, Example 2: Question: A video call started at 09:44 and ended at 12:22. How long was the call? Answer in HH:MM. Answer: 02:38 -Metadata: {'task_type': 'time', 'start_time': datetime.datetime(2025, 2, 8, 9, 44), 'end_time': datetime.datetime(2025, 2, 8, 12, 22), 'format': '%H:%M', 'expected_format': 'HH:MM'} +Metadata: {'task_type': 'time', 'start_time': datetime.datetime(2025, 2, 9, 9, 44), 'end_time': datetime.datetime(2025, 2, 9, 12, 22), 'format': '%H:%M', 'expected_format': 'HH:MM'} Example 3: Question: Calculate the time difference between Sat Dec 22 2677 and Thu Mar 21 2678. Express the result in D days. diff --git a/reasoning_gym/arc/arc_agi.py b/reasoning_gym/arc/arc_agi.py index 92d7ec0d..b96698bb 100644 --- a/reasoning_gym/arc/arc_agi.py +++ b/reasoning_gym/arc/arc_agi.py @@ -22,15 +22,23 @@ class ArcAgiConfig: board_format_opts: BoardFormattingOptions = field(default_factory=lambda: BoardFormattingOptions()) # Augmentation options - use_rotations: bool = True - use_mirrors: bool = True + rotations: list[str] = field(default_factory=lambda: ["90", "180", "270"]) # empty list for no rotations + mirrors: list[str] = field( + default_factory=lambda: ["horizontal", "vertical", "diagonal", "counterdiagonal"] + ) # empty list for no mirrors use_color_permutation: bool = True - + seed: Optional[int] = None size: int = 500 def validate(self): assert self.size > 0, "Size of dataset must be positive." + valid_rotations = ["90", "180", "270"] + valid_mirrors = ["horizontal", "vertical", "diagonal", "counterdiagonal"] + for rot in self.rotations: + assert rot in valid_rotations, f"Invalid rotation option: {rot}" + for mirror in self.mirrors: + assert mirror in valid_mirrors, f"Invalid mirror option: {mirror}" Board = list[list[int]] @@ -103,11 +111,17 @@ class ArcAgiDataset(ProceduralDataset): """Create a composite augmentation function from enabled options""" fns = [] - if self.config.use_rotations: - fns.append(rng.choice(ROTATION_AUGMENTATIONS)) + # Map rotation strings to functions + rotation_map = {"90": rot90, "180": rot180, "270": rot270} + if self.config.rotations: + chosen_rot = rng.choice([identity] + [rotation_map[r] for r in self.config.rotations]) + fns.append(chosen_rot) - if self.config.use_mirrors: - fns.append(rng.choice(MIRROR_AUGMENTATIONS)) + # Map mirror strings to functions + mirror_map = {"horizontal": hmirror, "vertical": vmirror, "diagonal": dmirror, "counterdiagonal": cmirror} + if self.config.mirrors: + chosen_mirror = rng.choice([identity] + [mirror_map[m] for m in self.config.mirrors]) + fns.append(chosen_mirror) if self.config.use_color_permutation: color_table = list(range(10)) diff --git a/tests/test_arc_agi.py b/tests/test_arc_agi.py index bb42e9d8..da43e6ab 100644 --- a/tests/test_arc_agi.py +++ b/tests/test_arc_agi.py @@ -8,10 +8,23 @@ def test_arc_agi_config_validation(): with pytest.raises(AssertionError): ArcAgiConfig(size=0).validate() - # Valid config should not raise + with pytest.raises(AssertionError): + ArcAgiConfig(rotations=["invalid"]).validate() + + with pytest.raises(AssertionError): + ArcAgiConfig(mirrors=["invalid"]).validate() + + # Valid configs should not raise config = ArcAgiConfig(size=10, seed=42) config.validate() + config = ArcAgiConfig(rotations=["90", "180"], mirrors=["horizontal", "diagonal"]) + config.validate() + + # Empty lists should be valid (no augmentations) + config = ArcAgiConfig(rotations=[], mirrors=[]) + config.validate() + def test_arc_agi_deterministic(): """Test dataset reproducibility with fixed seed""" @@ -52,26 +65,36 @@ def test_arc_agi_items(): def test_arc_agi_augmentations(): """Test that augmentations can be selectively enabled/disabled""" # Test with all augmentations disabled - config = ArcAgiConfig(seed=42, size=10, use_rotations=False, use_mirrors=False, use_color_permutation=False) + config = ArcAgiConfig(seed=42, size=10, rotations=[], mirrors=[], use_color_permutation=False) base_dataset = ArcAgiDataset(config) base_items = list(base_dataset) - # Test with rotations only - rot_config = ArcAgiConfig(seed=42, size=10, use_rotations=True, use_mirrors=False, use_color_permutation=False) + # Test with specific rotation only + rot_config = ArcAgiConfig(seed=42, size=10, rotations=["90"], mirrors=[], use_color_permutation=False) rot_dataset = ArcAgiDataset(rot_config) rot_items = list(rot_dataset) - # Items should differ when rotations are enabled + # Items should differ with rotation enabled assert any( base_items[i]["metadata"]["input"] != rot_items[i]["metadata"]["input"] for i in range(len(base_items)) - ), "Rotation augmentation had no effect" + ), "90-degree rotation augmentation had no effect" + + # Test with specific mirror only + mirror_config = ArcAgiConfig(seed=42, size=10, rotations=[], mirrors=["horizontal"], use_color_permutation=False) + mirror_dataset = ArcAgiDataset(mirror_config) + mirror_items = list(mirror_dataset) + + # Items should differ with mirror enabled + assert any( + base_items[i]["metadata"]["input"] != mirror_items[i]["metadata"]["input"] for i in range(len(base_items)) + ), "Horizontal mirror augmentation had no effect" # Test with color permutation only - color_config = ArcAgiConfig(seed=42, size=10, use_rotations=False, use_mirrors=False, use_color_permutation=True) + color_config = ArcAgiConfig(seed=42, size=10, rotations=[], mirrors=[], use_color_permutation=True) color_dataset = ArcAgiDataset(color_config) color_items = list(color_dataset) - # Items should differ when color permutation is enabled + # Items should differ with color permutation enabled assert any( base_items[i]["metadata"]["input"] != color_items[i]["metadata"]["input"] for i in range(len(base_items)) ), "Color permutation had no effect"