mirror of
https://github.com/InternLM/InternBootcamp.git
synced 2026-04-25 17:10:49 +00:00
init-commit
This commit is contained in:
commit
18a552597a
3461 changed files with 1150579 additions and 0 deletions
BIN
internbootcamp/libs/re_arc/00d62c1b_generated.png
Executable file
BIN
internbootcamp/libs/re_arc/00d62c1b_generated.png
Executable file
Binary file not shown.
|
After Width: | Height: | Size: 9 KiB |
BIN
internbootcamp/libs/re_arc/00d62c1b_original.png
Executable file
BIN
internbootcamp/libs/re_arc/00d62c1b_original.png
Executable file
Binary file not shown.
|
After Width: | Height: | Size: 5.6 KiB |
24
internbootcamp/libs/re_arc/README.md
Executable file
24
internbootcamp/libs/re_arc/README.md
Executable file
|
|
@ -0,0 +1,24 @@
|
|||
# RE-ARC: Reverse-Engineering the Abstraction and Reasoning Corpus
|
||||
## [Addressing ARC via Procedural Example Generation](https://arxiv.org/abs/2404.07353)
|
||||
|
||||
This repository presents code to procedurally generate examples for the [ARC](https://github.com/fchollet/ARC) training tasks. For each of the 400 tasks, an example generator is provided. See the [demo notebook](demo.ipynb) for example usage of the code and a visualization of the data. The primary entry point is the `generate_dataset` function defined in [main.py](main.py). The file [re_arc.zip](re_arc.zip) contains 1000 verified generated examples for each of the 400 training tasks (re_arc/tasks contains a json file for each ARC task containing an array of json objects each with keys "input" and "output") alongside two difficulty metrics for each example and some task-level metadata about runtime and sample-efficiency, the result of running the notebook, which calls `generate_dataset` with the default parameter values. The only major dependency is the [ARC-DSL](https://github.com/michaelhodel/arc-dsl), which is however included as a single file in [dsl.py](dsl.py), as it is not provided as a Python package. Other relevant files are [generators.py](generators.py), which contains the task-specific example generators, and [verifiers.py](verifiers.py), which contains the corresponding task solver programs used for keeping only generated examples that are valid.
|
||||
|
||||
For a more in-depth description of the work, see the [notes on arxiv](https://arxiv.org/abs/2404.07353).
|
||||
|
||||
|
||||
### Example usage:
|
||||
|
||||
```python
|
||||
from main import demo_generator
|
||||
demo_generator('00d62c1b')
|
||||
```
|
||||
|
||||
#### 00d62c1b (original)
|
||||
|
||||
![00d62c1b (original)](00d62c1b_original.png)
|
||||
|
||||
|
||||
#### 00d62c1b (generated)
|
||||
|
||||
![00d62c1b (generated)](00d62c1b_generated.png)
|
||||
|
||||
4
internbootcamp/libs/re_arc/__init__.py
Executable file
4
internbootcamp/libs/re_arc/__init__.py
Executable file
|
|
@ -0,0 +1,4 @@
|
|||
# from bootcamp_utils.jsonlines import read_jsonl
|
||||
|
||||
|
||||
# arc_keys = read_jsonl('libs/data/arc_key.jsonl')
|
||||
1599
internbootcamp/libs/re_arc/dsl.py
Executable file
1599
internbootcamp/libs/re_arc/dsl.py
Executable file
File diff suppressed because it is too large
Load diff
14962
internbootcamp/libs/re_arc/generators.py
Executable file
14962
internbootcamp/libs/re_arc/generators.py
Executable file
File diff suppressed because it is too large
Load diff
204
internbootcamp/libs/re_arc/main.py
Executable file
204
internbootcamp/libs/re_arc/main.py
Executable file
|
|
@ -0,0 +1,204 @@
|
|||
import time
|
||||
import tqdm
|
||||
import os
|
||||
import json
|
||||
|
||||
from random import seed as set_seed
|
||||
|
||||
from . import dsl
|
||||
from .dsl import *
|
||||
|
||||
from . import utils
|
||||
from .utils import *
|
||||
|
||||
from . import generators
|
||||
from . import verifiers
|
||||
|
||||
|
||||
|
||||
def get_generators() -> dict:
    """
    returns mapper from task identifiers (keys) to example generator functions

    Discovered by scanning the generators module for callables whose name
    starts with 'generate_'; the task key is the name minus that prefix.
    """
    mapper = {}
    for name in dir(generators):
        if name.startswith('generate_'):
            mapper[strip_prefix(name, 'generate_')] = getattr(generators, name)
    return mapper
|
||||
|
||||
|
||||
def get_verifiers() -> dict:
    """
    returns mapper from task identifiers (keys) to example verifier functions

    Discovered by scanning the verifiers module for callables whose name
    starts with 'verify_'; the task key is the name minus that prefix.
    """
    mapper = {}
    for name in dir(verifiers):
        if name.startswith('verify_'):
            mapper[strip_prefix(name, 'verify_')] = getattr(verifiers, name)
    return mapper
|
||||
|
||||
|
||||
def get_rng_difficulty(
    example: dict
) -> float:
    """
    RNG-Difficulty: proxy measure for example difficulty, defined as the mean
    of floats sampled (via utils.unifint) while the example was generated.

    Side effect: reads utils.rng and resets it to an empty list, so the
    accumulator is fresh for the next generated example. The `example`
    argument itself is unused; it is accepted for signature symmetry with
    get_pso_difficulty.
    """
    rng = getattr(utils, 'rng')
    setattr(utils, 'rng', [])
    # fix: a generator that never called unifint leaves rng empty, which
    # previously raised ZeroDivisionError; define such examples as difficulty 0
    if not rng:
        return 0.0
    return sum(rng) / len(rng)
|
||||
|
||||
|
||||
def get_pso_difficulty(
    example: dict
) -> float:
    """
    PSO-Difficulty: proxy measure for example difficulty, defined as the
    weighted sum of #Pixels, #Symbols, #Objects (each term normalized to
    [0, 1] before averaging)
    """
    inp, out = example['input'], example['output']
    area_in = height(inp) * width(inp)
    area_out = height(out) * width(out)
    # 1800 == 2 * 30 * 30: combined area of two maximal 30x30 grids
    pixel_term = (area_in + area_out) / 1800
    symbol_term = len(palette(inp) | palette(out)) / 10
    object_term = (len(objects(inp, T, F, F)) / area_in
                   + len(objects(out, T, F, F)) / area_out) / 2
    return (pixel_term + symbol_term + object_term) / 3
|
||||
|
||||
|
||||
def demo_generator(key, n=6):
    """
    Plot the original ARC task `key` (train + test examples) alongside n
    freshly generated examples from its task-specific generator.
    """
    with open(f'arc_original/training/{key}.json', 'r') as fp:
        task = json.load(fp)
    originals = task['train'] + task['test']
    generate = getattr(generators, f'generate_{key}')
    # full difficulty range [0, 1] for the demo samples
    samples = [generate(0, 1) for _ in range(n)]
    plot_task(originals)
    plot_task(samples)
|
||||
|
||||
|
||||
def generate_dataset(
    path: str = 're_arc',
    seed: int = 42,
    n_examples: int = 1000,
    diff_lb: float = 0,
    diff_ub: float = 1
) -> None:
    """
    generates dataset

    path: which folder to save data to (must not already exist — makedirs
        without exist_ok guards against overwriting a previous run)
    seed: for deterministic generation / reproducibility
    n_examples: number of examples per task
    diff_lb: lower bound for difficulty
    diff_ub: upper bound for difficulty

    Fixes vs. the original: the three bare `except:` clauses are narrowed to
    `except Exception` (a bare except also swallows KeyboardInterrupt, making
    a multi-hour run impossible to abort), and `assert`-driven control flow
    is replaced with plain conditionals (asserts vanish under `python -O`,
    which would have counted every generation as verified).
    """
    set_seed(seed)
    os.makedirs(path)
    tasks_path = os.path.join(path, 'tasks')
    os.makedirs(tasks_path)
    generators_mapper = get_generators()
    verifiers_mapper = get_verifiers()
    keys = sorted(generators_mapper.keys())
    k = len(keys)
    desc = f'task 0/{k}, example 0/{n_examples}'
    pbar = tqdm.tqdm(enumerate(keys), desc=desc, position=0, leave=True, total=k)
    metadata = dict()
    for i, key in pbar:
        generator = generators_mapper[key]
        verifier = verifiers_mapper[key]
        seen = set()
        examples = []
        stats = {
            'n_generations': 0, 'n_verified': 0, 'n_nondegenerate': 0,
            'rng_difficulties': [], 'pso_difficulties': []
        }
        start = time.time()
        while len(examples) < n_examples:
            example, identifier, success = None, None, False
            # stage 1: generation must yield two well-formed grids
            try:
                candidate = generator(diff_lb, diff_ub)
                if is_grid(candidate['input']) and is_grid(candidate['output']):
                    example = candidate
                    identifier = hash(example['input'])
                    stats['n_generations'] += 1
                    success = True
            except Exception:
                pass
            # stage 2: the task's verifier must reproduce the generated output
            if success:
                try:
                    success = verifier(example['input']) == example['output']
                except Exception:
                    success = False
                if success:
                    stats['n_verified'] += 1
            # stage 3: reject degenerate examples where input == output
            if success:
                if example['input'] != example['output']:
                    stats['n_nondegenerate'] += 1
                else:
                    success = False
            # keep only verified, non-degenerate, previously unseen examples
            if success and identifier not in seen:
                examples.append(example)
                seen.add(identifier)
                stats['rng_difficulties'].append(get_rng_difficulty(example))
                stats['pso_difficulties'].append(get_pso_difficulty(example))
                desc = f'task {i+1}/{k}, example {len(examples)}/{n_examples}'
                pbar.set_description(desc)
        end = time.time()
        stats['runtime'] = end - start
        with open(os.path.join(tasks_path, f'{key}.json'), 'w') as fp:
            json.dump(examples, fp)
        metadata[key] = stats
    with open(os.path.join(path, 'metadata.json'), 'w') as fp:
        json.dump(metadata, fp)
|
||||
|
||||
|
||||
def demo_dataset(
    folder: str = 're_arc',
    n: int = 8,
    s: int = 0,
    e: int = 400
) -> None:
    """
    Visualize snippets from a generated dataset: for each task whose sorted
    index falls in [s, e), plot the original examples plus easy and hard
    generated instances, ranked by the stored PSO-difficulty metric.
    """
    with open(f'{folder}/metadata.json', 'r') as fp:
        metadata = json.load(fp)
    for index, filename in enumerate(sorted(os.listdir(f'{folder}/tasks'))):
        if not s <= index < e:
            continue
        # task key is the 8-character identifier at the start of the filename
        key = filename[:8]
        with open(f'arc_original/training/{key}.json', 'r') as fp:
            original_task = json.load(fp)
        with open(f'{folder}/tasks/{key}.json', 'r') as fp:
            generated_task = json.load(fp)
        original_task = [format_example(example) for example in original_task['train'] + original_task['test']]
        generated_task = [format_example(example) for example in generated_task[:10*n]]
        difficulties = metadata[key]['pso_difficulties'][:9*n]
        # rank by difficulty; zip truncates to the shorter of the two lists
        ranked = sorted(zip(generated_task, difficulties), key=lambda pair: pair[1])
        generated_task = [example for example, _ in ranked]
        easy = generated_task[1*n:2*n]
        hard = generated_task[8*n:9*n]
        print(key)
        print('original:')
        plot_task(original_task)
        print('generated (easy):')
        plot_task(easy)
        print('generated (hard):')
        plot_task(hard)
|
||||
|
||||
|
||||
def evaluate_verifiers_on_original_tasks() -> None:
    """
    runs the verifiers on the (bug-fixed) original ARC training tasks and
    prints which tasks have a verifier that fails on at least one example

    Fixes vs. the original: the bare `except:` is narrowed to
    `except Exception` (bare except swallows KeyboardInterrupt), the
    `assert`-based check is replaced with an explicit comparison (asserts
    are stripped under `python -O`), and the local variable is renamed so
    it no longer shadows the imported `verifiers` module.
    """
    verifiers_mapper = get_verifiers()
    dataset = dict()
    for key in verifiers_mapper.keys():
        with open(f'arc_original/training/{key}.json', 'r') as fp:
            task = json.load(fp)
        dataset[key] = format_task(task)
    fix_bugs(dataset)
    failed_on = set()
    for key, verifier in verifiers_mapper.items():
        task = dataset[key]
        try:
            ok = all(
                verifier(example['input']) == example['output']
                for example in task['train'] + task['test']
            )
        except Exception:
            # a crashing verifier counts as a failure for that task
            ok = False
        if not ok:
            failed_on.add(key)
    n = len(dataset)
    k = len(failed_on)
    print(f'verification programs work for all examples for {n-k}/{n} tasks')
    print(f'verification fails (on one example) for tasks {failed_on}')
|
||||
|
||||
135
internbootcamp/libs/re_arc/utils.py
Executable file
135
internbootcamp/libs/re_arc/utils.py
Executable file
|
|
@ -0,0 +1,135 @@
|
|||
import matplotlib.pyplot as plt
|
||||
from matplotlib.colors import ListedColormap, Normalize
|
||||
|
||||
from random import choice, randint, sample, shuffle, uniform
|
||||
|
||||
from .dsl import *
|
||||
|
||||
|
||||
# Module-level accumulator of every difficulty value sampled via unifint();
# read and reset externally (see get_rng_difficulty in main.py).
rng = []


def unifint(
    diff_lb: float,
    diff_ub: float,
    bounds: Tuple[int, int]
) -> int:
    """
    Sample an integer from [a, b] whose position within the interval is
    driven by a difficulty value drawn uniformly from [diff_lb, diff_ub].

    diff_lb: lower bound for difficulty, must be in range [0, diff_ub]
    diff_ub: upper bound for difficulty, must be in range [diff_lb, 1]
    bounds: interval [a, b] determining the integer values that can be sampled
    """
    lower, upper = bounds
    difficulty = uniform(diff_lb, diff_ub)
    # record the sample for difficulty bookkeeping (no rebinding, so no
    # `global` declaration is needed)
    rng.append(difficulty)
    value = round(lower + (upper - lower) * difficulty)
    return min(max(lower, value), upper)
|
||||
|
||||
|
||||
def is_grid(
    grid: Any
) -> bool:
    """
    returns True if and only if argument is a valid grid: a non-empty tuple
    of up to 30 equal-length, non-empty tuples (up to 30 wide) whose cells
    are all integers in [0, 9]
    """
    if not isinstance(grid, tuple) or not 0 < len(grid) <= 30:
        return False
    widths = set()
    for row in grid:
        if not isinstance(row, tuple) or not 0 < len(row) <= 30:
            return False
        widths.add(len(row))
        for cell in row:
            if not isinstance(cell, int) or not 0 <= cell <= 9:
                return False
    # rectangularity: every row must have the same width
    return len(widths) == 1
|
||||
|
||||
|
||||
def strip_prefix(
    string: str,
    prefix: str
) -> str:
    """
    Drop the first len(prefix) characters of string (the caller guarantees
    that string actually starts with prefix).
    """
    offset = len(prefix)
    return string[offset:]
|
||||
|
||||
|
||||
def format_grid(
    grid: List[List[int]]
) -> Grid:
    """
    Cast a JSON-style list-of-lists grid to the canonical immutable
    tuple-of-tuples representation.
    """
    return tuple(map(tuple, grid))
|
||||
|
||||
|
||||
def format_example(
    example: dict
) -> dict:
    """
    Cast an example's 'input' and 'output' grids to the canonical grid type.
    """
    return {field: format_grid(example[field]) for field in ('input', 'output')}
|
||||
|
||||
|
||||
def format_task(
    task: dict
) -> dict:
    """
    Cast every example in both splits of a task to the canonical example type.
    """
    return {
        split: [format_example(example) for example in task[split]]
        for split in ('train', 'test')
    }
|
||||
|
||||
|
||||
def plot_task(
    task: List[dict],
    title: str = None
) -> None:
    """
    displays a task as a 2 x len(task) grid of images: inputs in the top
    row, outputs in the bottom row, one column per example

    task: list of examples, each a dict with 'input' and 'output' grids
    title: optional figure title
    """
    # the 10 ARC colors, indexed by cell value 0-9
    cmap = ListedColormap([
        '#000', '#0074D9', '#FF4136', '#2ECC40', '#FFDC00',
        '#AAAAAA', '#F012BE', '#FF851B', '#7FDBFF', '#870C25'
    ])
    norm = Normalize(vmin=0, vmax=9)
    args = {'cmap': cmap, 'norm': norm}
    height = 2
    width = len(task)
    figure_size = (width * 3, height * 3)
    # fix: squeeze=False keeps `axes` 2-D even when the task has a single
    # example; previously axes[0, column] raised IndexError for width == 1
    figure, axes = plt.subplots(height, width, figsize=figure_size, squeeze=False)
    for column, example in enumerate(task):
        axes[0, column].imshow(example['input'], **args)
        axes[1, column].imshow(example['output'], **args)
        axes[0, column].axis('off')
        axes[1, column].axis('off')
    if title is not None:
        figure.suptitle(title, fontsize=20)
    plt.subplots_adjust(wspace=0.1, hspace=0.1)
    plt.show()
|
||||
|
||||
|
||||
def fix_bugs(
    dataset: dict
) -> None:
    """
    fixes known pixel-level bugs in the original ARC training dataset,
    mutating the given dataset in place
    """
    # (task key, train example index, color, cell indices to recolor)
    patches = [
        ('a8d7556c', 2, 2, {(8, 12), (9, 12)}),
        ('6cf79266', 2, 1, {(6, 17), (7, 17), (8, 15), (8, 16), (8, 17)}),
        ('469497ad', 1, 7, {(5, 12), (5, 13), (5, 14)}),
        ('9edfc990', 1, 1, {(6, 13)}),
        ('e5062a87', 1, 2, {(1, 3), (1, 4), (1, 5), (1, 6)}),
        ('e5062a87', 0, 2, {(5, 2), (6, 3), (3, 6), (4, 7)}),
    ]
    for key, index, color, cells in patches:
        example = dataset[key]['train'][index]
        example['output'] = fill(example['output'], color, cells)
|
||||
10900
internbootcamp/libs/re_arc/verifiers.py
Executable file
10900
internbootcamp/libs/re_arc/verifiers.py
Executable file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue