diff --git a/notebooks/codeio.ipynb b/notebooks/codeio.ipynb index 0449f499..63abb71b 100644 --- a/notebooks/codeio.ipynb +++ b/notebooks/codeio.ipynb @@ -8,11 +8,13 @@ "source": [ "import abc\n", "import asyncio\n", + "from collections import defaultdict\n", "import json\n", "import os\n", "import random\n", "from random import Random\n", "import re\n", + "import signal\n", "from typing import Union\n", "\n", "import aiohttp\n", @@ -345,66 +347,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "3a63a9127ee24e039b91c83a714ee994", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/7053 [00:00... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... 
block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n", - "Could not find ... block in response\n" - ] - } - ], + "outputs": [], "source": [ "\n", "\n", @@ -532,21 +477,106 @@ ")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Filter out invalid input generators\n", + "\n", + "**NOTE**: The code below is buggy: it appears to leak memory (I think). Every time `exec` runs a code snippet, the variables that snippet defines are stored in the global scope, so over time this will consume all available memory. Besides, running `exec` on untrusted code is unsafe." + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# Example of how to execute the generated code\n", - "# local_dict = {}\n", - "# exec(data['input_generator'], globals(), local_dict)\n", - "# generate_input_func = local_dict['generate_input']\n", - "# rng = random.Random()\n", + "# def timeout_handler(signum, frame):\n", + "# raise TimeoutError(\"Function call timed out\")\n", "\n", - "# for i in range(5):\n", - "# random_input = generate_input_func(rng)\n", - "# print(f\"[{i}]: {random_input}\")" + "# def get_input_generator_func(code_sample: str, input_generator_str: str) -> dict:\n", + "# env = globals().copy()\n", + "# exec(code_sample, env, env)\n", + "# exec(input_generator_str, env, env)\n", + "# return env['generate_input']\n", + "\n", + "# def execute_code_sample(code_sample: str, input_dict: dict) -> dict:\n", + "# env = globals().copy()\n", + "# exec(code_sample, env, env)\n", + "# main_solution = env['main_solution']\n", + "# return main_solution(**input_dict)\n", + "\n", + "# NUM_INPUT_GENERATE = 1_000 # how many inputs to try and generate\n", + "# 
ALARM_TOLERANCE = 1 # in seconds\n", + "# PERCENT_UNIQUE_INPUTS = 0.30 # what fraction of generated inputs should be unique\n", + "# PERCENT_UNIQUE_OUTPUTS = 0.30 # what fraction of generated outputs should be unique\n", + "\n", + "# signal.signal(signal.SIGALRM, timeout_handler)\n", + "\n", + "# rng = random.Random()\n", + "# rng.seed(42)\n", + "\n", + "# errors = defaultdict(int)\n", + "# total_entries = sum(1 for _ in open(\"data/codeio-pyedu-with-input-generator.jsonl\", \"r\"))\n", + "\n", + "# with open(\"data/codeio-pyedu-with-input-generator.jsonl\", \"r\") as f_in, \\\n", + "# open(\"data/codeio-pyedu-with-input-generator-filtered.jsonl\", \"w+\") as f_out:\n", + "\n", + "# iterator = tqdm(enumerate(f_in), total=total_entries)\n", + "\n", + "# for i, line in iterator:\n", + "# iterator.set_description(f\"Processing {i}/{total_entries} | \" + \" | \".join(f\"{k}: {v}\" for k, v in errors.items()) + f\" | total: {sum(errors.values())}\")\n", + "# entry = json.loads(line)\n", + "# # Check if input generator is present\n", + "# if not \"input_generator\" in entry:\n", + "# errors[\"missing_input_generator\"] += 1\n", + "# continue\n", + " \n", + "# # Check if input generator is valid function\n", + "# try:\n", + "# input_generator_func = get_input_generator_func(entry['code_sample'], entry['input_generator'])\n", + "# except Exception as e:\n", + "# errors[\"cannot_instantiate_input_generator\"] += 1\n", + "# continue\n", + "\n", + "# skip = False\n", + "# seen_inputs, seen_outputs = set(), set()\n", + "\n", + "# for _ in range(NUM_INPUT_GENERATE):\n", + "# try:\n", + "# # Check if you can generate input\n", + "# signal.alarm(ALARM_TOLERANCE)\n", + "# random_input = input_generator_func(rng)\n", + "# signal.alarm(0)\n", + "# seen_inputs.add(hash(json.dumps(random_input)))\n", + "\n", + "# # Check if code snippet can execute with generated input\n", + "# signal.alarm(ALARM_TOLERANCE)\n", + "# random_output = execute_code_sample(entry[\"code_sample\"], 
random_input)\n", + "# signal.alarm(0)\n", + "# seen_outputs.add(hash(json.dumps(random_output)))\n", + "# except Exception as e:\n", + "# signal.alarm(0)\n", + "# errors[\"unreliable_input_generator\"] += 1\n", + "# skip = True\n", + "# break\n", + "# if skip: \n", + "# continue\n", + " \n", + "# if len(seen_inputs) / NUM_INPUT_GENERATE < PERCENT_UNIQUE_INPUTS:\n", + "# errors[\"insufficient_unique_inputs\"] += 1\n", + "# continue\n", + " \n", + "# if len(seen_outputs) / NUM_INPUT_GENERATE < PERCENT_UNIQUE_OUTPUTS:\n", + "# errors[\"insufficient_unique_outputs\"] += 1\n", + "# continue\n", + "\n", + "# f_out.write(json.dumps(entry))\n", + "# f_out.write(\"\\n\")\n", + "\n", + "# for k, v in errors.items():\n", + "# print(f\"{k}: {v}\")" ] }, {