mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-22 16:49:06 +00:00
filtering
This commit is contained in:
parent
f19498edb8
commit
8a0423f185
1 changed files with 97 additions and 67 deletions
|
|
@ -8,11 +8,13 @@
|
||||||
"source": [
|
"source": [
|
||||||
"import abc\n",
|
"import abc\n",
|
||||||
"import asyncio\n",
|
"import asyncio\n",
|
||||||
|
"from collections import defaultdict\n",
|
||||||
"import json\n",
|
"import json\n",
|
||||||
"import os\n",
|
"import os\n",
|
||||||
"import random\n",
|
"import random\n",
|
||||||
"from random import Random\n",
|
"from random import Random\n",
|
||||||
"import re\n",
|
"import re\n",
|
||||||
|
"import signal\n",
|
||||||
"from typing import Union\n",
|
"from typing import Union\n",
|
||||||
"\n",
|
"\n",
|
||||||
"import aiohttp\n",
|
"import aiohttp\n",
|
||||||
|
|
@ -345,66 +347,9 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
|
||||||
"model_id": "3a63a9127ee24e039b91c83a714ee994",
|
|
||||||
"version_major": 2,
|
|
||||||
"version_minor": 0
|
|
||||||
},
|
|
||||||
"text/plain": [
|
|
||||||
" 0%| | 0/7053 [00:00<?, ?it/s]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "display_data"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n",
|
|
||||||
"Could not find <function>...</function> block in response\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
|
@ -532,21 +477,106 @@
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Filter out invalid input generators\n",
|
||||||
|
"\n",
|
||||||
|
"**NOTE**: The code below is buggy: it appears to leak memory (not yet confirmed). Every time `exec` runs a code snippet, the variables that snippet defines are stored in the global scope, so over time this will consume all available memory. In addition, running `exec` on untrusted code is unsafe."
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Example of how to execute the generated code\n",
|
"# def timeout_handler(signum, frame):\n",
|
||||||
"# local_dict = {}\n",
|
"# raise TimeoutError(\"Function call timed out\")\n",
|
||||||
"# exec(data['input_generator'], globals(), local_dict)\n",
|
|
||||||
"# generate_input_func = local_dict['generate_input']\n",
|
|
||||||
"# rng = random.Random()\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"# for i in range(5):\n",
|
"# def get_input_generator_func(code_sample: str, input_generator_str: str) -> dict:\n",
|
||||||
"# random_input = generate_input_func(rng)\n",
|
"# env = globals().copy()\n",
|
||||||
"# print(f\"[{i}]: {random_input}\")"
|
"# exec(code_sample, env, env)\n",
|
||||||
|
"# exec(input_generator_str, env, env)\n",
|
||||||
|
"# return env['generate_input']\n",
|
||||||
|
"\n",
|
||||||
|
"# def execute_code_sample(code_sample: str, input_dict: dict) -> dict:\n",
|
||||||
|
"# env = globals().copy()\n",
|
||||||
|
"# exec(code_sample, env, env)\n",
|
||||||
|
"# main_solution = env['main_solution']\n",
|
||||||
|
"# return main_solution(**input_dict)\n",
|
||||||
|
"\n",
|
||||||
|
"# NUM_INPUT_GENERATE = 1_000 # how many inputs to try and generate\n",
|
||||||
|
"# ALARM_TOLERANCE = 1 # in seconds\n",
|
||||||
|
"# PERCENT_UNIQUE_INPUTS = 0.30 # what fraction of generated inputs should be unique\n",
|
||||||
|
"# PERCENT_UNIQUE_OUTPUTS = 0.30 # what fraction of generated outputs should be unique\n",
|
||||||
|
"\n",
|
||||||
|
"# signal.signal(signal.SIGALRM, timeout_handler)\n",
|
||||||
|
"\n",
|
||||||
|
"# rng = random.Random()\n",
|
||||||
|
"# rng.seed(42)\n",
|
||||||
|
"\n",
|
||||||
|
"# errors = defaultdict(int)\n",
|
||||||
|
"# total_entries = sum(1 for _ in open(\"data/codeio-pyedu-with-input-generator.jsonl\", \"r\"))\n",
|
||||||
|
"\n",
|
||||||
|
"# with open(\"data/codeio-pyedu-with-input-generator.jsonl\", \"r\") as f_in, \\\n",
|
||||||
|
"# open(\"data/codeio-pyedu-with-input-generator-filtered.jsonl\", \"w+\") as f_out:\n",
|
||||||
|
"\n",
|
||||||
|
"# iterator = tqdm(enumerate(f_in), total=total_entries)\n",
|
||||||
|
"\n",
|
||||||
|
"# for i, line in iterator:\n",
|
||||||
|
"# iterator.set_description(f\"Processing {i}/{total_entries} | \" + \" | \".join(f\"{k}: {v}\" for k, v in errors.items()) + f\" | total: {sum(errors.values())}\")\n",
|
||||||
|
"# entry = json.loads(line)\n",
|
||||||
|
"# # Check if input generator is present\n",
|
||||||
|
"# if not \"input_generator\" in entry:\n",
|
||||||
|
"# errors[\"missing_input_generator\"] += 1\n",
|
||||||
|
"# continue\n",
|
||||||
|
" \n",
|
||||||
|
"# # Check if input generator is valid function\n",
|
||||||
|
"# try:\n",
|
||||||
|
"# input_generator_func = get_input_generator_func(entry['code_sample'], entry['input_generator'])\n",
|
||||||
|
"# except Exception as e:\n",
|
||||||
|
"# errors[\"cannot_instantiate_input_generator\"] += 1\n",
|
||||||
|
"# continue\n",
|
||||||
|
"\n",
|
||||||
|
"# skip = False\n",
|
||||||
|
"# seen_inputs, seen_outputs = set(), set()\n",
|
||||||
|
"\n",
|
||||||
|
"# for _ in range(NUM_INPUT_GENERATE):\n",
|
||||||
|
"# try:\n",
|
||||||
|
"# # Check if you can generate input\n",
|
||||||
|
"# signal.alarm(ALARM_TOLERANCE)\n",
|
||||||
|
"# random_input = input_generator_func(rng)\n",
|
||||||
|
"# signal.alarm(0)\n",
|
||||||
|
"# seen_inputs.add(hash(json.dumps(random_input)))\n",
|
||||||
|
"\n",
|
||||||
|
"# # Check if code snippet can execute with generated input\n",
|
||||||
|
"# signal.alarm(ALARM_TOLERANCE)\n",
|
||||||
|
"# random_output = execute_code_sample(entry[\"code_sample\"], random_input)\n",
|
||||||
|
"# signal.alarm(0)\n",
|
||||||
|
"# seen_outputs.add(hash(json.dumps(random_output)))\n",
|
||||||
|
"# except Exception as e:\n",
|
||||||
|
"# signal.alarm(0)\n",
|
||||||
|
"# errors[\"unreliable_input_generator\"] += 1\n",
|
||||||
|
"# skip = True\n",
|
||||||
|
"# break\n",
|
||||||
|
"# if skip: \n",
|
||||||
|
"# continue\n",
|
||||||
|
" \n",
|
||||||
|
"# if len(seen_inputs) / NUM_INPUT_GENERATE < PERCENT_UNIQUE_INPUTS:\n",
|
||||||
|
"# errors[\"insufficient_unique_inputs\"] += 1\n",
|
||||||
|
"# continue\n",
|
||||||
|
" \n",
|
||||||
|
"# if len(seen_outputs) / NUM_INPUT_GENERATE < PERCENT_UNIQUE_OUTPUTS:\n",
|
||||||
|
"# errors[\"insufficient_unique_outputs\"] += 1\n",
|
||||||
|
"# continue\n",
|
||||||
|
"\n",
|
||||||
|
"# f_out.write(json.dumps(entry))\n",
|
||||||
|
"# f_out.write(\"\\n\")\n",
|
||||||
|
"\n",
|
||||||
|
"# for k, v in errors.items():\n",
|
||||||
|
"# print(f\"{k}: {v}\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue