e2b testing

This commit is contained in:
Zafir Stojanovski 2025-02-26 20:19:52 +01:00
parent b47bf882ce
commit 2ce450486d

View file

@ -2,23 +2,32 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import abc\n",
"import asyncio\n",
"from collections import defaultdict\n",
"import json\n",
"import os\n",
"import random\n",
"from random import Random\n",
"import re\n",
"import signal\n",
"from typing import Union\n",
"\n",
"import aiohttp\n",
"import datasets\n",
"from dotenv import load_dotenv\n",
"import numpy as np\n",
"from sentence_transformers import SentenceTransformer\n",
"from tenacity import (\n",
@ -28,7 +37,11 @@
" wait_exponential,\n",
")\n",
"import torch\n",
"from tqdm.notebook import tqdm\n",
"from e2b_code_interpreter import Sandbox\n",
"from e2b import TimeoutException\n",
"\n",
"load_dotenv()"
]
},
{
@ -351,8 +364,6 @@
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n",
"SYSTEM_PROMPT = \"\"\"You are a helpful assistant that generates valid Python functions that act as input generators for a given code snippet.\n",
"\n",
"You have access to `random.Random`, therefore you SHOULD NOT import it again. You should use this random number generator to make the input generation process stochastic on each call.\n",
@ -481,9 +492,26 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Filter out invalid input generators"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you want to install a template with custom packages, see:\n",
"\n",
"https://e2b.dev/docs/quickstart/install-custom-packages\n",
"\n",
"An example e2b.Dockerfile looks like this:\n",
"\n",
"```Dockerfile\n",
"FROM e2bdev/code-interpreter:latest\n",
"\n",
"RUN pip install numpy matplotlib scipy pandas scikit-learn sympy networkx requests pillow bs4 cryptography spacy numba pyyaml regex\n",
"```\n",
"\n",
"However, I am going with the default installed libraries: https://e2b.dev/docs/code-interpreting/analyze-data-with-ai/pre-installed-libraries"
]
},
{
@ -492,33 +520,141 @@
"metadata": {},
"outputs": [],
"source": [
"# Example usage of the Sandbox class\n",
"with Sandbox() as sandbox:\n",
"\n",
"    # First initialize the sandbox\n",
"    execution = sandbox.run_code(\"\"\"\n",
"from random import Random # <----- ALWAYS PREPEND THIS LINE TO YOUR CODE SNIPPET\n",
"\n",
"def hello_world():\n",
"    return {\"a\": 5, \"b\": 10}\n",
"\n",
"def multiple_hello_worlds(rng: Random):\n",
"    return [\n",
"        {\"a\": rng.randint(1, 10), \"b\": rng.randint(10, 20)},\n",
"        {\"a\": 10, \"b\": 20},\n",
"    ]\n",
"\"\"\"\n",
"    )\n",
" try:\n",
" # Run the code snippet\n",
" execution = sandbox.run_code(\"rng = Random(53);multiple_hello_worlds(rng)\", timeout=5)\n",
" print(execution)\n",
" if execution.error:\n",
" print(\"[!! FOUND ERROR !!]\")\n",
" else:\n",
" print(type(execution.text))\n",
" print(execution.text)\n",
" except TimeoutException as e:\n",
" print(e)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Validate every input generator by running it inside an e2b sandbox and keep\n",
"# only entries whose generator initializes, warms up, and completes a full\n",
"# sampling run. Failure counts are tallied per stage in `errors`.\n",
"CODE_TEMPLATE = \"\"\"from random import Random\n",
"{code_sample}\n",
"\n",
"{input_generator}\n",
"\n",
"def multiple_eval(num_generations: int, seed: int = 42) -> tuple:\n",
"    rng = Random(seed)\n",
"    inputs = [generate_input(rng) for _ in range(num_generations)]\n",
"    outputs = [main_solution(**inp) for inp in inputs]\n",
"    return inputs, outputs\n",
"\"\"\"\n",
"\n",
"SAMPLING_TEMPLATE = \"multiple_eval({num_generations})\"\n",
"\n",
"WARMUP_GENERATIONS = 5   # cheap smoke test before the expensive full run\n",
"TOTAL_GENERATIONS = 1_000\n",
"TIMEOUT_CODE_INIT = 10   # seconds\n",
"TIMEOUT_PER_SAMPLE = 2   # seconds budgeted per generated sample\n",
"\n",
"errors = defaultdict(int)\n",
"\n",
"# Count lines inside a context manager so the handle is closed promptly\n",
"# (the previous `sum(1 for _ in open(...))` leaked the file handle).\n",
"with open(\"data/codeio-pyedu-with-input-generator.jsonl\", \"r\") as f:\n",
"    total_entries = sum(1 for _ in f)\n",
"\n",
"with open(\"data/codeio-pyedu-with-input-generator.jsonl\", \"r\") as f_in, \\\n",
"     open(\"data/codeio-pyedu-with-input-generator-filtered.jsonl\", \"w+\") as f_out:\n",
"\n",
"    iterator = tqdm(enumerate(f_in), total=total_entries)\n",
"\n",
"    for i, line in iterator:\n",
"        iterator.set_description(f\"Processing {i}/{total_entries} | \" + \" | \".join(f\"{k}: {v}\" for k, v in errors.items()) + f\" | total: {sum(errors.values())}\")\n",
"        entry = json.loads(line)\n",
"\n",
"        if \"input_generator\" not in entry:\n",
"            errors[\"missing_input_generator\"] += 1\n",
"            continue\n",
"\n",
"        with Sandbox() as sandbox:\n",
"            # 1. Initialize the sandbox with the code sample + input generator\n",
"            try:\n",
"                execution = sandbox.run_code(\n",
"                    code=CODE_TEMPLATE.format(**entry),\n",
"                    timeout=TIMEOUT_CODE_INIT\n",
"                )\n",
"                assert not execution.error, \"Error in code snippet\"\n",
"            except Exception:\n",
"                errors[\"cannot_initialize_code\"] += 1\n",
"                continue\n",
"\n",
"            # 2. Warm up the sampling with a handful of generations\n",
"            try:\n",
"                execution = sandbox.run_code(\n",
"                    code=SAMPLING_TEMPLATE.format(num_generations=WARMUP_GENERATIONS),\n",
"                    timeout=TIMEOUT_PER_SAMPLE * WARMUP_GENERATIONS\n",
"                )\n",
"                assert not execution.error, \"Error in input generator (warmup)\"\n",
"                assert execution.text, \"Empty input generator output (warmup)\"\n",
"                # WARNING: eval() runs sandbox output in THIS kernel; prefer\n",
"                # ast.literal_eval if the outputs are guaranteed plain literals.\n",
"                inputs, outputs = eval(execution.text)\n",
"            except Exception:\n",
"                errors[\"warmup_fails\"] += 1\n",
"                continue\n",
"\n",
"            # 3. Run the full sampling\n",
"            # (bare `except:` here previously also swallowed KeyboardInterrupt,\n",
"            # making the loop hard to stop — narrowed to Exception.)\n",
"            try:\n",
"                execution = sandbox.run_code(\n",
"                    code=SAMPLING_TEMPLATE.format(num_generations=TOTAL_GENERATIONS),\n",
"                    timeout=TIMEOUT_PER_SAMPLE * TOTAL_GENERATIONS\n",
"                )\n",
"                assert not execution.error, \"Error in input generator (full)\"\n",
"                assert execution.text, \"Empty input generator output (full)\"\n",
"                inputs, outputs = eval(execution.text)\n",
"                assert len(inputs) == TOTAL_GENERATIONS, \"Mismatch in input generations\"\n",
"                assert len(outputs) == TOTAL_GENERATIONS, \"Mismatch in output generations\"\n",
"                # Compare canonical JSON strings directly: hashing them first\n",
"                # added nothing and could (rarely) undercount on collisions.\n",
"                unique_inputs = len({json.dumps(inp, sort_keys=True) for inp in inputs})\n",
"                unique_outputs = len({json.dumps(out, sort_keys=True) for out in outputs})\n",
"            except Exception:\n",
"                errors[\"full_sampling_fails\"] += 1\n",
"                continue\n",
"\n",
"        # 4. Save the surviving entry along with its uniqueness stats\n",
"        entry = entry | {\n",
"            \"unique_inputs\": unique_inputs,\n",
"            \"unique_outputs\": unique_outputs,\n",
"            \"total_generations\": TOTAL_GENERATIONS,\n",
"        }\n",
"        f_out.write(json.dumps(entry))\n",
"        f_out.write(\"\\n\")\n",
"\n",
"for k, v in errors.items():\n",
"    print(f\"{k}: {v}\")\n",
"print(f\"Total errors: {sum(errors.values())}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"errors = defaultdict(int)\n",
"total_entries = sum(1 for _ in open(\"data/codeio-pyedu-with-input-generator.jsonl\", \"r\"))\n",
"\n",
"# with open(\"data/codeio-pyedu-with-input-generator.jsonl\", \"r\") as f_in, \\\n",
"# open(\"data/codeio-pyedu-with-input-generator-filtered.jsonl\", \"w+\") as f_out:\n",