e2b testing

2026-04-19 12:58:07 +00:00 · 2025-02-26 20:19:52 +01:00 · 2025-02-26 20:19:52 +01:00 · 2ce450486d
commit 2ce450486d
parent b47bf882ce
1 changed files with 167 additions and 31 deletions
--- a/notebooks/codeio.ipynb
+++ b/notebooks/codeio.ipynb
@ -2,23 +2,32 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
   "source": [
    "import abc\n",
    "import asyncio\n",
    "from collections import defaultdict\n",
    "import json\n",
    "import os\n",
-    "import random\n",
-    "from random import Random\n",
    "import re\n",
-    "import signal\n",
    "from typing import Union\n",
    "\n",
    "import aiohttp\n",
    "import datasets\n",
+    "from dotenv import load_dotenv\n",
    "import numpy as np\n",
    "from sentence_transformers import SentenceTransformer\n",
    "from tenacity import (\n",
@ -28,7 +37,11 @@
    "    wait_exponential,\n",
    ")\n",
    "import torch\n",
-    "from tqdm.notebook import tqdm"
+    "from tqdm.notebook import tqdm\n",
+    "from e2b_code_interpreter import Sandbox\n",
+    "from e2b import TimeoutException\n",
+    "\n",
+    "load_dotenv()"
   ]
  },
  {
@ -351,8 +364,6 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "\n",
-    "\n",
    "SYSTEM_PROMPT = \"\"\"You are a helpful assistant that generates valid Python functions that act as input generators for a given code snippet.\n",
    "\n",
    "You have access to `random.Random`, therefore you SHOULD NOT import it again. You should use this random number generator to make the input generation process stochastic on each call.\n",
@ -481,9 +492,26 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "## Filter out invalid input generators\n",
+    "## Filter out invalid input generators"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To build a sandbox template with custom packages, follow the e2b guide:\n",
    "\n",
-    "**NOTE**: The code below is buggy because we have a memory leak (I think) - every time you run the `exec` with some code snippet, it stores the variables in the global scope. Over time, this will consume all the memory. And besides, running `exec` on untrusted code is not smart."
+    "https://e2b.dev/docs/quickstart/install-custom-packages\n",
+    "\n",
+    "An example e2b.Dockerfile looks like this:\n",
+    "\n",
+    "```Dockerfile\n",
+    "FROM e2bdev/code-interpreter:latest\n",
+    "\n",
+    "RUN pip install numpy matplotlib scipy pandas scikit-learn sympy networkx requests pillow bs4 cryptography spacy numba pyyaml regex\n",
+    "```\n",
+    "\n",
+    "However, this notebook uses the libraries that come pre-installed in the default template: https://e2b.dev/docs/code-interpreting/analyze-data-with-ai/pre-installed-libraries"
   ]
  },
  {
@ -492,33 +520,141 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "# def timeout_handler(signum, frame):\n",
-    "#     raise TimeoutError(\"Function call timed out\")\n",
+    "# Example usage of the Sandbox class\n",
+    "with Sandbox() as sandbox:\n",
    "\n",
-    "# def get_input_generator_func(code_sample: str, input_generator_str: str) -> dict:\n",
-    "#     env = globals().copy()\n",
-    "#     exec(code_sample, env, env)\n",
-    "#     exec(input_generator_str, env, env)\n",
-    "#     return env['generate_input']\n",
+    "    # First initialize the sandbox\n",
+    "    execution = sandbox.run_code(\"\"\"\n",
+    "from random import Random   # <----- ALWAYS PREPEND THIS LINE TO YOUR CODE SNIPPET\n",
    "\n",
-    "# def execute_code_sample(code_sample: str, input_dict: dict) -> dict:\n",
-    "#     env = globals().copy()\n",
-    "#     exec(code_sample, env, env)\n",
-    "#     main_solution = env['main_solution']\n",
-    "#     return main_solution(**input_dict)\n",
+    "def hello_world():\n",
+    "    return {\"a\": 5, \"b\": 10}\n",
    "\n",
-    "# NUM_INPUT_GENERATE = 1_000 # how many inputs to try and generate\n",
-    "# ALARM_TOLERANCE = 1 # in seconds\n",
-    "# PERCENT_UNIQUE_INPUTS = 0.30 # what fraction of generated inputs should be unique\n",
-    "# PERCENT_UNIQUE_OUTPUTS = 0.30 # what fraction of generated outputs should be unique\n",
+    "def multiple_hello_worlds(rng: Random):\n",
+    "    return [\n",
+    "        {\"a\": rng.randint(1, 10), \"b\": rng.randint(10, 20)},\n",
+    "        {\"a\": 10, \"b\": 20},\n",
+    "    ]\n",
+    "\"\"\"\n",
+    "    )\n",
+    "    try:\n",
+    "        # Run the code snippet\n",
+    "        execution = sandbox.run_code(\"rng = Random(53);multiple_hello_worlds(rng)\", timeout=5)\n",
+    "        print(execution)\n",
+    "        if execution.error:\n",
+    "            print(\"[!! FOUND ERROR !!]\")\n",
+    "        else:\n",
+    "            print(type(execution.text))\n",
+    "            print(execution.text)\n",
+    "    except TimeoutException as e:\n",
+    "        print(e)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "CODE_TEMPLATE = \"\"\"from random import Random\n",
+    "{code_sample}\n",
    "\n",
-    "# signal.signal(signal.SIGALRM, timeout_handler)\n",
+    "{input_generator}\n",
    "\n",
-    "# rng = random.Random()\n",
-    "# rng.seed(42)\n",
+    "def multiple_eval(num_generations: int, seed: int = 42) -> tuple:\n",
+    "    rng = Random(seed)\n",
+    "    inputs = [generate_input(rng) for _ in range(num_generations)]\n",
+    "    outputs = [main_solution(**inp) for inp in inputs]\n",
+    "    return inputs, outputs\n",
+    "\"\"\"\n",
    "\n",
-    "# errors = defaultdict(int)\n",
-    "# total_entries = sum(1 for _ in open(\"data/codeio-pyedu-with-input-generator.jsonl\", \"r\"))\n",
+    "SAMPLING_TEMPLATE = \"multiple_eval({num_generations})\"\n",
+    "\n",
+    "WARMUP_GENERATIONS = 5\n",
+    "TOTAL_GENERATIONS = 1_000\n",
+    "TIMEOUT_CODE_INIT = 10\n",
+    "TIMEOUT_PER_SAMPLE = 2\n",
+    "\n",
+    "errors = defaultdict(int)\n",
+    "total_entries = sum(1 for _ in open(\"data/codeio-pyedu-with-input-generator.jsonl\", \"r\"))\n",
+    "\n",
+    "with open(\"data/codeio-pyedu-with-input-generator.jsonl\", \"r\") as f_in, \\\n",
+    "    open(\"data/codeio-pyedu-with-input-generator-filtered.jsonl\", \"w+\") as f_out:\n",
+    "\n",
+    "    iterator = tqdm(enumerate(f_in), total=total_entries)\n",
+    "\n",
+    "    for i, line in iterator:\n",
+    "        iterator.set_description(f\"Processing {i}/{total_entries} | \" + \" | \".join(f\"{k}: {v}\" for k, v in errors.items()) + f\" | total: {sum(errors.values())}\")\n",
+    "        entry = json.loads(line)\n",
+    "\n",
+    "        if not \"input_generator\" in entry:\n",
+    "            errors[\"missing_input_generator\"] += 1\n",
+    "            continue\n",
+    "        \n",
+    "        with Sandbox() as sandbox:\n",
+    "            # 1. Initialize the sandbox\n",
+    "            try: \n",
+    "                execution = sandbox.run_code(\n",
+    "                    code=CODE_TEMPLATE.format(**entry), \n",
+    "                    timeout=TIMEOUT_CODE_INIT\n",
+    "                )\n",
+    "                assert not execution.error, \"Error in code snippet\"\n",
+    "            except Exception as e:\n",
+    "                errors[\"cannot_initialize_code\"] += 1\n",
+    "                continue\n",
+    "            \n",
+    "            # 2. Warmup the sampling\n",
+    "            try:\n",
+    "                execution = sandbox.run_code(\n",
+    "                    code=SAMPLING_TEMPLATE.format(num_generations=WARMUP_GENERATIONS),\n",
+    "                    timeout=TIMEOUT_PER_SAMPLE * WARMUP_GENERATIONS\n",
+    "                )\n",
+    "                assert not execution.error, \"Error in input generator (warmup)\"\n",
+    "                assert execution.text, \"Empty input generator output (warmup)\"\n",
+    "                inputs, outputs = eval(execution.text)\n",
+    "            except Exception as e:\n",
+    "                errors[\"warmup_fails\"] += 1\n",
+    "                continue\n",
+    "\n",
+    "            # 3. Run the full sampling\n",
+    "            try:\n",
+    "                execution = sandbox.run_code(\n",
+    "                    code=SAMPLING_TEMPLATE.format(num_generations=TOTAL_GENERATIONS),\n",
+    "                    timeout=TIMEOUT_PER_SAMPLE * TOTAL_GENERATIONS\n",
+    "                )\n",
+    "                assert not execution.error, \"Error in input generator (full)\"\n",
+    "                assert execution.text, \"Empty input generator output (full)\"\n",
+    "                inputs, outputs = eval(execution.text)\n",
+    "                assert len(inputs) == TOTAL_GENERATIONS, \"Mismatch in input generations\"\n",
+    "                assert len(outputs) == TOTAL_GENERATIONS, \"Mismatch in output generations\"\n",
+    "                unique_inputs = len(set(hash(json.dumps(inp, sort_keys=True)) for inp in inputs))\n",
+    "                unique_outputs = len(set(hash(json.dumps(out, sort_keys=True)) for out in outputs))\n",
+    "            except:\n",
+    "                errors[\"full_sampling_fails\"] += 1\n",
+    "                continue\n",
+    "                \n",
+    "            # 4. Save the entry\n",
+    "            entry = entry | {\n",
+    "                \"unique_inputs\": unique_inputs,\n",
+    "                \"unique_outputs\": unique_outputs,\n",
+    "                \"total_generations\": TOTAL_GENERATIONS,\n",
+    "            }\n",
+    "            f_out.write(json.dumps(entry))\n",
+    "            f_out.write(\"\\n\")\n",
+    "\n",
+    "for k, v in errors.items():\n",
+    "    print(f\"{k}: {v}\")\n",
+    "print(f\"Total errors: {sum(errors.values())}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "errors = defaultdict(int)\n",
+    "total_entries = sum(1 for _ in open(\"data/codeio-pyedu-with-input-generator.jsonl\", \"r\"))\n",
    "\n",
    "# with open(\"data/codeio-pyedu-with-input-generator.jsonl\", \"r\") as f_in, \\\n",
    "#     open(\"data/codeio-pyedu-with-input-generator-filtered.jsonl\", \"w+\") as f_out:\n",