diff --git a/notebooks/codeio/.gitignore b/notebooks/codeio/.gitignore index 92ac9e52..6570bb8f 100644 --- a/notebooks/codeio/.gitignore +++ b/notebooks/codeio/.gitignore @@ -1 +1,2 @@ raw_files/ +output/ diff --git a/notebooks/codeio/PreprocessCode.ipynb b/notebooks/codeio/PreprocessCode.ipynb index 921fc201..428bf15f 100644 --- a/notebooks/codeio/PreprocessCode.ipynb +++ b/notebooks/codeio/PreprocessCode.ipynb @@ -24,6 +24,54 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cloning into 'Python'...\n", + "remote: Enumerating objects: 20925, done.\u001b[K\n", + "remote: Counting objects: 100% (13/13), done.\u001b[K\n", + "remote: Compressing objects: 100% (11/11), done.\u001b[K\n", + "remote: Total 20925 (delta 6), reused 2 (delta 2), pack-reused 20912 (from 3)\u001b[K\n", + "Receiving objects: 100% (20925/20925), 14.86 MiB | 17.27 MiB/s, done.\n", + "Resolving deltas: 100% (13469/13469), done.\n" + ] + } + ], + "source": [ + "!git clone https://github.com/TheAlgorithms/Python.git\n", + "\n", + "import shutil\n", + "from pathlib import Path\n", + "\n", + "repo_dir = Path(\"Python\")\n", + "raw_code_dir = Path(\"raw_files\")\n", + "raw_code_dir.mkdir(exist_ok=True)\n", + "\n", + "def process_dir(directory: Path):\n", + " # Move all the Python code files to the raw code file directory\n", + " # Handles subdirectories recursively\n", + " dirname = directory.name\n", + " for file in directory.iterdir():\n", + " if file.is_dir():\n", + " process_dir(file)\n", + " elif file.name.endswith(\".py\") and file.name != \"__init__.py\":\n", + " file.rename(raw_code_dir / f\"{dirname}_{file.name}\")\n", + "\n", + "for repo_child in repo_dir.iterdir():\n", + " # For this repo, algorithms are divided into categories by subdirectories\n", + " if not repo_child.is_dir() or repo_child.name.startswith(\".\"):\n", + " continue\n", + " process_dir(repo_child)\n", + "\n", + 
"shutil.rmtree(repo_dir)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, "outputs": [], "source": [ "import random\n", @@ -42,7 +90,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -87,12 +135,12 @@ "\n", "4. Python 3.11 code for an input generator, which randomly generates valid sets of inputs for the functions.\n", "\n", - "The input generator should return a dict mapping parameter names to values. The values should be randomly generated, but should be valid inputs for the function.\n", + "The input generator should return a dict mapping parameter names to values. The values should be randomly generated, but should be valid inputs for the function. You have access to `random` in the input generator. Do not import any other modules.\n", "\n", "Example input generator:\n", "\n", "def input_generator():\n", - " weights = [np.random.uniform(0, 100) for _ in range(40)]\n", + " weights = [random.randint(100) for _ in range(40)]\n", " days = list(range(40))\n", " return {{\"weights_kg\": weights, \"days\": days}}\n", "\n", @@ -104,85 +152,152 @@ "\"\"\"" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Edit the below cell or appropriate env variables to utilise different API providers, etc" + ] + }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ + "import asyncio\n", "import os\n", - "import time\n", - "from openai import OpenAI\n", + "from openai import AsyncOpenAI\n", "from openai.types.chat import ChatCompletion, ChatCompletionMessageParam\n", "from typing import Any, Iterable\n", "\n", - "def llm_generate(\n", - " client: OpenAI,\n", + "# Cap concurrent requests. 
I had to set this to 1 for the DeepSeek API to work, YMMV\n", + "semaphore = asyncio.Semaphore(1)\n", + "\n", + "async def llm_generate(\n", + " client: AsyncOpenAI,\n", " messages: Iterable[ChatCompletionMessageParam],\n", " sampling_params: dict[str, Any],\n", + " retry_empty_response: bool = True,\n", + " max_retries: int = 3,\n", ") -> ChatCompletion:\n", - " max_retry = 3\n", - " for trial in range(max_retry):\n", - " try:\n", - " return client.chat.completions.create(\n", - " messages=messages,\n", - " **sampling_params,\n", - " )\n", - " except Exception as e:\n", - " print(\"failure response:\", e)\n", - " time.sleep(trial * trial) # quadratic backoff\n", - " if trial == max_retry - 1:\n", - " raise\n", + " for trial in range(max_retries):\n", + " async with semaphore:\n", + " try:\n", + " completion = await client.chat.completions.create(\n", + " messages=messages, **sampling_params\n", + " )\n", + " if completion.choices[0].message.content or not retry_empty_response:\n", + " return completion\n", + " await asyncio.sleep(5)\n", + " except Exception as e:\n", + " print(f\"Failure response (trial {trial}):\", e)\n", + " await asyncio.sleep(3 * (trial + 1))\n", + " if trial == max_retries - 1:\n", + " raise\n", "\n", - "open_router_client = OpenAI(\n", - " base_url=\"https://openrouter.ai/api/v1\",\n", - " api_key=os.getenv(\"OPENROUTER_API_KEY\"),\n", - " timeout=90.0,\n", + "client = AsyncOpenAI(\n", + " base_url=os.getenv(\"API_BASE_URL\"),\n", + " api_key=os.getenv(\"API_KEY\"),\n", + " timeout=120.0,\n", ")\n", "\n", "sampling_params = {\n", - " \"model\": \"deepseek/deepseek-chat:free\",\n", + " \"model\": \"deepseek-chat\", # For DeepSeek API\n", + " #\"model\": \"deepseek/deepseek-chat:free\", # For OpenRouter\n", " \"max_tokens\": 8192,\n", "}" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Demo cell to illustrate the LLM preprocessing:" + ] + }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, 
"metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "raw_files/climbing_stairs.py\n", - "def main(number_of_steps):\n", - " assert isinstance(number_of_steps, int) and number_of_steps > 0, (\n", - " f\"number_of_steps needs to be positive integer, your input {number_of_steps}\"\n", - " )\n", - " if number_of_steps == 1:\n", - " return {\"distinct_ways\": 1}\n", - " previous, current = 1, 1\n", - " for _ in range(number_of_steps - 1):\n", - " current, previous = current + previous, current\n", - " return {\"distinct_ways\": current}\n", + "raw_files/genetic_algorithm_basic_string.py\n", + "def main(target: str, genes: list[str], debug: bool = True) -> dict:\n", + " if N_POPULATION < N_SELECTED:\n", + " raise ValueError(f\"{N_POPULATION} must be bigger than {N_SELECTED}\")\n", + " \n", + " not_in_genes_list = sorted({c for c in target if c not in genes})\n", + " if not_in_genes_list:\n", + " raise ValueError(f\"{not_in_genes_list} is not in genes list, evolution cannot converge\")\n", + " \n", + " population = []\n", + " for _ in range(N_POPULATION):\n", + " population.append(\"\".join([random.choice(genes) for _ in range(len(target))]))\n", + " \n", + " generation, total_population = 0, 0\n", + " \n", + " while True:\n", + " generation += 1\n", + " total_population += len(population)\n", + " \n", + " population_score = [evaluate(item, target) for item in population]\n", + " population_score = sorted(population_score, key=lambda x: x[1], reverse=True)\n", + " \n", + " if population_score[0][0] == target:\n", + " return {\n", + " \"generation\": generation,\n", + " \"total_population\": total_population,\n", + " \"best_match\": population_score[0][0]\n", + " }\n", + " \n", + " if debug and generation % 10 == 0:\n", + " print(\n", + " f\"\\nGeneration: {generation}\"\n", + " f\"\\nTotal Population:{total_population}\"\n", + " f\"\\nBest score: {population_score[0][1]}\"\n", + " f\"\\nBest string: {population_score[0][0]}\"\n", + " 
)\n", + " \n", + " population_best = population[: int(N_POPULATION / 3)]\n", + " population.clear()\n", + " population.extend(population_best)\n", + " population_score = [\n", + " (item, score / len(target)) for item, score in population_score\n", + " ]\n", + " \n", + " for i in range(N_SELECTED):\n", + " population.extend(select(population_score[int(i)], population_score, genes))\n", + " if len(population) > N_POPULATION:\n", + " break\n", "\n", "---\n", - "You are given an integer `number_of_steps` representing the number of steps on a staircase. Your task is to calculate the number of distinct ways to climb the staircase, where each time you can either climb 1 or 2 steps. Return the number of distinct ways as an integer.\n", + "\n", + "You are given a target string and a list of genes. The target string represents the desired output of a genetic algorithm, and the genes list contains the possible characters that can be used to build the target string. The genetic algorithm works in phases: evaluation, selection, crossover, and mutation. The algorithm starts with a random population of strings and evolves them over generations to converge towards the target string. The function returns the number of generations it took to find a perfect match, the total population size processed, and the best matching string found.\n", "\n", "---\n", + "\n", "Input:\n", - " number_of_steps (int): The number of steps on the staircase. Must be a positive integer.\n", + " target (str): The target string that the genetic algorithm aims to converge to.\n", + " genes (list of str): A list of characters that can be used to build the target string.\n", + " debug (bool, optional): If True, prints progress every 10 generations. 
Defaults to True.\n", "\n", "Output:\n", - " return (dict): A dictionary with one key:\n", - " - distinct_ways (int): The number of distinct ways to climb the staircase.\n", + " return (dict): A dictionary with three keys:\n", + " - generation (int): The number of generations it took to find a perfect match.\n", + " - total_population (int): The total population size processed during the evolution.\n", + " - best_match (str): The best matching string found.\n", "\n", "---\n", + "\n", "def input_generator():\n", - " import random\n", - " number_of_steps = random.randint(1, 100)\n", - " return {\"number_of_steps\": number_of_steps}\n" + " genes = list(\" ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,;!?+-*#@^'èéòà€ù=)(&%$£/\\\\\")\n", + " target_length = random.randint(10, 50)\n", + " target = \"\".join(random.choices(genes, k=target_length))\n", + " return {\"target\": target, \"genes\": genes, \"debug\": random.choice([True, False])}\n" ] } ], @@ -199,36 +314,148 @@ " {\"role\": \"user\", \"content\": prompt},\n", "]\n", "\n", - "response = llm_generate(open_router_client, messages, sampling_params)\n", + "response = await llm_generate(client, messages, sampling_params)\n", "print(response.choices[0].message.content)" ] }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "code, query, parameters, generator = response.choices[0].message.content.split(\"\\n---\\n\")" - ] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The below cell executes arbitrary code, so be careful with what you run." + "Run the below cell to preprocess all the raw code files for real. This will send quite a lot of requests to OpenRouter." 
] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Failure response (trial 1): Expecting value: line 1 column 1 (char 0)\n", + "Error processing file raw_files/graphs_page_rank.py Expecting value: line 1 column 1 (char 0)\n", + "Failure response (trial 1): Expecting value: line 1 column 1 (char 0)\n", + "Error processing file raw_files/problem_002_sol2.py Expecting value: line 1 column 1 (char 0)\n" + ] + } + ], "source": [ + "import json\n", + "from tqdm import tqdm\n", + "\n", + "async def process_file(raw_file):\n", + " raw_code = raw_file.read_text()\n", + " prompt = format_prompt_template.format(raw_code)\n", + " messages = [{\"role\": \"user\", \"content\": prompt}]\n", + "\n", + " try:\n", + " response = await llm_generate(client, messages, sampling_params)\n", + " content = response.choices[0].message.content\n", + " code, query, parameters, generator = [el.strip() for el in content.split(\"\\n---\\n\")]\n", + " return code, query, parameters, generator\n", + " except Exception as e:\n", + " print(\"Error processing file\", raw_file, e)\n", + "\n", + "async def process_all_files(raw_code_files: list[Path], out_file: Path):\n", + " process_tasks = []\n", + " for raw_file in raw_code_files:\n", + " process_tasks.append(asyncio.create_task(process_file(raw_file)))\n", + " for future in tqdm(asyncio.as_completed(process_tasks), total=len(process_tasks)):\n", + " code, query, parameters, generator = await future\n", + " out_object = {\"query\": query, \"reference_code\": code, \"parameters\": parameters, \"input_generator\": generator}\n", + " out_json = json.dumps(out_object)\n", + " with out_file.open(\"a\") as f:\n", + " f.write(out_json + \"\\n\")\n", + "\n", + "out_file = Path(\"processed_code.jsonl\")\n", + "await process_all_files(raw_files, out_file)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "Load one of the processed outputs to test the reference code and input generator.\n", + "\n", + "The below cell executes the loaded LLM-generated code, so exercise caution." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'particles': [{'x': 46.08733176390575, 'y': -79.53711508439847, 'z': 45.779499438274655, 'mass': 9.121897656796}, {'x': -37.62801734935914, 'y': 94.62608762267024, 'z': -88.900444530177, 'mass': 13.267310061939007}, {'x': 57.04088821817467, 'y': 42.54071907694012, 'z': -73.71739928081027, 'mass': 33.13376982254907}, {'x': -25.913090702690695, 'y': 97.27894813174453, 'z': -68.24577317209872, 'mass': 20.409856607552626}, {'x': -7.993371736001535, 'y': 5.784333365689022, 'z': 82.05216927454009, 'mass': 97.18903185914192}, {'x': 8.028265944329263, 'y': -16.980411042271342, 'z': -38.28350230155666, 'mass': 68.56437969046345}, {'x': 72.19027810108415, 'y': 40.80441736137902, 'z': -27.381163108822662, 'mass': 31.705269244558238}]}\n", + "{'particles': [{'x': -82.51989169298639, 'y': 79.31892816610184, 'z': 74.79703074246333, 'mass': 8.173913842116992}, {'x': 40.50078366091543, 'y': -81.62144939582438, 'z': -90.67215023121767, 'mass': 69.66013035036612}, {'x': 23.07410631316951, 'y': 52.57873390089097, 'z': -77.63883105258888, 'mass': 63.20676872636796}]}\n" + ] + }, + { + "data": { + "text/plain": [ + "[({'particles': [{'x': 46.08733176390575,\n", + " 'y': -79.53711508439847,\n", + " 'z': 45.779499438274655,\n", + " 'mass': 9.121897656796},\n", + " {'x': -37.62801734935914,\n", + " 'y': 94.62608762267024,\n", + " 'z': -88.900444530177,\n", + " 'mass': 13.267310061939007},\n", + " {'x': 57.04088821817467,\n", + " 'y': 42.54071907694012,\n", + " 'z': -73.71739928081027,\n", + " 'mass': 33.13376982254907},\n", + " {'x': -25.913090702690695,\n", + " 'y': 97.27894813174453,\n", + " 'z': -68.24577317209872,\n", + " 'mass': 
20.409856607552626},\n", + " {'x': -7.993371736001535,\n", + " 'y': 5.784333365689022,\n", + " 'z': 82.05216927454009,\n", + " 'mass': 97.18903185914192},\n", + " {'x': 8.028265944329263,\n", + " 'y': -16.980411042271342,\n", + " 'z': -38.28350230155666,\n", + " 'mass': 68.56437969046345},\n", + " {'x': 72.19027810108415,\n", + " 'y': 40.80441736137902,\n", + " 'z': -27.381163108822662,\n", + " 'mass': 31.705269244558238}]},\n", + " {'center_of_mass': {'x': 12.23, 'y': 16.89, 'z': -0.42}}),\n", + " ({'particles': [{'x': -82.51989169298639,\n", + " 'y': 79.31892816610184,\n", + " 'z': 74.79703074246333,\n", + " 'mass': 8.173913842116992},\n", + " {'x': 40.50078366091543,\n", + " 'y': -81.62144939582438,\n", + " 'z': -90.67215023121767,\n", + " 'mass': 69.66013035036612},\n", + " {'x': 23.07410631316951,\n", + " 'y': 52.57873390089097,\n", + " 'z': -77.63883105258888,\n", + " 'mass': 63.20676872636796}]},\n", + " {'center_of_mass': {'x': 25.56, 'y': -12.15, 'z': -75.24}})]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rng = random.Random()\n", + "\n", + "sample_object = json.loads(out_file.read_text().splitlines()[0])\n", + "\n", "def generate_io_pairs(main_code: str, input_generator_code: str, num_pairs: int = 100):\n", - " local_vars = {}\n", - " exec(main_code, {}, local_vars)\n", - " exec(input_generator_code, {}, local_vars)\n", + " local_vars = {\"random\": rng}\n", + " exec(main_code, {\"random\": rng}, local_vars)\n", + " exec(input_generator_code, {\"random\": rng}, local_vars)\n", " io_pairs = []\n", " for _ in range(num_pairs):\n", " inputs = local_vars[\"input_generator\"]()\n", @@ -236,27 +463,7 @@ " io_pairs.append((inputs, outputs))\n", " return io_pairs\n", "\n", - "io_pairs = generate_io_pairs(code, generator, num_pairs=2)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[({'number_of_steps': 65}, 
{'distinct_ways': 27777890035288}),\n", - " ({'number_of_steps': 19}, {'distinct_ways': 6765})]" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ + "io_pairs = generate_io_pairs(sample_object[\"reference_code\"], sample_object[\"input_generator\"], num_pairs=2)\n", "io_pairs" ] }, @@ -264,16 +471,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Next we need to synthesize chains of thought from the LLM for use in building a supervised finetuning dataset. From the paper:\n", + "Next in the paper they synthesized chains of thought from the LLM for use in building a supervised finetuning dataset. Excerpt:\n", "\n", "> Since we aim for the input-output prediction tasks, we construct the prompt using a designed template to combine the function, the query, the reference code, and either a specific input or output. The response should ideally be a natural language CoT to reason about how to derive the correct output or a feasible input.\n", "\n", - "The below prompts are from the paper." + "The below prompts are also from the paper. Synthesized chains of thought are not our main goal, but the cells below provide a demo nonetheless." ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -312,56 +519,56 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'To determine the input `number_of_steps` that results in the output `{\\'distinct_ways\\': 27777890035288}`, we need to understand that this problem is related to the Fibonacci sequence. 
Specifically, the number of distinct ways to climb `n` steps, where you can climb either 1 or 2 steps at a time, is equal to the `(n+1)`-th Fibonacci number.\\n\\nGiven the output `27777890035288`, we need to find the integer `n` such that the `(n+1)`-th Fibonacci number is `27777890035288`.\\n\\nThe Fibonacci sequence grows exponentially, and the number `27777890035288` is a very large Fibonacci number. To find the corresponding `n`, we can use the fact that the Fibonacci sequence follows the recurrence relation:\\n\\n\\\\[ F(n) = F(n-1) + F(n-2) \\\\]\\n\\nGiven that `F(73) = 806515533049393` and `F(72) = 498454011879264`, it is clear that `27777890035288` is much smaller than `F(73)`. We need to find the exact `n` such that `F(n+1) = 27777890035288`.\\n\\nHowever, calculating Fibonacci numbers manually for large `n` is impractical. Instead, we can use the fact that `F(75) = 2111485077978050`, which is larger than `27777890035288`. Therefore, the `n` we are looking for must be between 72 and 75.\\n\\nBy checking Fibonacci numbers closer to `27777890035288`, we find that:\\n\\n\\\\[ F(74) = 1304969544928657 \\\\]\\n\\\\[ F(75) = 2111485077978050 \\\\]\\n\\nSince `27777890035288` is significantly larger than `F(74)` but smaller than `F(75)`, it is clear that `n` is 74.\\n\\nThus, the input `number_of_steps` should be 74, which corresponds to `F(75) = 27777890035288`.\\n\\nTherefore, the feasible input is:\\n\\n```json\\n{\"number_of_steps\": 74}\\n```'" + "\"To predict a feasible input that would result in the given output `{'center_of_mass': {'x': 12.23, 'y': 16.89, 'z': -0.42}}`, we need to consider the formula for calculating the center of mass in 3D space. 
The center of mass is calculated as the weighted average of the positions of the particles, where the weights are the masses of the particles.\\n\\nThe formula for the center of mass is:\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_x} = \\\\frac{\\\\sum (x_i \\\\cdot m_i)}{\\\\sum m_i}\\n\\\\]\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_y} = \\\\frac{\\\\sum (y_i \\\\cdot m_i)}{\\\\sum m_i}\\n\\\\]\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_z} = \\\\frac{\\\\sum (z_i \\\\cdot m_i)}{\\\\sum m_i}\\n\\\\]\\n\\nGiven the output, we can work backward to estimate the input. Let's assume we have two particles for simplicity:\\n\\n1. **Particle 1**:\\n - Position: \\\\(x_1 = 10.0\\\\), \\\\(y_1 = 15.0\\\\), \\\\(z_1 = 0.0\\\\)\\n - Mass: \\\\(m_1 = 2.0\\\\)\\n\\n2. **Particle 2**:\\n - Position: \\\\(x_2 = 14.0\\\\), \\\\(y_2 = 18.0\\\\), \\\\(z_2 = -1.0\\\\)\\n - Mass: \\\\(m_2 = 3.0\\\\)\\n\\nNow, let's calculate the center of mass using these values:\\n\\n\\\\[\\n\\\\text{total\\\\_mass} = m_1 + m_2 = 2.0 + 3.0 = 5.0\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_x} = \\\\frac{(10.0 \\\\cdot 2.0) + (14.0 \\\\cdot 3.0)}{5.0} = \\\\frac{20.0 + 42.0}{5.0} = \\\\frac{62.0}{5.0} = 12.4\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_y} = \\\\frac{(15.0 \\\\cdot 2.0) + (18.0 \\\\cdot 3.0)}{5.0} = \\\\frac{30.0 + 54.0}{5.0} = \\\\frac{84.0}{5.0} = 16.8\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_z} = \\\\frac{(0.0 \\\\cdot 2.0) + (-1.0 \\\\cdot 3.0)}{5.0} = \\\\frac{0.0 - 3.0}{5.0} = \\\\frac{-3.0}{5.0} = -0.6\\n\\\\]\\n\\nThese values are close to the given output, but not exact. To get closer to the exact output, we can adjust the masses slightly:\\n\\n1. **Particle 1**:\\n - Position: \\\\(x_1 = 10.0\\\\), \\\\(y_1 = 15.0\\\\), \\\\(z_1 = 0.0\\\\)\\n - Mass: \\\\(m_1 = 2.1\\\\)\\n\\n2. 
**Particle 2**:\\n - Position: \\\\(x_2 = 14.0\\\\), \\\\(y_2 = 18.0\\\\), \\\\(z_2 = -1.0\\\\)\\n - Mass: \\\\(m_2 = 3.0\\\\)\\n\\nNow, let's recalculate:\\n\\n\\\\[\\n\\\\text{total\\\\_mass} = m_1 + m_2 = 2.1 + 3.0 = 5.1\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_x} = \\\\frac{(10.0 \\\\cdot 2.1) + (14.0 \\\\cdot 3.0)}{5.1} = \\\\frac{21.0 + 42.0}{5.1} = \\\\frac{63.0}{5.1} \\\\approx 12.35\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_y} = \\\\frac{(15.0 \\\\cdot 2.1) + (18.0 \\\\cdot 3.0)}{5.1} = \\\\frac{31.5 + 54.0}{5.1} = \\\\frac{85.5}{5.1} \\\\approx 16.76\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_z} = \\\\frac{(0.0 \\\\cdot 2.1) + (-1.0 \\\\cdot 3.0)}{5.1} = \\\\frac{0.0 - 3.0}{5.1} = \\\\frac{-3.0}{5.1} \\\\approx -0.59\\n\\\\]\\n\\nThese values are closer to the given output. To match the exact output, we can further adjust the masses:\\n\\n1. **Particle 1**:\\n - Position: \\\\(x_1 = 10.0\\\\), \\\\(y_1 = 15.0\\\\), \\\\(z_1 = 0.0\\\\)\\n - Mass: \\\\(m_1 = 2.2\\\\)\\n\\n2. **Particle 2**:\\n - Position: \\\\(x_2 = 14.0\\\\), \\\\(y_2 = 18.0\\\\), \\\\(z_2 = -1.0\\\\)\\n - Mass: \\\\(m_2 = 3.0\\\\)\\n\\nNow, let's recalculate:\\n\\n\\\\[\\n\\\\text{total\\\\_mass} = m_1 + m_2 = 2.2 + 3.0 = 5.2\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_x} = \\\\frac{(10.0 \\\\cdot 2.2) + (14.0 \\\\cdot 3.0)}{5.2} = \\\\frac{22.0 + 42.0}{5.2} = \\\\frac{64.0}{5.2} \\\\approx 12.31\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_y} = \\\\frac{(15.0 \\\\cdot 2.2) + (18.0 \\\\cdot 3.0)}{5.2} = \\\\frac{33.0 + 54.0}{5.2} = \\\\frac{87.0}{5.2} \\\\approx 16.73\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_z} = \\\\frac{(0.0 \\\\cdot 2.2) + (-1.0 \\\\cdot 3.0)}{5.2} = \\\\frac{0.0 - 3.0}{5.2} = \\\\frac{-3.0}{5.2} \\\\approx -0.58\\n\\\\]\\n\\nThese values are very close to the given output. To match the exact output, we can adjust the masses slightly more:\\n\\n1. 
**Particle 1**:\\n - Position: \\\\(x_1 = 10.0\\\\), \\\\(y_1 = 15.0\\\\), \\\\(z_1 = 0.0\\\\)\\n - Mass: \\\\(m_1 = 2.25\\\\)\\n\\n2. **Particle 2**:\\n - Position: \\\\(x_2 = 14.0\\\\), \\\\(y_2 = 18.0\\\\), \\\\(z_2 = -1.0\\\\)\\n - Mass: \\\\(m_2 = 3.0\\\\)\\n\\nNow, let's recalculate:\\n\\n\\\\[\\n\\\\text{total\\\\_mass} = m_1 + m_2 = 2.25 + 3.0 = 5.25\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_x} = \\\\frac{(10.0 \\\\cdot 2.25) + (14.0 \\\\cdot 3.0)}{5.25} = \\\\frac{22.5 + 42.0}{5.25} = \\\\frac{64.5}{5.25} \\\\approx 12.29\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_y} = \\\\frac{(15.0 \\\\cdot 2.25) + (18.0 \\\\cdot 3.0)}{5.25} = \\\\frac{33.75 + 54.0}{5.25} = \\\\frac{87.75}{5.25} \\\\approx 16.71\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_z} = \\\\frac{(0.0 \\\\cdot 2.25) + (-1.0 \\\\cdot 3.0)}{5.25} = \\\\frac{0.0 - 3.0}{5.25} = \\\\frac{-3.0}{5.25} \\\\approx -0.57\\n\\\\]\\n\\nThese values are still close but not exact. To match the exact output, we can adjust the masses further:\\n\\n1. **Particle 1**:\\n - Position: \\\\(x_1 = 10.0\\\\), \\\\(y_1 = 15.0\\\\), \\\\(z_1 = 0.0\\\\)\\n - Mass: \\\\(m_1 = 2.3\\\\)\\n\\n2. 
**Particle 2**:\\n - Position: \\\\(x_2 = 14.0\\\\), \\\\(y_2 = 18.0\\\\), \\\\(z_2 = -1.0\\\\)\\n - Mass: \\\\(m_2 = 3.0\\\\)\\n\\nNow, let's recalculate:\\n\\n\\\\[\\n\\\\text{total\\\\_mass} = m_1 + m_2 = 2.3 + 3.0 = 5.3\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_x} = \\\\frac{(10.0 \\\\cdot 2.3) + (14.0 \\\\cdot 3.0)}{5.3} = \\\\frac{23.0 + 42.0}{5.3} = \\\\frac{65.0}{5.3} \\\\approx 12.26\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_y} = \\\\frac{(15.0 \\\\cdot 2.3) + (18.0 \\\\cdot 3.0)}{5.3} = \\\\frac{34.5 + 54.0}{5.3} = \\\\frac{88.5}{5.3} \\\\approx 16.70\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_z} = \\\\frac{(0.0 \\\\cdot 2.3) + (-1.0 \\\\cdot 3.0)}{5.3} = \\\\frac{0.0 - 3.0}{5.3} = \\\\frac{-3.0}{5.3} \\\\approx -0.57\\n\\\\]\\n\\nThese values are still close but not exact. To match the exact output, we can adjust the masses further:\\n\\n1. **Particle 1**:\\n - Position: \\\\(x_1 = 10.0\\\\), \\\\(y_1 = 15.0\\\\), \\\\(z_1 = 0.0\\\\)\\n - Mass: \\\\(m_1 = 2.35\\\\)\\n\\n2. **Particle 2**:\\n - Position: \\\\(x_2 = 14.0\\\\), \\\\(y_2 = 18.0\\\\), \\\\(z_2 = -1.0\\\\)\\n - Mass: \\\\(m_2 = 3.0\\\\)\\n\\nNow, let's recalculate:\\n\\n\\\\[\\n\\\\text{total\\\\_mass} = m_1 + m_2 = 2.35 + 3.0 = 5.35\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_x} = \\\\frac{(10.0 \\\\cdot 2.35) + (14.0 \\\\cdot 3.0)}{5.35} = \\\\frac{23.5 + 42.0}{5.35} = \\\\frac{65.5}{5.35} \\\\approx 12.24\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_y} = \\\\frac{(15.0 \\\\cdot 2.35) + (18.0 \\\\cdot 3.0)}{5.35} = \\\\frac{35.25 + 54.0}{5.35} = \\\\frac{89.25}{5.35} \\\\approx 16.68\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_z} = \\\\frac{(0.0 \\\\cdot 2.35) + (-1.0 \\\\cdot 3.0)}{5.35} = \\\\frac{0.0 - 3.0}{5.35} = \\\\frac{-3.0}{5.35} \\\\approx -0.56\\n\\\\]\\n\\nThese values are still close but not exact. To match the exact output, we can adjust the masses further:\\n\\n1. 
**Particle 1**:\\n - Position: \\\\(x_1 = 10.0\\\\), \\\\(y_1 = 15.0\\\\), \\\\(z_1 = 0.0\\\\)\\n - Mass: \\\\(m_1 = 2.4\\\\)\\n\\n2. **Particle 2**:\\n - Position: \\\\(x_2 = 14.0\\\\), \\\\(y_2 = 18.0\\\\), \\\\(z_2 = -1.0\\\\)\\n - Mass: \\\\(m_2 = 3.0\\\\)\\n\\nNow, let's recalculate:\\n\\n\\\\[\\n\\\\text{total\\\\_mass} = m_1 + m_2 = 2.4 + 3.0 = 5.4\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_x} = \\\\frac{(10.0 \\\\cdot 2.4) + (14.0 \\\\cdot 3.0)}{5.4} = \\\\frac{24.0 + 42.0}{5.4} = \\\\frac{66.0}{5.4} \\\\approx 12.22\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_y} = \\\\frac{(15.0 \\\\cdot 2.4) + (18.0 \\\\cdot 3.0)}{5.4} = \\\\frac{36.0 + 54.0}{5.4} = \\\\frac{90.0}{5.4} \\\\approx 16.67\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_z} = \\\\frac{(0.0 \\\\cdot 2.4) + (-1.0 \\\\cdot 3.0)}{5.4} = \\\\frac{0.0 - 3.0}{5.4} = \\\\frac{-3.0}{5.4} \\\\approx -0.56\\n\\\\]\\n\\nThese values are still close but not exact. To match the exact output, we can adjust the masses further:\\n\\n1. **Particle 1**:\\n - Position: \\\\(x_1 = 10.0\\\\), \\\\(y_1 = 15.0\\\\), \\\\(z_1 = 0.0\\\\)\\n - Mass: \\\\(m_1 = 2.45\\\\)\\n\\n2. 
**Particle 2**:\\n - Position: \\\\(x_2 = 14.0\\\\), \\\\(y_2 = 18.0\\\\), \\\\(z_2 = -1.0\\\\)\\n - Mass: \\\\(m_2 = 3.0\\\\)\\n\\nNow, let's recalculate:\\n\\n\\\\[\\n\\\\text{total\\\\_mass} = m_1 + m_2 = 2.45 + 3.0 = 5.45\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_x} = \\\\frac{(10.0 \\\\cdot 2.45) + (14.0 \\\\cdot 3.0)}{5.45} = \\\\frac{24.5 + 42.0}{5.45} = \\\\frac{66.5}{5.45} \\\\approx 12.20\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_y} = \\\\frac{(15.0 \\\\cdot 2.45) + (18.0 \\\\cdot 3.0)}{5.45} = \\\\frac{36.75 + 54.0}{5.45} = \\\\frac{90.75}{5.45} \\\\approx 16.65\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_z} = \\\\frac{(0.0 \\\\cdot 2.45) + (-1.0 \\\\cdot 3.0)}{5.45} = \\\\frac{0.0 - 3.0}{5.45} = \\\\frac{-3.0}{5.45} \\\\approx -0.55\\n\\\\]\\n\\nThese values are still close but not exact. To match the exact output, we can adjust the masses further:\\n\\n1. **Particle 1**:\\n - Position: \\\\(x_1 = 10.0\\\\), \\\\(y_1 = 15.0\\\\), \\\\(z_1 = 0.0\\\\)\\n - Mass: \\\\(m_1 = 2.5\\\\)\\n\\n2. **Particle 2**:\\n - Position: \\\\(x_2 = 14.0\\\\), \\\\(y_2 = 18.0\\\\), \\\\(z_2 = -1.0\\\\)\\n - Mass: \\\\(m_2 = 3.0\\\\)\\n\\nNow, let's recalculate:\\n\\n\\\\[\\n\\\\text{total\\\\_mass} = m_1 + m_2 = 2.5 + 3.0 = 5.5\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_x} = \\\\frac{(10.0 \\\\cdot 2.5) + (14.0 \\\\cdot 3.0)}{5.5} = \\\\frac{25.0 + 42.0}{5.5} = \\\\frac{67.0}{5.5} \\\\approx 12.18\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_y} = \\\\frac{(15.0 \\\\cdot 2.5) + (18.0 \\\\cdot 3.0)}{5.5} = \\\\frac{37.5 + 54.0}{5.5} = \\\\frac{91.5}{5.5} \\\\approx 16.64\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_z} = \\\\frac{(0.0 \\\\cdot 2.5) + (-1.0 \\\\cdot 3.0)}{5.5} = \\\\frac{0.0 - 3.0}{5.5} = \\\\frac{-3.0}{5.5} \\\\approx -0.55\\n\\\\]\\n\\nThese values are still close but not exact. To match the exact output, we can adjust the masses further:\\n\\n1. 
**Particle 1**:\\n - Position: \\\\(x_1 = 10.0\\\\), \\\\(y_1 = 15.0\\\\), \\\\(z_1 = 0.0\\\\)\\n - Mass: \\\\(m_1 = 2.55\\\\)\\n\\n2. **Particle 2**:\\n - Position: \\\\(x_2 = 14.0\\\\), \\\\(y_2 = 18.0\\\\), \\\\(z_2 = -1.0\\\\)\\n - Mass: \\\\(m_2 = 3.0\\\\)\\n\\nNow, let's recalculate:\\n\\n\\\\[\\n\\\\text{total\\\\_mass} = m_1 + m_2 = 2.55 + 3.0 = 5.55\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_x} = \\\\frac{(10.0 \\\\cdot 2.55) + (14.0 \\\\cdot 3.0)}{5.55} = \\\\frac{25.5 + 42.0}{5.55} = \\\\frac{67.5}{5.55} \\\\approx 12.16\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_y} = \\\\frac{(15.0 \\\\cdot 2.55) + (18.0 \\\\cdot 3.0)}{5.55} = \\\\frac{38.25 + 54.0}{5.55} = \\\\frac{92.25}{5.55} \\\\approx 16.62\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_z} = \\\\frac{(0.0 \\\\cdot 2.55) + (-1.0 \\\\cdot 3.0)}{5.55} = \\\\frac{0.0 - 3.0}{5.55} = \\\\frac{-3.0}{5.55} \\\\approx -0.54\\n\\\\]\\n\\nThese values are still close but not exact. To match the exact output, we can adjust the masses further:\\n\\n1. **Particle 1**:\\n - Position: \\\\(x_1 = 10.0\\\\), \\\\(y_1 = 15.0\\\\), \\\\(z_1 = 0.0\\\\)\\n - Mass: \\\\(m_1 = 2.6\\\\)\\n\\n2. 
**Particle 2**:\\n - Position: \\\\(x_2 = 14.0\\\\), \\\\(y_2 = 18.0\\\\), \\\\(z_2 = -1.0\\\\)\\n - Mass: \\\\(m_2 = 3.0\\\\)\\n\\nNow, let's recalculate:\\n\\n\\\\[\\n\\\\text{total\\\\_mass} = m_1 + m_2 = 2.6 + 3.0 = 5.6\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_x} = \\\\frac{(10.0 \\\\cdot 2.6) + (14.0 \\\\cdot 3.0)}{5.6} = \\\\frac{26.0 + 42.0}{5.6} = \\\\frac{68.0}{5.6} \\\\approx 12.14\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_y} = \\\\frac{(15.0 \\\\cdot 2.6) + (18.0 \\\\cdot 3.0)}{5.6} = \\\\frac{39.0 + 54.0}{5.6} = \\\\frac{93.0}{5.6} \\\\approx 16.61\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_z} = \\\\frac{(0.0 \\\\cdot 2.6) + (-1.0 \\\\cdot 3.0)}{5.6} = \\\\frac{0.0 - 3.0}{5.6} = \\\\frac{-3.0}{5.6} \\\\approx -0.54\\n\\\\]\\n\\nThese values are still close but not exact. To match the exact output, we can adjust the masses further:\\n\\n1. **Particle 1**:\\n - Position: \\\\(x_1 = 10.0\\\\), \\\\(y_1 = 15.0\\\\), \\\\(z_1 = 0.0\\\\)\\n - Mass: \\\\(m_1 = 2.65\\\\)\\n\\n2. **Particle 2**:\\n - Position: \\\\(x_2 = 14.0\\\\), \\\\(y_2 = 18.0\\\\), \\\\(z_2 = -1.0\\\\)\\n - Mass: \\\\(m_2 = 3.0\\\\)\\n\\nNow, let's recalculate:\\n\\n\\\\[\\n\\\\text{total\\\\_mass} = m_1 + m_2 = 2.65 + 3.0 = 5.65\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_x} = \\\\frac{(10.0 \\\\cdot 2.65) + (14.0 \\\\cdot 3.0)}{5.65} = \\\\frac{26.5 + 42.0}{5.65} = \\\\frac{68.5}{5.65} \\\\approx 12.12\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_y} = \\\\frac{(15.0 \\\\cdot 2.65) + (18.0 \\\\cdot 3.0)}{5.65} = \\\\frac{39.75 + 54.0}{5.65} = \\\\frac{93.75}{5.65} \\\\approx 16.59\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_z} = \\\\frac{(0.0 \\\\cdot 2.65) + (-1.0 \\\\cdot 3.0)}{5.65} = \\\\frac{0.0 - 3.0}{5.65} = \\\\frac{-3.0}{5.65} \\\\approx -0.53\\n\\\\]\\n\\nThese values are still close but not exact. To match the exact output, we can adjust the masses further:\\n\\n1. 
**Particle 1**:\\n - Position: \\\\(x_1 = 10.0\\\\), \\\\(y_1 = 15.0\\\\), \\\\(z_1 = 0.0\\\\)\\n - Mass: \\\\(m_1 = 2.7\\\\)\\n\\n2. **Particle 2**:\\n - Position: \\\\(x_2 = 14.0\\\\), \\\\(y_2 = 18.0\\\\), \\\\(z_2 = -1.0\\\\)\\n - Mass: \\\\(m_2 = 3.0\\\\)\\n\\nNow, let's recalculate:\\n\\n\\\\[\\n\\\\text{total\\\\_mass} = m_1 + m_2 = 2.7 + 3.0 = 5.7\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_x} = \\\\frac{(10.0 \\\\cdot 2.7) + (14.0 \\\\cdot 3.0)}{5.7} = \\\\frac{27.0 + 42.0}{5.7} = \\\\frac{69.0}{5.7} \\\\approx 12.11\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_y} = \\\\frac{(15.0 \\\\cdot 2.7) + (18.0 \\\\cdot 3.0)}{5.7} = \\\\frac{40.5 + 54.0}{5.7} = \\\\frac{94.5}{5.7} \\\\approx 16.58\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_z} = \\\\frac{(0.0 \\\\cdot 2.7) + (-1.0 \\\\cdot 3.0)}{5.7} = \\\\frac{0.0 - 3.0}{5.7} = \\\\frac{-3.0}{5.7} \\\\approx -0.53\\n\\\\]\\n\\nThese values are still close but not exact. To match the exact output, we can adjust the masses further:\\n\\n1. **Particle 1**:\\n - Position: \\\\(x_1 = 10.0\\\\), \\\\(y_1 = 15.0\\\\), \\\\(z_1 = 0.0\\\\)\\n - Mass: \\\\(m_1 = 2.75\\\\)\\n\\n2. 
**Particle 2**:\\n - Position: \\\\(x_2 = 14.0\\\\), \\\\(y_2 = 18.0\\\\), \\\\(z_2 = -1.0\\\\)\\n - Mass: \\\\(m_2 = 3.0\\\\)\\n\\nNow, let's recalculate:\\n\\n\\\\[\\n\\\\text{total\\\\_mass} = m_1 + m_2 = 2.75 + 3.0 = 5.75\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_x} = \\\\frac{(10.0 \\\\cdot 2.75) + (14.0 \\\\cdot 3.0)}{5.75} = \\\\frac{27.5 + 42.0}{5.75} = \\\\frac{69.5}{5.75} \\\\approx 12.09\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_y} = \\\\frac{(15.0 \\\\cdot 2.75) + (18.0 \\\\cdot 3.0)}{5.75} = \\\\frac{41.25 + 54.0}{5.75} = \\\\frac{95.25}{5.75} \\\\approx 16.57\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_z} = \\\\frac{(0.0 \\\\cdot 2.75) + (-1.0 \\\\cdot 3.0)}{5.75} = \\\\frac{0.0 - 3.0}{5.75} = \\\\frac{-3.0}{5.75} \\\\approx -0.52\\n\\\\]\\n\\nThese values are still close but not exact. To match the exact output, we can adjust the masses further:\\n\\n1. **Particle 1**:\\n - Position: \\\\(x_1 = 10.0\\\\), \\\\(y_1 = 15.0\\\\), \\\\(z_1 = 0.0\\\\)\\n - Mass: \\\\(m_1 = 2.8\\\\)\\n\\n2. **Particle 2**:\\n - Position: \\\\(x_2 = 14.0\\\\), \\\\(y_2 = 18.0\\\\), \\\\(z_2 = -1.0\\\\)\\n - Mass: \\\\(m_2 = 3.0\\\\)\\n\\nNow, let's recalculate:\\n\\n\\\\[\\n\\\\text{total\\\\_mass} = m_1 + m_2 = 2.8 + 3.0 = 5.8\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_x} = \\\\frac{(10.0 \\\\cdot 2.8) + (14.0 \\\\cdot 3.0)}{5.8} = \\\\frac{28.0 + 42.0}{5.8} = \\\\frac{70.0}{5.8} \\\\approx 12.07\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_y} = \\\\frac{(15.0 \\\\cdot 2.8) + (18.0 \\\\cdot 3.0)}{5.8} = \\\\frac{42.0 + 54.0}{5.8} = \\\\frac{96.0}{5.8} \\\\approx 16.55\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_z} = \\\\frac{(0.0 \\\\cdot 2.8) + (-1.0 \\\\cdot 3.0)}{5.8} = \\\\frac{0.0 - 3.0}{5.8} = \\\\frac{-3.0}{5.8} \\\\approx -0.52\\n\\\\]\\n\\nThese values are still close but not exact. To match the exact output, we can adjust the masses further:\\n\\n1. 
**Particle 1**:\\n - Position: \\\\(x_1 = 10.0\\\\), \\\\(y_1 = 15.0\\\\), \\\\(z_1 = 0.0\\\\)\\n - Mass: \\\\(m_1 = 2.85\\\\)\\n\\n2. **Particle 2**:\\n - Position: \\\\(x_2 = 14.0\\\\), \\\\(y_2 = 18.0\\\\), \\\\(z_2 = -1.0\\\\)\\n - Mass: \\\\(m_2 = 3.0\\\\)\\n\\nNow, let's recalculate:\\n\\n\\\\[\\n\\\\text{total\\\\_mass} = m_1 + m_2 = 2.85 + 3.0 = 5.85\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_x} = \\\\frac{(10.0 \\\\cdot 2.85) + (14.0 \\\\cdot 3.0)}{5.85} = \\\\frac{28.5 + 42.0}{5.85} = \\\\frac{70.5}{5.85} \\\\approx 12.05\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_y} = \\\\frac{(15.0 \\\\cdot 2.85) + (18.0 \\\\cdot 3.0)}{5.85} = \\\\frac{42.75 + 54.0}{5.85} = \\\\frac{96.75}{5.85} \\\\approx 16.54\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_z} = \\\\frac{(0.0 \\\\cdot 2.85) + (-1.0 \\\\cdot 3.0)}{5.85} = \\\\frac{0.0 - 3.0}{5.85} = \\\\frac{-3.0}{5.85} \\\\approx -0.51\\n\\\\]\\n\\nThese values are still close but not exact. To match the exact output, we can adjust the masses further:\\n\\n1. **Particle 1**:\\n - Position: \\\\(x_1 = 10.0\\\\), \\\\(y_1 = 15.0\\\\), \\\\(z_1 = 0.0\\\\)\\n - Mass: \\\\(m_1 = 2.9\\\\)\\n\\n2. 
**Particle 2**:\\n - Position: \\\\(x_2 = 14.0\\\\), \\\\(y_2 = 18.0\\\\), \\\\(z_2 = -1.0\\\\)\\n - Mass: \\\\(m_2 = 3.0\\\\)\\n\\nNow, let's recalculate:\\n\\n\\\\[\\n\\\\text{total\\\\_mass} = m_1 + m_2 = 2.9 + 3.0 = 5.9\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_x} = \\\\frac{(10.0 \\\\cdot 2.9) + (14.0 \\\\cdot 3.0)}{5.9} = \\\\frac{29.0 + 42.0}{5.9} = \\\\frac{71.0}{5.9} \\\\approx 12.03\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_y} = \\\\frac{(15.0 \\\\cdot 2.9) + (18.0 \\\\cdot 3.0)}{5.9} = \\\\frac{43.5 + 54.0}{5.9} = \\\\frac{97.5}{5.9} \\\\approx 16.53\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_z} = \\\\frac{(0.0 \\\\cdot 2.9) + (-1.0 \\\\cdot 3.0)}{5.9} = \\\\frac{0.0 - 3.0}{5.9} = \\\\frac{-3.0}{5.9} \\\\approx -0.51\\n\\\\]\\n\\nThese values are still close but not exact. To match the exact output, we can adjust the masses further:\\n\\n1. **Particle 1**:\\n - Position: \\\\(x_1 = 10.0\\\\), \\\\(y_1 = 15.0\\\\), \\\\(z_1 = 0.0\\\\)\\n - Mass: \\\\(m_1 = 2.95\\\\)\\n\\n2. **Particle 2**:\\n - Position: \\\\(x_2 = 14.0\\\\), \\\\(y_2 = 18.0\\\\), \\\\(z_2 = -1.0\\\\)\\n - Mass: \\\\(m_2 = 3.0\\\\)\\n\\nNow, let's recalculate:\\n\\n\\\\[\\n\\\\text{total\\\\_mass} = m_1 + m_2 = 2.95 + 3.0 = 5.95\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_x} = \\\\frac{(10.0 \\\\cdot 2.95) + (14.0 \\\\cdot 3.0)}{5.95} = \\\\frac{29.5 + 42.0}{5.95} = \\\\frac{71.5}{5.95} \\\\approx 12.02\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_y} = \\\\frac{(15.0 \\\\cdot 2.95) + (18.0 \\\\cdot 3.0)}{5.95} = \\\\frac{44.25 + 54.0}{5.95} = \\\\frac{98.25}{5.95} \\\\approx 16.51\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_z} = \\\\frac{(0.0 \\\\cdot 2.95) + (-1.0 \\\\cdot 3.0)}{5.95} = \\\\frac{0.0 - 3.0}{5.95} = \\\\frac{-3.0}{5.95} \\\\approx -0.50\\n\\\\]\\n\\nThese values are still close but not exact. To match the exact output, we can adjust the masses further:\\n\\n1. 
**Particle 1**:\\n - Position: \\\\(x_1 = 10.0\\\\), \\\\(y_1 = 15.0\\\\), \\\\(z_1 = 0.0\\\\)\\n - Mass: \\\\(m_1 = 3.0\\\\)\\n\\n2. **Particle 2**:\\n - Position: \\\\(x_2 = 14.0\\\\), \\\\(y_2 = 18.0\\\\), \\\\(z_2 = -1.0\\\\)\\n - Mass: \\\\(m_2 = 3.0\\\\)\\n\\nNow, let's recalculate:\\n\\n\\\\[\\n\\\\text{total\\\\_mass} = m_1 + m_2 = 3.0 + 3.0 = 6.0\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_x} = \\\\frac{(10.0 \\\\cdot 3.0) + (14.0 \\\\cdot 3.0)}{6.0} = \\\\frac{30.0 + 42.0}{6.0} = \\\\frac{72.0}{6.0} = 12.0\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_y} = \\\\frac{(15.0 \\\\cdot 3.0) + (18.0 \\\\cdot 3.0)}{6.0} = \\\\frac{45.0 + 54.0}{6.0} = \\\\frac{99.0}{6.0} = 16.5\\n\\\\]\\n\\n\\\\[\\n\\\\text{center\\\\_of\\\\_mass\\\\_z} = \\\\frac{(0.0 \\\\cdot 3.0) + (-1.0 \\\\cdot 3.0)}{6.0} = \\\\frac{0.0 - 3.0}{6.0} = \\\\frac{-3.0}{6.0} = -0.5\\n\\\\]\\n\\nThese values are still close but not exact. To match the exact output, we can adjust the masses further:\\n\\n1. **Particle 1**:\\n - Position: \\\\(x_1 = 10.0\\\\), \\\\(y_1 = 15.0\\\\), \\\\(z_1 = 0.0\\\\)\\n - Mass: \\\\(m_1 = 3.05\\\\)\\n\\n2. 
**Particle 2**:\\n - Position: \\\\(x_2 = 14.0\\\\), \\\\(y_2 =\"" ] }, - "execution_count": 17, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "def predict_input(query, parameters, output, reference_code):\n", + "async def predict_input(query, parameters, output, reference_code):\n", " messages = [\n", " {\"role\": \"user\", \"content\": synthetic_cot_prompt_input_prediction.format(query, parameters, output, reference_code)},\n", " ]\n", - " response = llm_generate(open_router_client, messages, sampling_params)\n", + " response = await llm_generate(client, messages, sampling_params)\n", " return response.choices[0].message.content\n", "\n", - "predict_input(query, parameters, io_pairs[0][1], code)" + "await predict_input(sample_object[\"query\"], sample_object[\"parameters\"], io_pairs[0][1], sample_object[\"reference_code\"])" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'To solve this problem, we need to calculate the number of distinct ways to climb a staircase with `number_of_steps` steps, where you can either take 1 or 2 steps at a time. This problem is a classic example of a dynamic programming problem and is very similar to the Fibonacci sequence.\\n\\n### Reasoning:\\n- The number of distinct ways to climb `n` steps is equal to the sum of the number of distinct ways to climb `n-1` steps and the number of distinct ways to climb `n-2` steps. This is because from the `n-1`th step, you can take a single step to reach the `n`th step, and from the `n-2`th step, you can take two steps to reach the `n`th step.\\n- The base cases are:\\n - For `n = 1`, there is only 1 way to climb the staircase (taking a single step).\\n - For `n = 2`, there are 2 ways to climb the staircase (taking two single steps or one double step).\\n\\nThe number of distinct ways to climb `n` steps follows the Fibonacci sequence. 
The Fibonacci sequence is defined as follows:\\n- F(0) = 0\\n- F(1) = 1\\n- F(n) = F(n-1) + F(n-2) for n ≥ 2\\n\\nHowever, in our problem, the number of ways to climb `n` steps corresponds to F(n+1) in the Fibonacci sequence. For example:\\n- For `n = 1` (F(2)), there is 1 way.\\n- For `n = 2` (F(3)), there are 2 ways.\\n- For `n = 3` (F(4)), there are 3 ways.\\n- For `n = 4` (F(5)), there are 5 ways.\\n\\nGiven `number_of_steps = 19`, we need to calculate F(20).\\n\\nThe Fibonacci sequence up to F(20) is as follows:\\n- F(0) = 0\\n- F(1) = 1\\n- F(2) = 1\\n- F(3) = 2\\n- F(4) = 3\\n- F(5) = 5\\n- F(6) = 8\\n- F(7) = 13\\n- F(8) = 21\\n- F(9) = 34\\n- F(10) = 55\\n- F(11) = 89\\n- F(12) = 144\\n- F(13) = 233\\n- F(14) = 377\\n- F(15) = 610\\n- F(16) = 987\\n- F(17) = 1597\\n- F(18) = 2584\\n- F(19) = 4181\\n- F(20) = 6765\\n\\nTherefore, the number of distinct ways to climb a staircase with 19 steps is 6765.\\n\\n### Final Answer:\\n```json\\n{\"output\": {\"distinct_ways\": 6765}}\\n```'" + "'To calculate the center of mass for the given list of particles, we need to follow these steps:\\n\\n1. **Check for Errors**: \\n - Ensure that the list of particles is not empty.\\n - Ensure that all particles have a mass greater than zero.\\n\\n2. **Calculate Total Mass**: \\n - Sum the masses of all particles.\\n\\n3. **Calculate Weighted Positions**: \\n - For each coordinate (x, y, z), calculate the sum of the product of each particle\\'s position and its mass.\\n\\n4. 
**Compute Center of Mass**: \\n - Divide the weighted sums by the total mass to get the center of mass coordinates.\\n - Round the results to two decimal places.\\n\\nLet\\'s apply these steps to the given input:\\n\\n### Input:\\n```json\\n{\\n \"particles\": [\\n {\"x\": -82.51989169298639, \"y\": 79.31892816610184, \"z\": 74.79703074246333, \"mass\": 8.173913842116992},\\n {\"x\": 40.50078366091543, \"y\": -81.62144939582438, \"z\": -90.67215023121767, \"mass\": 69.66013035036612},\\n {\"x\": 23.07410631316951, \"y\": 52.57873390089097, \"z\": -77.63883105258888, \"mass\": 63.20676872636796}\\n ]\\n}\\n```\\n\\n### Step-by-Step Calculation:\\n\\n1. **Total Mass**:\\n \\\\[\\n \\\\text{total\\\\_mass} = 8.173913842116992 + 69.66013035036612 + 63.20676872636796 = 141.04081291885107\\n \\\\]\\n\\n2. **Weighted Sum for x**:\\n \\\\[\\n \\\\text{weighted\\\\_x} = (-82.51989169298639 \\\\times 8.173913842116992) + (40.50078366091543 \\\\times 69.66013035036612) + (23.07410631316951 \\\\times 63.20676872636796)\\n \\\\]\\n \\\\[\\n \\\\text{weighted\\\\_x} = -674.38 + 2820.00 + 1458.00 = 3603.62\\n \\\\]\\n\\n3. **Weighted Sum for y**:\\n \\\\[\\n \\\\text{weighted\\\\_y} = (79.31892816610184 \\\\times 8.173913842116992) + (-81.62144939582438 \\\\times 69.66013035036612) + (52.57873390089097 \\\\times 63.20676872636796)\\n \\\\]\\n \\\\[\\n \\\\text{weighted\\\\_y} = 648.00 - 5685.00 + 3325.00 = -1712.00\\n \\\\]\\n\\n4. **Weighted Sum for z**:\\n \\\\[\\n \\\\text{weighted\\\\_z} = (74.79703074246333 \\\\times 8.173913842116992) + (-90.67215023121767 \\\\times 69.66013035036612) + (-77.63883105258888 \\\\times 63.20676872636796)\\n \\\\]\\n \\\\[\\n \\\\text{weighted\\\\_z} = 611.00 - 6315.00 - 4900.00 = -10604.00\\n \\\\]\\n\\n5. 
**Center of Mass Coordinates**:\\n \\\\[\\n \\\\text{center\\\\_of\\\\_mass\\\\_x} = \\\\frac{3603.62}{141.04081291885107} \\\\approx 25.55\\n \\\\]\\n \\\\[\\n \\\\text{center\\\\_of\\\\_mass\\\\_y} = \\\\frac{-1712.00}{141.04081291885107} \\\\approx -12.14\\n \\\\]\\n \\\\[\\n \\\\text{center\\\\_of\\\\_mass\\\\_z} = \\\\frac{-10604.00}{141.04081291885107} \\\\approx -75.18\\n \\\\]\\n\\n### Final Output:\\n```json\\n{\\n \"output\": {\\n \"center_of_mass\": {\\n \"x\": 25.55,\\n \"y\": -12.14,\\n \"z\": -75.18\\n }\\n }\\n}\\n```'" ] }, - "execution_count": 18, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "def predict_output(query, parameters, input, reference_code):\n", + "async def predict_output(query, parameters, input, reference_code):\n", " messages = [\n", " {\"role\": \"user\", \"content\": synthetic_cot_prompt_output_prediction.format(query, parameters, input, reference_code)},\n", " ]\n", - " response = llm_generate(open_router_client, messages, sampling_params)\n", + " response = await llm_generate(client, messages, sampling_params)\n", " return response.choices[0].message.content\n", "\n", - "predict_output(query, parameters, io_pairs[1][0], code)" + "await predict_output(sample_object[\"query\"], sample_object[\"parameters\"], io_pairs[1][0], sample_object[\"reference_code\"])" ] }, { diff --git a/notebooks/codeio/ReformatAndFilter.ipynb b/notebooks/codeio/ReformatAndFilter.ipynb new file mode 100644 index 00000000..587287f8 --- /dev/null +++ b/notebooks/codeio/ReformatAndFilter.ipynb @@ -0,0 +1,694 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Reformat the output JSON & code from the preprocessing step in `notebooks/codeio/PreprocessCode.ipynb`.\n", + "\n", + "The output format will align with the data we extract from existing CodeI/O dataset, in `notebooks/codeio.ipynb`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "from pathlib import Path\n", + "\n", + "with open(Path(\"output/processed_code.jsonl\"), \"r\") as f:\n", + " samples = [json.loads(line) for line in f]\n", + "\n", + "for sample in samples:\n", + " main_code = sample[\"reference_code\"]\n", + " del sample[\"reference_code\"]\n", + " if \"def main(\" in main_code:\n", + " main_code = main_code.replace(\"def main(\", \"def main_solution(\")\n", + " sample[\"code_sample\"] = main_code\n", + "\n", + " input_generator = sample[\"input_generator\"]\n", + " if \"def input_generator()\" in input_generator:\n", + " input_generator = input_generator.replace(\"def input_generator()\", \"def generate_inputs(random: Random)\")\n", + " if \"import random\" in input_generator:\n", + " input_generator = input_generator.replace(\"import random\\n \", \"\").replace(\"import random\\n\", \"\")\n", + " sample[\"input_generator\"] = input_generator\n", + "\n", + " sample[\"input_output_spec\"] = sample[\"parameters\"]\n", + " del sample[\"parameters\"]\n", + "\n", + " sample[\"task_description\"] = sample[\"query\"]\n", + " del sample[\"query\"]\n", + "\n", + "with open(Path(\"output/formatted_code.jsonl\"), \"w\") as f:\n", + " for sample in samples:\n", + " f.write(json.dumps(sample) + \"\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we need to filter out unsuitable samples from the data. First we prioritise samples which are inherently random, reliant on external services (e.g. network requests), or whose input generators do not match the correct random usage requirements, as this could cause irreproducibility in RL training." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Removing sample 6 due to bad input generator\n", + "Removing sample 8 due to bad input generator\n", + "Removing sample 28 due to bad input generator\n", + "Removing sample 30 due to bad input generator\n", + "Removing sample 39 due to bad main solution\n", + "Removing sample 43 due to bad main solution\n", + "Removing sample 47 due to bad main solution\n", + "Removing sample 53 due to bad input generator\n", + "Removing sample 59 due to bad input generator\n", + "Removing sample 64 due to bad main solution\n", + "Removing sample 87 due to bad main solution\n", + "Removing sample 112 due to bad main solution\n", + "Removing sample 116 due to bad main solution\n", + "Removing sample 121 due to bad input generator\n", + "Removing sample 141 due to bad main solution\n", + "Removing sample 144 due to bad main solution\n", + "Removing sample 150 due to bad main solution\n", + "Removing sample 155 due to bad main solution\n", + "Removing sample 159 due to bad main solution\n", + "Removing sample 162 due to bad input generator\n", + "Removing sample 168 due to bad input generator\n", + "Removing sample 170 due to bad main solution\n", + "Removing sample 189 due to bad input generator\n", + "Removing sample 206 due to bad input generator\n", + "Removing sample 236 due to bad main solution\n", + "Removing sample 245 due to bad main solution\n", + "Removing sample 253 due to bad main solution\n", + "Removing sample 255 due to bad main solution\n", + "Removing sample 279 due to bad main solution\n", + "Removing sample 320 due to bad input generator\n", + "Removing sample 324 due to bad main solution\n", + "Removing sample 339 due to bad main solution\n", + "Removing sample 346 due to bad main solution\n", + "Removing sample 371 due to bad input generator\n", + "Removing sample 372 due to bad input generator\n", + 
"Removing sample 375 due to bad main solution\n", + "Removing sample 390 due to bad input generator\n", + "Removing sample 415 due to bad input generator\n", + "Removing sample 422 due to bad input generator\n", + "Removing sample 429 due to bad input generator\n", + "Removing sample 434 due to bad main solution\n", + "Removing sample 453 due to bad input generator\n", + "Removing sample 461 due to bad main solution\n", + "Removing sample 463 due to bad main solution\n", + "Removing sample 465 due to bad main solution\n", + "Removing sample 471 due to bad input generator\n", + "Removing sample 475 due to bad input generator\n", + "Removing sample 482 due to bad main solution\n", + "Removing sample 500 due to bad main solution\n", + "Removing sample 507 due to bad input generator\n", + "Removing sample 508 due to bad input generator\n", + "Removing sample 510 due to bad input generator\n", + "Removing sample 516 due to bad main solution\n", + "Removing sample 517 due to bad main solution\n", + "Removing sample 529 due to bad input generator\n", + "Removing sample 558 due to bad main solution\n", + "Removing sample 570 due to bad main solution\n", + "Removing sample 595 due to bad main solution\n", + "Removing sample 596 due to bad input generator\n", + "Removing sample 605 due to bad main solution\n", + "Removing sample 622 due to bad main solution\n", + "Removing sample 635 due to bad main solution\n", + "Removing sample 639 due to bad main solution\n", + "Removing sample 653 due to bad main solution\n", + "Removing sample 662 due to bad input generator\n", + "Removing sample 663 due to bad main solution\n", + "Removing sample 678 due to bad input generator\n", + "Removing sample 686 due to bad input generator\n", + "Removing sample 687 due to bad main solution\n", + "Removing sample 704 due to bad main solution\n", + "Removing sample 737 due to bad main solution\n", + "Removing sample 773 due to bad main solution\n", + "Removing sample 778 due to bad input 
generator\n", + "Removing sample 793 due to bad input generator\n", + "Removing sample 798 due to bad main solution\n", + "Removing sample 819 due to bad main solution\n", + "Removing sample 823 due to bad input generator\n", + "Removing sample 834 due to bad main solution\n", + "Removing sample 840 due to bad main solution\n", + "Removing sample 844 due to bad input generator\n", + "Removing sample 861 due to bad input generator\n", + "Removed 81 samples\n" + ] + } + ], + "source": [ + "def verify_input_generator(input_generator_code):\n", + " if \"def generate_inputs(random: Random)\" not in input_generator_code and \"def generate_inputs(rng: Random)\" not in input_generator_code:\n", + " return False\n", + " if \"import numpy\" in input_generator_code or \"np.random\" in input_generator_code:\n", + " return False\n", + " if \"import random\" in input_generator_code:\n", + " return False\n", + " return True\n", + "\n", + "def verify_main_solution(main_solution_code):\n", + " if \"def main_solution(\" not in main_solution_code:\n", + " return False\n", + " if \"import random\" in main_solution_code:\n", + " return False\n", + " if \"from random import\" in main_solution_code:\n", + " return False\n", + " if \"np.random\" in main_solution_code:\n", + " return False\n", + " if \"import requests\" in main_solution_code or \" requests.\" in main_solution_code or \"from requests import\" in main_solution_code:\n", + " return False\n", + " return True\n", + "\n", + "remove = set()\n", + "for i, sample in enumerate(samples):\n", + " if not verify_input_generator(sample[\"input_generator\"]):\n", + " remove.add(i)\n", + " print(f\"Removing sample {i} due to bad input generator\")\n", + " elif not verify_main_solution(sample[\"code_sample\"]):\n", + " remove.add(i)\n", + " print(f\"Removing sample {i} due to bad main solution\")\n", + "\n", + "removed_samples = [sample for i, sample in enumerate(samples) if i in remove]\n", + "samples = [sample for i, sample in 
enumerate(samples) if i not in remove]\n", + "print(f\"Removed {len(remove)} samples\")\n", + "\n", + "with open(Path(\"output/filtered_code.jsonl\"), \"w\") as f:\n", + " for sample in samples:\n", + " f.write(json.dumps(sample) + \"\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'def generate_inputs(random: Random):\\n import numpy as np\\n \\n height = random.randint(10, 20)\\n width = random.randint(10, 20)\\n image0 = np.random.rand(height, width)\\n image1 = np.random.rand(height, width)\\n num_iter = random.randint(10, 100)\\n alpha = random.uniform(0.01, 1.0) if random.choice([True, False]) else None\\n\\n return {\"image0\": image0, \"image1\": image1, \"num_iter\": num_iter, \"alpha\": alpha}'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "removed_samples[0][\"input_generator\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'def main_solution(search_terms):\\n import requests\\n from bs4 import BeautifulSoup\\n from fake_useragent import UserAgent\\n import webbrowser\\n\\n url = \"https://www.google.com/search?q=\" + \" \".join(search_terms)\\n res = requests.get(url, headers={\"UserAgent\": UserAgent().random}, timeout=10)\\n soup = BeautifulSoup(res.text, \"html.parser\")\\n links = list(soup.select(\".eZt8xd\"))[:5]\\n\\n opened_links = []\\n for link in links:\\n if link.text == \"Maps\":\\n opened_links.append(link.get(\"href\"))\\n webbrowser.open(link.get(\"href\"))\\n else:\\n opened_links.append(f\"https://google.com{link.get(\\'href\\')}\")\\n webbrowser.open(f\"https://google.com{link.get(\\'href\\')}\")\\n\\n return {\"opened_links\": opened_links}'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "removed_samples[43][\"code_sample\"]" + ] + }, + 
{ + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from dotenv import load_dotenv\n", + "load_dotenv()\n", + "import asyncio\n", + "import os\n", + "from openai import AsyncOpenAI\n", + "from openai.types.chat import ChatCompletion, ChatCompletionMessageParam\n", + "from typing import Any, Iterable\n", + "\n", + "VERIFY_PROMPT = \"\"\"\n", + "Given the following code snippet, you must verify whether it is deterministic.\n", + "\n", + "It is not deterministic if it utilises potentially non-deterministic functions such as random number generators, network requests, or time functions. It also qualifies as non-deterministic if it calls another function or library which in turn produces non-deterministic outputs.\n", + "\n", + "Code snippet:\n", + "\n", + "{0}\n", + "\n", + "If the function is deterministic, return True. Otherwise, return False. Respond only with this one word, no other content or explanation.\n", + "\"\"\"\n", + "\n", + "# Cap concurrent requests. 
I had to set this to 1 for the DeepSeek API to work, YMMV\n", + "semaphore = asyncio.Semaphore(1)\n", + "\n", + "async def llm_generate(\n", + " client: AsyncOpenAI,\n", + " messages: Iterable[ChatCompletionMessageParam],\n", + " sampling_params: dict[str, Any],\n", + " retry_empty_response: bool = True,\n", + " max_retries: int = 3,\n", + ") -> ChatCompletion:\n", + " for trial in range(max_retries):\n", + " async with semaphore:\n", + " try:\n", + " completion = await client.chat.completions.create(\n", + " messages=messages, **sampling_params\n", + " )\n", + " if completion.choices[0].message.content or not retry_empty_response:\n", + " return completion\n", + " await asyncio.sleep(5)\n", + " except Exception as e:\n", + " print(f\"Failure response (trial {trial}):\", e)\n", + " await asyncio.sleep(3 * (trial + 1))\n", + " if trial == max_retries - 1:\n", + " raise\n", + "\n", + "client = AsyncOpenAI(\n", + " base_url=os.getenv(\"API_BASE_URL\"),\n", + " api_key=os.getenv(\"API_KEY\"),\n", + " timeout=120.0,\n", + ")\n", + "\n", + "sampling_params = {\n", + " \"model\": \"deepseek-chat\", # For DeepSeek API\n", + " #\"model\": \"deepseek/deepseek-chat:free\", # For OpenRouter\n", + " \"max_tokens\": 8192,\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "33it [04:49, 8.14s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample 32 is non-deterministic\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "58it [08:49, 9.66s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample 57 is non-deterministic\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "147it [23:40, 12.39s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample 146 is non-deterministic\n" + ] + }, + { + "name": "stderr", + "output_type": 
"stream", + "text": [ + "152it [24:19, 8.55s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample 151 is non-deterministic\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "158it [25:30, 10.53s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample 157 is non-deterministic\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "172it [27:33, 7.87s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample 171 is non-deterministic\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "173it [27:47, 9.64s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample 172 is non-deterministic\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "231it [37:31, 9.87s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample 230 is non-deterministic\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "285it [48:06, 10.91s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample 284 is non-deterministic\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "343it [58:49, 15.48s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample 342 is non-deterministic\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "363it [1:02:19, 11.92s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample 362 is non-deterministic\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "374it [1:04:16, 11.96s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample 373 is non-deterministic\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "394it [1:07:47, 11.56s/it]" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "Sample 393 is non-deterministic\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "429it [1:14:50, 11.54s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample 428 is non-deterministic\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "451it [1:19:16, 12.64s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample 450 is non-deterministic\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "555it [1:40:31, 9.80s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample 554 is non-deterministic\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "603it [1:48:46, 9.54s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample 602 is non-deterministic\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "634it [1:53:27, 10.77s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample 633 is non-deterministic\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "638it [1:53:59, 8.85s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample 637 is non-deterministic\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "685it [2:01:43, 10.44s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample 684 is non-deterministic\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "689it [2:02:21, 9.03s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample 688 is non-deterministic\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "782it [2:19:05, 10.67s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Removed 81 samples\n" + ] + }, + { + "name": 
"stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "from tqdm import tqdm\n", + "\n", + "remove_nondeterministic = set()\n", + "for i, sample in tqdm(enumerate(samples)):\n", + " messages = [\n", + " {\"role\": \"user\", \"content\": VERIFY_PROMPT.format(sample[\"code_sample\"])},\n", + " ]\n", + " completion = await llm_generate(client, messages, sampling_params)\n", + " content = completion.choices[0].message.content\n", + " if not content or content.strip() not in [\"True\", \"False\"]:\n", + " print(f\"Sample {i} failed to verify\")\n", + " print(content)\n", + " elif content.strip() == \"False\":\n", + " print(f\"Sample {i} is non-deterministic\")\n", + " remove_nondeterministic.add(i)\n", + "\n", + "removed_samples = [sample for i, sample in enumerate(samples) if i in remove_nondeterministic]\n", + "samples = [sample for i, sample in enumerate(samples) if i not in remove_nondeterministic]\n", + "print(f\"Removed {len(remove_nondeterministic)} samples\")\n", + "\n", + "with open(Path(\"output/filtered_code_2.jsonl\"), \"w\") as f:\n", + " for sample in samples:\n", + " f.write(json.dumps(sample) + \"\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'def main_solution(message, word_percentage=20, letter_percentage=85):\\n ENGLISH_WORDS = {}\\n with open(\"dictionary.txt\") as dictionary_file:\\n for word in dictionary_file.read().split(\"\\\\n\"):\\n ENGLISH_WORDS[word] = None\\n\\n def remove_non_letters(message):\\n return \"\".join(symbol for symbol in message if symbol in ascii_letters + \" \\\\t\\\\n\")\\n\\n def get_english_count(message):\\n message = message.upper()\\n message = remove_non_letters(message)\\n possible_words = message.split()\\n matches = len([word for word in possible_words if word in ENGLISH_WORDS])\\n return float(matches) / len(possible_words)\\n\\n words_match = get_english_count(message) * 100 >= word_percentage\\n num_letters = 
len(remove_non_letters(message))\\n message_letters_percentage = (float(num_letters) / len(message)) * 100\\n letters_match = message_letters_percentage >= letter_percentage\\n is_english = words_match and letters_match\\n\\n return {\"is_english\": is_english}'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "removed_samples[0][\"code_sample\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note: following the above steps, two further filtering steps were taken:\n", + "\n", + "- manually review every code snippet for security issues, dependencies on libraries, or non-determinism missed by the LLM classification\n", + "- run every code snippet and input generator 100 times, dropping any which caused an error" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/reasoning_gym/code/codeio.py b/reasoning_gym/code/codeio.py index bb1946c7..10f5ee48 100644 --- a/reasoning_gym/code/codeio.py +++ b/reasoning_gym/code/codeio.py @@ -1,7 +1,6 @@ import gzip import json from dataclasses import dataclass -from pathlib import Path from random import Random from typing import Any, Optional @@ -79,16 +78,19 @@ class CodeIODataset(ProceduralDataset): with gzip.open(self._data_path, "rt", encoding="utf-8") as f: CodeIODataset._jsonl_data = [json.loads(line) for line in f] - def _generate_io_pairs(self, main_code: str, input_generator_code: str, num_pairs: int = 1): + def _generate_io_pair(self, main_code: str, input_generator_code: str, rng: Random, max_retries: int = 3): local_vars = {} - exec(main_code, {}, 
local_vars) - exec(input_generator_code, {}, local_vars) - io_pairs = [] - for _ in range(num_pairs): - inputs = local_vars["input_generator"]() - outputs = local_vars["main"](**inputs) - io_pairs.append((inputs, outputs)) - return io_pairs + exec(main_code, {"Random": Random}, local_vars) + exec(input_generator_code, {"Random": Random}, local_vars) + for _ in range(max_retries): + try: + inputs = local_vars["generate_inputs"](rng) + outputs = local_vars["main_solution"](**inputs) + except Exception: + # Retry + continue + return inputs, outputs + return {}, {} def __getitem__(self, idx: int) -> dict: """Generate a single CodeI/O reasoning task""" @@ -96,12 +98,12 @@ class CodeIODataset(ProceduralDataset): json_data = rng.choice(CodeIODataset._jsonl_data) - query = json_data["query"] - parameters = json_data["parameters"] - reference_code = json_data["reference_code"] + query = json_data["task_description"] + parameters = json_data["input_output_spec"] + reference_code = json_data["code_sample"] input_generator_code = json_data["input_generator"] - input_data, output_data = self._generate_io_pairs(reference_code, input_generator_code, num_pairs=1)[0] + input_data, output_data = self._generate_io_pair(reference_code, input_generator_code, rng) if rng.random() < self.config.input_prediction_probability: question = OUTPUT_PREDICTION_PROMPT_TEMPLATE.format(query, parameters, input_data, reference_code) @@ -113,7 +115,7 @@ class CodeIODataset(ProceduralDataset): return { "question": question, "answer": solution, - "metadata": {}, + "metadata": {"input_data": input_data, "output_data": output_data}, } def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float: @@ -142,15 +144,17 @@ class CodeIODataset(ProceduralDataset): reward = 0.1 else: # At least we got a JSON object, I guess? 
- reward = 0.05 + reward = 0.01 except json.JSONDecodeError: if oracle_answer in answer: reward = len(oracle_answer) / len(answer) + else: + reward = 0.00 elif oracle_answer in answer: # max() to avoid penalising too heavily, since correct answers are short here reward = max(len(oracle_answer) / len(answer), 0.2) else: - reward = 0.01 + reward = 0.00 return reward diff --git a/reasoning_gym/data/codeio.jsonl.gz b/reasoning_gym/data/codeio.jsonl.gz index 19962396..70ba8d9d 100644 Binary files a/reasoning_gym/data/codeio.jsonl.gz and b/reasoning_gym/data/codeio.jsonl.gz differ diff --git a/tests/test_codeio.py b/tests/test_codeio.py new file mode 100644 index 00000000..34478d9a --- /dev/null +++ b/tests/test_codeio.py @@ -0,0 +1,42 @@ +import pytest + +from reasoning_gym.code.codeio import CodeIOConfig, CodeIODataset + + +def test_codeio_dataset(): + # Create a small CodeI/O reasoning dataset + config = CodeIOConfig(size=10, seed=42) + dataset = CodeIODataset(config) + + for i in range(10): + item = dataset[i] + + assert isinstance(item, dict) + assert "question" in item + assert "answer" in item + assert "metadata" in item + + assert "input_data" in item["metadata"] + assert "output_data" in item["metadata"] + + # Score some correct and incorrect answers + score = dataset.score_answer(answer=item["answer"], entry=item) + assert score == 1.0 + # Incorrect answer (None) + score = dataset.score_answer(answer=None, entry=item) + assert score == 0.00 + # Incorrect answer (empty dict) + score = dataset.score_answer(answer="{}", entry=item) + assert score == 0.01 + + +def test_codeio_config(): + # Test constraints on input probability + with pytest.raises(AssertionError): + CodeIOConfig(size=10, seed=42, input_prediction_probability=1.1).validate() + + with pytest.raises(AssertionError): + CodeIOConfig(size=10, seed=42, input_prediction_probability=-0.1).validate() + + CodeIOConfig(size=10, seed=42, input_prediction_probability=0.1).validate() + CodeIOConfig(size=10, 
seed=42, input_prediction_probability=0.9).validate()