diff --git a/.gitignore b/.gitignore
index d1e0d496..84b1ed2a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -45,3 +45,5 @@ htmlcov/
# Jupyter Notebook
.ipynb_checkpoints/
.virtual_documents/
+
+data/
\ No newline at end of file
diff --git a/notebooks/codeio.ipynb b/notebooks/codeio.ipynb
new file mode 100644
index 00000000..4e41f77b
--- /dev/null
+++ b/notebooks/codeio.ipynb
@@ -0,0 +1,314 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import datasets\n",
+ "import re\n",
+ "import json\n",
+ "from tqdm import tqdm\n",
+ "import os\n",
+ "import requests\n",
+ "import random\n",
+ "from random import Random"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load the CodeIO-PyEdu-Reasoning dataset with `datasets.load_dataset` and keep its 'train' split.\n",
+ "dataset = datasets.load_dataset(\"hkust-nlp/CodeIO-PyEdu-Reasoning\")['train']"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Extract the relevant parts of the prompt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1630607/1630607 [01:20<00:00, 20302.13it/s]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "There were 1489543 out of 1630607 duplicate entries\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Every prompt is matched against one fixed template; the three groups capture the\n",
+ "# task description, the input/output spec, and the reference code snippet.\n",
+ "pattern = re.compile(\n",
+ "    r'(?s)'  # DOTALL so . matches newlines\n",
+ "    r'You are given a question that requires some input and output variables as follows:\\s*(.*?)'\n",
+ "    r'\\s*The input and output requirements are as follows:\\s*(.*?)'\n",
+ "    r'\\s*Given the following.*?Tip: Here is a reference code snippet for this question\\. '\n",
+ "    r'You can refer to this code to guide your reasoning but not copy spans of code directly\\.\\s*(.*)'\n",
+ ")\n",
+ "\n",
+ "output_path = \"data/codeio-pyedu-extracted.jsonl\"\n",
+ "# data/ is git-ignored, so it is missing on a fresh checkout; create it up front\n",
+ "# instead of failing with FileNotFoundError when the file is opened.\n",
+ "os.makedirs(os.path.dirname(output_path), exist_ok=True)\n",
+ "\n",
+ "seen = set()\n",
+ "duplicate = 0\n",
+ "\n",
+ "# Plain \"w\": the file is only written here, never read back through this handle.\n",
+ "with open(output_path, \"w\", encoding=\"utf-8\") as f:\n",
+ "    for i, item in tqdm(enumerate(dataset), total=len(dataset)):\n",
+ "        match = pattern.search(item[\"prompt\"])\n",
+ "        if match is None:\n",
+ "            print(f\"No match found for item {i}\")\n",
+ "            continue\n",
+ "\n",
+ "        # Extract relevant info\n",
+ "        task_description, input_output_spec, code_sample = (\n",
+ "            part.strip() for part in match.groups()\n",
+ "        )\n",
+ "\n",
+ "        # Keep only the first occurrence of each (description, spec, code) triple.\n",
+ "        # Per-field hashes are stored instead of the texts to keep the set small.\n",
+ "        entry_key = (hash(task_description), hash(input_output_spec), hash(code_sample))\n",
+ "        if entry_key in seen:\n",
+ "            duplicate += 1\n",
+ "            continue\n",
+ "        seen.add(entry_key)\n",
+ "\n",
+ "        # Save to disk as one JSON object per line\n",
+ "        json.dump({\n",
+ "            \"task_description\": task_description,\n",
+ "            \"input_output_spec\": input_output_spec,\n",
+ "            \"code_sample\": code_sample\n",
+ "        }, f)\n",
+ "        f.write(\"\\n\")\n",
+ "\n",
+ "print(f\"There were {duplicate} out of {len(dataset)} duplicate entries\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Create input generators for each problem separately"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# System prompt: instructs the model to reply with a `generate_input(rng: Random) -> dict`\n",
+ "# function that draws its randomness from the `Random` instance it is given.\n",
+ "SYSTEM_PROMPT = \"\"\"You are a helpful assistant that generates valid Python functions that act as input generators for a given code snippet.\n",
+ "\n",
+ "You have access to `random.Random`, therefore you SHOULD NOT import it again. You should use this random number generator to make the input generation process stochastic on each call.\n",
+ "\n",
+ "When the user asks you to generate an input for a code snippet, you should strictly respond in the following format:\n",
+ "\n",
+ "def generate_input(rng: Random) -> dict:\n",
+ " # Your code here\n",
+ " pass\n",
+ "\n",
+ "\n",
+ "The output of the function should be a dictionary where the keys are the variable names and the values are the generated values.\n",
+ "\n",
+ "It must contain all the variables that listed in the user's input specification, or more precisely in the `main_solution` function signature. \n",
+ "\"\"\"\n",
+ "\n",
+ "# User prompt template: the three placeholders are filled with `.format(**entry)`, so they\n",
+ "# must match the keys of the records stored in data/codeio-pyedu-extracted.jsonl.\n",
+ "USER_PROMPT = \"\"\"Following are a task description, input/output specification, and relevant code snippet for a Python programming task.\n",
+ "\n",
+ "\n",
+ "{task_description}\n",
+ "\n",
+ "\n",
+ "\n",
+ "{input_output_spec}\n",
+ "\n",
+ "\n",
+ "\n",
+ "{code_sample}\n",
+ "\n",
+ "\n",
+ "Your task is to write a Python function `generate_input(rng: Random) -> dict` that generates valid inputs for the given code snippet, based on the provided information.\n",
+ "\"\"\"\n",
+ "\n",
+ "# Fail fast with a KeyError when the key is unset instead of sending \"Bearer None\".\n",
+ "api_key = os.environ[\"OPENROUTER_API_KEY\"]\n",
+ "\n",
+ "with open(\"data/codeio-pyedu-extracted.jsonl\", \"r\", encoding=\"utf-8\") as f:\n",
+ "    # Only the first extracted problem is sent for now; widen the range to cover more.\n",
+ "    for i in range(1):\n",
+ "        entry = json.loads(f.readline())\n",
+ "        response = requests.post(\n",
+ "            url=\"https://openrouter.ai/api/v1/chat/completions\",\n",
+ "            headers={\n",
+ "                \"Authorization\": f\"Bearer {api_key}\",\n",
+ "                \"Content-Type\": \"application/json\",\n",
+ "            },\n",
+ "            data=json.dumps({\n",
+ "                \"model\": \"deepseek/deepseek-chat\",\n",
+ "                \"messages\": [\n",
+ "                    {\"role\": \"system\", \"content\": SYSTEM_PROMPT},\n",
+ "                    {\"role\": \"user\", \"content\": USER_PROMPT.format(**entry)}\n",
+ "                ]\n",
+ "            }),\n",
+ "            # Without a timeout a stalled connection would block the cell indefinitely.\n",
+ "            timeout=120,\n",
+ "        )\n",
+ "        # Surface HTTP errors (bad key, rate limit, ...) here rather than as a\n",
+ "        # confusing KeyError on \"choices\" below.\n",
+ "        response.raise_for_status()\n",
+ "        full_response = response.json()[\"choices\"][0][\"message\"][\"content\"]\n",
+ "\n",
+ "        # The previous pattern r\"(.*?)\" always matched the empty string at position 0,\n",
+ "        # so nothing was extracted. Use the body of a markdown code fence when the\n",
+ "        # model adds one, otherwise the whole reply.\n",
+ "        fenced = re.search(r\"```[^\\n]*\\n(.*?)```\", full_response, re.DOTALL)\n",
+ "        input_generator = (fenced.group(1) if fenced else full_response).strip()\n",
+ "        if \"def generate_input\" not in input_generator:\n",
+ "            raise ValueError(\"Model response does not define generate_input\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "In the context of Conway's Game of Life, a cellular automaton devised by John Horton Conway, consider a board with `m` by `n` cells, where each cell can be either live (1) or dead (0). The state of each cell evolves based on its neighbors according to specific rules. Given the current state of the board, what will be the state of the board after one iteration of the game?\n",
+ "----------------\n",
+ "Input:\n",
+ " `board` (List[List[int]]): A 2D list representing the state of the board. Each element in the list is either `0` (dead cell) or `1` (live cell).\n",
+ "\n",
+ "Output:\n",
+ " `return` (List[List[int]]): A 2D list representing the next state of the board after applying the rules of Conway's Game of Life. Each element in the list is either `0` (dead cell) or `1` (live cell).\n",
+ "----------------\n",
+ "# import necessary packages\n",
+ "from collections import Counter\n",
+ "\n",
+ "# all class and function definitions in the code file, if any\n",
+ "class Solution(object):\n",
+ " def gameOfLifeInfinite(self, live):\n",
+ " ctr = Counter((I, J)\n",
+ " for i, j in live\n",
+ " for I in range(i-1, i+2)\n",
+ " for J in range(j-1, j+2)\n",
+ " if I != i or J != j)\n",
+ "\n",
+ " return {ij\n",
+ " for ij in ctr\n",
+ " if ctr[ij] == 3 or ctr[ij] == 2 and ij in live}\n",
+ "\n",
+ " def gameOfLife(self, board):\n",
+ " live_cell = {(row, col) for row in range(len(board)) for col in range(len(board[0])) if board[row][col]}\n",
+ " live_cell_next = self.gameOfLifeInfinite(live_cell)\n",
+ " for i, row in enumerate(board):\n",
+ " for j in range(len(row)):\n",
+ " board[i][j] = int((i, j) in live_cell_next)\n",
+ " return board\n",
+ "\n",
+ "# main function\n",
+ "def main_solution(board):\n",
+ " # Convert the input board to a list of lists if it's not already\n",
+ " if not isinstance(board, list) or not all(isinstance(row, list) for row in board):\n",
+ " raise ValueError(\"Input board must be a list of lists\")\n",
+ " \n",
+ " # Call the gameOfLife function to get the next state of the board\n",
+ " solution = Solution()\n",
+ " next_state = solution.gameOfLife(board)\n",
+ " \n",
+ " # Return the next state of the board\n",
+ " return next_state\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Show the three fields of the entry that was sent to the model, separated by rules.\n",
+ "divider = \"----------------\"\n",
+ "print(\n",
+ "    entry[\"task_description\"],\n",
+ "    divider,\n",
+ "    entry[\"input_output_spec\"],\n",
+ "    divider,\n",
+ "    entry[\"code_sample\"],\n",
+ "    sep=\"\\n\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "def generate_input(rng: Random) -> dict:\n",
+ " # Generate random dimensions for the board\n",
+ " m = rng.randint(1, 10) # Number of rows\n",
+ " n = rng.randint(1, 10) # Number of columns\n",
+ " \n",
+ " # Generate the board with random 0s and 1s\n",
+ " board = [[rng.choice([0, 1]) for _ in range(n)] for _ in range(m)]\n",
+ " \n",
+ " return {'board': board}\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Inspect the generator source extracted from the model response before running it.\n",
+ "print(input_generator)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[0]: {'board': [[1, 0], [1, 1], [1, 0], [1, 0], [0, 0], [1, 1], [0, 0], [1, 0], [1, 1]]}\n",
+ "[1]: {'board': [[1, 1, 1, 0], [0, 1, 0, 1], [0, 1, 1, 0], [0, 0, 0, 0]]}\n",
+ "[2]: {'board': [[0]]}\n",
+ "[3]: {'board': [[0, 0, 1, 1, 1, 1, 1, 0, 0, 0], [0, 1, 0, 0, 1, 0, 0, 1, 0, 0], [0, 1, 0, 0, 0, 0, 1, 1, 1, 1], [0, 0, 1, 1, 1, 0, 1, 0, 1, 0], [0, 1, 1, 0, 1, 0, 0, 0, 1, 1], [0, 0, 0, 0, 1, 1, 0, 1, 0, 1], [0, 0, 1, 1, 1, 1, 0, 0, 0, 1]]}\n",
+ "[4]: {'board': [[1, 0, 1, 1, 1], [0, 0, 0, 0, 0], [1, 0, 1, 1, 1], [1, 0, 0, 1, 0], [0, 0, 0, 1, 1], [1, 1, 1, 0, 0]]}\n"
+ ]
+ }
+ ],
+ "source": [
+ "# SECURITY: this executes model-generated code with the full privileges of the kernel.\n",
+ "# Review `input_generator` first, or run it in a sandboxed process for untrusted output.\n",
+ "#\n",
+ "# One dict is used as both globals and locals: with two separate dicts exec() runs the\n",
+ "# code like a class body, so imports or helpers the model defines at top level would\n",
+ "# not be visible inside generate_input. `Random` must be present because the function\n",
+ "# signature is annotated with it.\n",
+ "namespace = {\"random\": random, \"Random\": Random}\n",
+ "exec(input_generator, namespace)\n",
+ "generate_input_func = namespace[\"generate_input\"]\n",
+ "\n",
+ "# Fixed seed so the sample inputs printed below are reproducible across runs.\n",
+ "rng = random.Random(0)\n",
+ "\n",
+ "for i in range(5):\n",
+ "    random_input = generate_input_func(rng)\n",
+ "    print(f\"[{i}]: {random_input}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "reasoning_gym",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.11"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}