add code execution environment

This commit is contained in:
Joe Li 2025-05-07 21:18:22 -07:00
parent a282604baa
commit 63b18d891f
3 changed files with 307 additions and 0 deletions

View file

@ -0,0 +1,16 @@
# Base image: Debian + Python 3; also provides the `python` interpreter
# that server.py uses to run submitted code.
FROM python:3
# Work around flaky apt mirrors/proxies: disable HTTP pipelining and
# caching and tolerate broken proxies so `apt-get update` succeeds.
RUN echo "Acquire::http::Pipeline-Depth 0;" > /etc/apt/apt.conf.d/99custom && \
echo "Acquire::http::No-Cache true;" >> /etc/apt/apt.conf.d/99custom && \
echo "Acquire::BrokenProxy true;" >> /etc/apt/apt.conf.d/99custom
# build-essential installs gcc/g++ — presumably for the (currently
# disabled) C++ execution path in server.py; confirm before removing.
RUN apt-get update && apt-get upgrade -y \
&& apt-get install -y build-essential
# Flask serves the /execute HTTP endpoint implemented in server.py.
RUN pip install flask
WORKDIR /tmp
COPY server.py /tmp/server.py
# Start the execution API (listens on 0.0.0.0:5002).
CMD ["python", "server.py"]

View file

@ -0,0 +1,196 @@
import random
from typing import Dict, List, Optional, Tuple, TypedDict, Union
from datasets import load_dataset
from latex2sympy2_extended import NormalizationConfig
from math_verify import LatexExtractionConfig, parse, verify
from tqdm.asyncio import tqdm_asyncio
import regex as re
import asyncio
import httpx
import docker, os
from atroposlib.envs.base import BaseEnv, BaseEnvConfig, OpenaiConfig, ScoredDataGroup
from atroposlib.type_definitions import Item, number
from atroposlib.type_definitions import GameHistory, Item
from atroposlib.utils.tokenize_for_trainer import tokenize_for_trainer
# Canned "deep thinking" system prompt instructing the model to reason
# inside <think> </think> tags before answering.
# NOTE(review): defined but not referenced anywhere in this file —
# collect_trajectories uses its own inline system message instead.
# Confirm whether this is consumed elsewhere or is dead code.
system_prompt = (
    "You are a deep thinking AI, you may use extremely long chains of thought "
    "to deeply consider the problem and deliberate with yourself via systematic "
    "reasoning processes to help come to a correct solution prior to answering. "
    "You should enclose your thoughts and internal monologue inside <think> </think> "
    "tags, and then provide your solution or response to the problem.\n\n"
)
async def submit_code(
    client,
    code,
    test_input,
    language="python",
    url="http://localhost:5002/execute",
):
    """POST one program plus one stdin payload to the executor service.

    :param client: an httpx.AsyncClient used to issue the request.
    :param code: source code of the program to execute.
    :param test_input: text fed to the program on stdin.
    :param language: language tag forwarded to the executor (default "python").
    :param url: executor endpoint; parameterized (with the previous
        hard-coded value as default) so host/port is configurable per call.
    :return: the "output" field (stdout) of the executor's JSON response.
    """
    payload = {
        "code": code,
        "input": test_input,
        "language": language,
    }
    response = await client.post(url, json=payload)
    response_json = response.json()
    return response_json["output"]
async def get_results(code, answer):
    """Execute `code` once per stdin payload in `answer`, concurrently.

    :param code: source code of the program to run.
    :param answer: sequence of stdin strings, one per test case.
    :return: list of stdout strings, in the same order as `answer`.
    """
    async with httpx.AsyncClient() as client:
        tasks = [submit_code(client, code, case) for case in answer]
        # asyncio.gather already returns results as a list in input order;
        # the previous `[result for result in results]` was a redundant copy.
        return await asyncio.gather(*tasks)
def init_docker():
    """Build the code-executor image and launch it in the background.

    Publishes container port 5002 to host port 5002 so the Flask executor
    is reachable at http://localhost:5002. Returns the container handle
    (or None if the run step failed).
    """
    client = docker.from_env()

    def _build_image():
        # Build context is the directory containing this file, where the
        # Dockerfile lives.
        print("Building Docker image...")
        here = os.path.dirname(os.path.abspath(__file__))
        try:
            image, logs = client.images.build(path=here, tag="code-executor")
            for entry in logs:
                print(entry.get('stream', '').strip())
            print("Docker image built successfully.")
            return image
        except docker.errors.BuildError as e:
            print(f"Error during Docker image build: {e}")

    def _run_container():
        print("Running Docker container...")
        try:
            handle = client.containers.run(
                "code-executor",
                ports={'5002/tcp': 5002},
                detach=True,  # run in the background
            )
            print(f"Docker container is running with ID: {handle.id}")
            return handle
        except docker.errors.ContainerError as e:
            print(f"Error during Docker container run: {e}")

    _build_image()
    return _run_container()
class CodingEnv(BaseEnv):
    """RL environment for competitive-programming code generation.

    Prompts come from the deepmind/code_contests dataset; completions are
    judged by executing the extracted Python program against the problem's
    test cases inside the Docker sandbox started in setup().
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    async def collect_trajectories(
        self, item: Item
    ) -> Tuple[GameHistory | None, List[Item]]:
        """Sample config.group_size completions for one problem and score them.

        :param item: (prompt, answer) pair as produced by get_next_item().
        :return: (scored data group, backlog items) — backlog is always empty.
        """
        chat_completions = await self.server.chat_completion(
            messages=[
                {
                    "role": "system",
                    # The grader only looks at fenced code blocks, so the
                    # model must be told to emit one.
                    "content": "You must submit your answer with ```python\n{code}```",
                },
                dict(item[0][0]),
            ],
            n=self.config.group_size,
            max_tokens=1024 * 4,
        )
        to_score = list()
        to_backlog = list()
        for chat_completion in chat_completions.choices:
            messages = (
                dict(item[0][0]),
                {"role": "assistant", "content": chat_completion.message.content},
            )
            to_score.append((messages, item[1]))
        to_postprocess = await self.score(to_score)
        return to_postprocess, to_backlog

    async def evaluate(self, *args, **kwargs):
        """
        Evaluate the environment, this is called every steps_per_eval steps.

        Not implemented for this environment — scoring during collection is
        the only signal used.

        :param args:
        :param kwargs:
        :return: None.
        """
        return

    async def setup(self):
        """Start the code-execution sandbox and load the training dataset."""
        self.container = init_docker()
        self.train = load_dataset("deepmind/code_contests", split="train")
        self.iter = 0

    async def get_next_item(self) -> Item:
        """Return the next (prompt, answer) pair, cycling over the dataset.

        prompt: tuple of one frozenset-encoded user message (hashable).
        answer: (private test inputs, private test outputs,
                 generated test inputs, generated test outputs).
        """
        next_item = self.train[self.iter % len(self.train)]
        self.iter += 1
        prompt = tuple(
            [frozenset({"role": "user", "content": next_item["description"]}.items())]
        )
        answer = (
            tuple(next_item["private_tests"]["input"]),
            tuple(next_item["private_tests"]["output"]),
            tuple(next_item["generated_tests"]["input"]),
            tuple(next_item["generated_tests"]["output"]),
        )
        return (prompt, answer)

    def extract_python_code_blocks(self, text):
        """Return the contents of every fenced code block in `text`.

        The previous pattern re-consumed the closing fence via a lookahead
        and required it at line start; this simpler pattern matches
        ```lang\\n...``` fences directly.
        """
        pattern = r"```(?:\w+)?\s*\n(.*?)```"
        return re.findall(pattern, text, re.DOTALL)

    async def score(self, rollout_group_data) -> Optional[ScoredDataGroup]:
        """Execute each rollout's extracted program and assign rewards.

        Reward is 1.0 when every test case's stdout matches the reference
        output, otherwise -1.0 (including when no code block was emitted).
        """
        scores = ScoredDataGroup()
        scores["tokens"] = list()
        scores["masks"] = list()
        scores["scores"] = list()
        random.shuffle(rollout_group_data)
        for item in rollout_group_data:
            out_dict = tokenize_for_trainer(self.tokenizer, item[0])
            tokens = out_dict["tokens"]
            masks = out_dict["masks"]
            # Drop obviously degenerate completions BEFORE paying for code
            # execution (previously this filter ran after executing tests).
            if len([1 for i in masks if i != -100]) < 10:
                continue
            code_blocks = self.extract_python_code_blocks(item[0][-1]["content"])
            if not code_blocks:
                # No fenced code block produced: score as a failure instead
                # of crashing the scorer with an IndexError.
                scores["tokens"].append(tokens)
                scores["masks"].append(masks)
                scores["scores"].append(-1.0)
            else:
                code = code_blocks[0]
                test_inputs = list(item[1][0]) + list(item[1][2])
                outputs = await get_results(code, test_inputs)
                expected = list(item[1][1]) + list(item[1][3])
                assert len(outputs) == len(expected)
                # Compare ignoring trailing whitespace: print() appends a
                # newline that the reference output may or may not include.
                reward = all(
                    got.rstrip() == want.rstrip()
                    for got, want in zip(outputs, expected)
                )
                scores["tokens"].append(tokens)
                scores["masks"].append(masks)
                scores["scores"].append(1.0 if reward else -1.0)
            if len(scores["tokens"]) >= self.config.group_size:
                break
        return scores
if __name__ == "__main__":
    # CLI entry point — cli() is not defined in this file; presumably
    # provided by the BaseEnv base class. Confirm against atroposlib.
    CodingEnv.cli()

View file

@ -0,0 +1,95 @@
"""
Instructions:
# Build the image
docker build -t cpp-flask-executor .
# Run the container
docker run -p 5002:5002 cpp-flask-executor
curl -X POST http://localhost:5002/execute \
-H "Content-Type: application/json" \
-d '{"code": "#include<iostream>\nint main(){int x; std::cin>>x; std::cout<<x*x; return 0;}", "input": "4"}'
"""
import os
import subprocess
import uuid
from flask import Flask, request, jsonify
import time
app = Flask(__name__)
@app.route('/execute', methods=['POST'])
def execute_code():
    """Execute a submitted Python program against one stdin payload.

    Expects JSON: {"code": <source>, "input": <stdin text>}.
    Returns JSON: {"output": <stdout>, "error": <stderr or message>}.
    Responds 400 when no code is provided.
    """
    try:
        data = request.json
        code = data.get("code", "")
        test_cases = data.get("input", "")

        if not code:
            # "output" included so clients reading response["output"]
            # never KeyError on an error response.
            return jsonify({"output": "", "error": "No code provided"}), 400

        # Unique temp file so concurrent requests never collide.
        file_id = str(uuid.uuid4())
        py_filename = f"/tmp/{file_id}.py"
        with open(py_filename, "w") as f:
            f.write(code)

        try:
            exec_result = subprocess.run(
                ["python", py_filename],
                input=test_cases,
                capture_output=True,
                text=True,
                timeout=5,  # prevent infinite loops
            )
        finally:
            # Clean up even on timeout/failure; previously a timeout
            # skipped os.remove and leaked the temp file.
            os.remove(py_filename)

        return jsonify({
            "output": exec_result.stdout,
            "error": exec_result.stderr
        })
    except subprocess.TimeoutExpired:
        return jsonify({"output": "", "error": "Execution timed out"})
    except Exception as e:
        # Last-resort boundary handler so the server always answers JSON.
        return jsonify({"output": "", "error": str(e)})
if __name__ == '__main__':
    # Bind to all interfaces so the port published by the Docker container
    # (5002, see the Dockerfile/run instructions) is reachable from the host.
    app.run(host="0.0.0.0", port=5002)