mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-19 12:57:58 +00:00
add code execution environment
This commit is contained in:
parent
a282604baa
commit
63b18d891f
3 changed files with 307 additions and 0 deletions
16
environments/code_execution_server/Dockerfile
Normal file
16
environments/code_execution_server/Dockerfile
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
FROM python:3

# Work around flaky HTTP proxies/mirrors when fetching apt packages.
RUN echo "Acquire::http::Pipeline-Depth 0;" > /etc/apt/apt.conf.d/99custom && \
    echo "Acquire::http::No-Cache true;" >> /etc/apt/apt.conf.d/99custom && \
    echo "Acquire::BrokenProxy true;" >> /etc/apt/apt.conf.d/99custom

# Single layer for the apt work; skip recommends and drop the package lists
# afterwards so the image stays small.
RUN apt-get update && apt-get upgrade -y \
    && apt-get install -y --no-install-recommends build-essential \
    && rm -rf /var/lib/apt/lists/*

# flask is the only runtime dependency of server.py; --no-cache-dir avoids
# baking pip's download cache into the layer.
RUN pip install --no-cache-dir flask

WORKDIR /tmp

COPY server.py /tmp/server.py

CMD ["python", "server.py"]
||||
196
environments/code_execution_server/coding_server.py
Normal file
196
environments/code_execution_server/coding_server.py
Normal file
|
|
@ -0,0 +1,196 @@
|
|||
import random
|
||||
from typing import Dict, List, Optional, Tuple, TypedDict, Union
|
||||
|
||||
from datasets import load_dataset
|
||||
from latex2sympy2_extended import NormalizationConfig
|
||||
from math_verify import LatexExtractionConfig, parse, verify
|
||||
from tqdm.asyncio import tqdm_asyncio
|
||||
import regex as re
|
||||
|
||||
import asyncio
|
||||
import httpx
|
||||
import docker, os
|
||||
|
||||
from atroposlib.envs.base import BaseEnv, BaseEnvConfig, OpenaiConfig, ScoredDataGroup
|
||||
from atroposlib.type_definitions import Item, number
|
||||
|
||||
from atroposlib.type_definitions import GameHistory, Item
|
||||
from atroposlib.utils.tokenize_for_trainer import tokenize_for_trainer
|
||||
|
||||
# Generic "deep thinking" system prompt that asks the model to reason inside
# <think> </think> tags before answering.
# NOTE(review): not referenced anywhere in this module — presumably intended
# for use when building the chat messages; confirm before removing.
system_prompt = (
    "You are a deep thinking AI, you may use extremely long chains of thought "
    "to deeply consider the problem and deliberate with yourself via systematic "
    "reasoning processes to help come to a correct solution prior to answering. "
    "You should enclose your thoughts and internal monologue inside <think> </think> "
    "tags, and then provide your solution or response to the problem.\n\n"
)
|
||||
|
||||
async def submit_code(
    client,
    code,
    test_input,
    language="python",
    url="http://localhost:5002/execute",
):
    """Submit one program plus its stdin to the execution server; return stdout.

    :param client: async HTTP client exposing ``post(url, json=...)``
        (e.g. an ``httpx.AsyncClient``).
    :param code: source code to execute.
    :param test_input: text fed to the program's stdin.
    :param language: language tag forwarded to the server (default ``python``).
    :param url: execution endpoint; parameterized so the port/host is not
        hard-coded at every call site (default matches the Docker mapping).
    :return: the program's stdout, or ``""`` when the server replied with an
        error-only payload. (Bug fix: previously ``response_json["output"]``
        raised ``KeyError`` on the server's ``{"error": ...}`` failure
        responses.)
    """
    payload = {
        "code": code,
        "input": test_input,
        "language": language,
    }
    response = await client.post(url, json=payload)
    response_json = response.json()
    return response_json.get("output", "")
|
||||
|
||||
async def get_results(code, answer):
    """Run ``code`` against every stdin in ``answer`` concurrently.

    :param code: source code submitted once per test case.
    :param answer: iterable of stdin strings, one per test case.
    :return: list of stdout strings in the same order as ``answer``.
    """
    async with httpx.AsyncClient() as client:
        # One request per test case, executed concurrently.
        tasks = [submit_code(client, code, test_input) for test_input in answer]
        # asyncio.gather already preserves submission order; the original
        # wrapped it in a no-op comprehension.
        return list(await asyncio.gather(*tasks))
|
||||
|
||||
def init_docker():
    """Build the ``code-executor`` image and start it with port 5002 mapped.

    :return: the running container handle, or ``None`` when the build or the
        run failed (errors are printed, mirroring the original behavior).
    """
    client = docker.from_env()

    def build_docker_image():
        # Build the image from the directory containing this script.
        try:
            print("Building Docker image...")
            current_dir = os.path.dirname(os.path.abspath(__file__))
            image, logs = client.images.build(path=current_dir, tag="code-executor")

            # Surface the build logs for debugging.
            for log in logs:
                print(log.get('stream', '').strip())

            print("Docker image built successfully.")
            return image
        except docker.errors.BuildError as e:
            print(f"Error during Docker image build: {e}")
            return None

    def run_docker_container():
        # Launch the freshly built image in the background.
        try:
            print("Running Docker container...")
            container = client.containers.run(
                "code-executor",
                ports={'5002/tcp': 5002},
                detach=True,  # Runs in detached mode (in the background)
            )
            print(f"Docker container is running with ID: {container.id}")
            return container
        except docker.errors.ContainerError as e:
            print(f"Error during Docker container run: {e}")
            return None

    image = build_docker_image()
    if image is None:
        # Bug fix: the original ran the container even after a failed build,
        # which would launch a stale image or raise ImageNotFound.
        return None
    return run_docker_container()
|
||||
|
||||
class CodingEnv(BaseEnv):
    """RL environment that prompts a model to solve competitive-programming
    problems (deepmind/code_contests) and scores each completion by executing
    the extracted Python code against the problem's test cases via the
    sandboxed Docker execution server.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    async def collect_trajectories(
        self, item: Item
    ) -> Tuple[GameHistory | None, List[Item]]:
        """Sample ``group_size`` completions for ``item`` and score them.

        ``item`` is the ``(prompt, answer)`` pair from :meth:`get_next_item`.
        Returns ``(scored_group, backlog)``; backlog is always empty here.
        """
        chat_completions = await self.server.chat_completion(
            messages=[
                {
                    "role": "system",
                    "content": "You must submit your answer with ```python\n{code}```",
                },
                dict(item[0][0]),
            ],
            n=self.config.group_size,
            max_tokens=1024 * 4,
        )
        to_score = list()
        to_backlog = list()
        for chat_completion in chat_completions.choices:
            messages = (
                dict(item[0][0]),
                {"role": "assistant", "content": chat_completion.message.content},
            )
            to_score.append(
                (
                    messages,
                    item[1],
                )
            )

        to_postprocess = await self.score(to_score)
        return to_postprocess, to_backlog

    async def evaluate(self, *args, **kwargs):
        """
        Evaluate the environment, this is called every steps_per_eval steps

        Included here is an example on how to use eval workers to run a task.

        You may however do whatever you want in this method.

        :param args:
        :param kwargs:
        :return: None.
        """
        return

    async def setup(self):
        """Setup the environment: start the executor container and load data."""
        self.container = init_docker()
        self.train = load_dataset("deepmind/code_contests", split="train")
        self.iter = 0

    async def get_next_item(self) -> Item:
        """
        Get the next items to be rolled out.

        Returns ``(prompt, answer)`` where ``answer`` is a 4-tuple of
        (private inputs, private outputs, generated inputs, generated outputs).
        """
        next_item = self.train[self.iter % len(self.train)]
        self.iter += 1
        prompt = tuple(
            [frozenset({"role": "user", "content": next_item["description"]}.items())]
        )
        answer = (
            tuple(next_item["private_tests"]["input"]),
            tuple(next_item["private_tests"]["output"]),
            tuple(next_item["generated_tests"]["input"]),
            tuple(next_item["generated_tests"]["output"]),
        )
        return (prompt, answer)

    def extract_python_code_blocks(self, text):
        """Return the contents of all fenced code blocks in ``text``."""
        # Regex specifically looks for ```python\n...code...\n```
        pattern = r'^```(?:\w+)?\s*\n(.*?)(?=^```)```'
        result = re.findall(pattern, text, re.DOTALL | re.MULTILINE)
        python_blocks = [r for r in result]
        return python_blocks

    async def score(self, rollout_group_data) -> Optional[ScoredDataGroup]:
        """Tokenize each rollout and assign +1.0/-1.0 by executing its code.

        A completion earns +1.0 only when every test case's stdout matches
        the expected output exactly; anything else (including a completion
        with no code fence) earns -1.0.
        """
        scores = ScoredDataGroup()
        scores["tokens"] = list()
        scores["masks"] = list()
        scores["scores"] = list()
        random.shuffle(rollout_group_data)
        for item in rollout_group_data:
            out_dict = tokenize_for_trainer(self.tokenizer, item[0])
            tokens = out_dict["tokens"]
            masks = out_dict["masks"]
            # --- reward calculation ---
            code_blocks = self.extract_python_code_blocks(item[0][-1]["content"])
            if not code_blocks:
                # Bug fix: the original indexed [0] unconditionally and raised
                # IndexError when the completion contained no code fence.
                reward = False
            else:
                code = code_blocks[0]
                test_inputs = list(item[1][0]) + list(item[1][2])
                results = await get_results(code, test_inputs)
                expected = list(item[1][1]) + list(item[1][3])
                if len(results) != len(expected):
                    # Was a bare assert, which is stripped under ``python -O``.
                    raise ValueError(
                        "execution results and expected outputs differ in length"
                    )
                reward = all(got == want for got, want in zip(results, expected))
            # remove obviously bad examples
            if len([1 for i in masks if i != -100]) < 10:
                continue
            scores["tokens"].append(tokens)
            scores["masks"].append(masks)
            scores["scores"].append(1.0 if reward else -1.0)
            if len(scores["tokens"]) >= self.config.group_size:
                break
        # check if all the same
        # print(scores['scores'])
        # if all([scores["scores"][0] == score for score in scores["scores"]]):
        #     return None  # If all the same, we return None
        return scores
|
||||
|
||||
if __name__ == "__main__":
    # Delegate to the shared BaseEnv command-line entry point.
    CodingEnv.cli()
|
||||
95
environments/code_execution_server/server.py
Normal file
95
environments/code_execution_server/server.py
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
"""
|
||||
Instructions:
|
||||
|
||||
# Build the image
|
||||
docker build -t cpp-flask-executor .
|
||||
|
||||
# Run the container
|
||||
docker run -p 5002:5002 cpp-flask-executor
|
||||
|
||||
curl -X POST http://localhost:5002/execute \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"code": "#include<iostream>\nint main(){int x; std::cin>>x; std::cout<<x*x; return 0;}", "input": "4"}'
|
||||
|
||||
|
||||
|
||||
"""
|
||||
import os
|
||||
import subprocess
|
||||
import uuid
|
||||
from flask import Flask, request, jsonify
|
||||
import time
|
||||
|
||||
app = Flask(__name__)


@app.route('/execute', methods=['POST'])
def execute_code():
    """Execute submitted Python code against a stdin payload.

    Expects JSON ``{"code": str, "input": str}``; returns JSON
    ``{"output": stdout, "error": stderr}`` on success, ``{"error": ...}``
    (and 400 for a missing ``code`` field) on failure.
    """
    try:
        data = request.json
        code = data.get("code", "")
        test_cases = data.get("input", "")

        if not code:
            return jsonify({"error": "No code provided"}), 400

        # Unique temp file per request so concurrent calls don't collide.
        file_id = str(uuid.uuid4())
        py_filename = f"/tmp/{file_id}.py"

        # Save the Python code to a file
        with open(py_filename, "w") as f:
            f.write(code)

        try:
            # Execute Python code using subprocess
            exec_result = subprocess.run(
                ["python", py_filename],
                input=test_cases,
                capture_output=True,
                text=True,
                timeout=5  # Prevent infinite loops
            )
        finally:
            # Bug fix: cleanup now runs even when subprocess.run raises
            # (e.g. TimeoutExpired); previously the temp file leaked.
            os.remove(py_filename)

        return jsonify({
            "output": exec_result.stdout,
            "error": exec_result.stderr
        })

    except Exception as e:
        return jsonify({"error": str(e)})
|
||||
|
||||
if __name__ == '__main__':
    # Bind on all interfaces so the server is reachable from outside the
    # Docker container; port must match the container's 5002 mapping.
    app.run(host="0.0.0.0", port=5002)
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue