add eval demo for generated script

This commit is contained in:
Andreas Koepf 2025-01-29 18:28:17 +01:00
parent d6c9a534af
commit 7c0509db7a

View file

@ -2,13 +2,13 @@
"cells": [
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"prompt_template = \"\"\"You need to generate python code for a synthetic procedural dataset. The dataset is similar to OpenAI's GSM8K which contains grade-school level math questions in natural language.\n",
"\n",
"Here is a the SOURCE item from the dataset which you should translate into a python generator:\n",
"Here is the SOURCE item which you should translate into a python generator:\n",
"\n",
"```json\n",
"{0}\n",
@ -42,7 +42,7 @@
"Your task:\n",
"\n",
"- Generate reasonable random values for all the variables\n",
"- Ensure mathematical consistency (total distance is divisible by distance per interval)\n",
"- Ensure mathematical consistency (results of divisions need to be integers)\n",
"- Create natural language question and answer texts\n",
"- Include metadata about the variables and solution\n",
"\n",
@ -118,7 +118,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 33,
"metadata": {},
"outputs": [
{
@ -132,12 +132,16 @@
],
"source": [
"# create open-router client, place your OPENROUTER_API_KEY in .env file\n",
"# .env contents:\n",
"# OPENROUTER_API_KEY=sk-or-v1- ...\n",
"\n",
"%load_ext dotenv\n",
"%dotenv\n",
"import os\n",
"import re\n",
"from pathlib import Path\n",
"from typing import Any, Iterable, Optional\n",
"import json\n",
"from openai import OpenAI\n",
"from openai.types.chat import ChatCompletion, ChatCompletionMessageParam\n",
"import time\n",
@ -174,25 +178,16 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/koepf/code/open-thought/reasoning-gym/notebooks/../../../ml-gsm-symbolic/templates/symbolic\n"
"Reading templates from path: /home/koepf/code/open-thought/reasoning-gym/notebooks/../../../ml-gsm-symbolic/templates/symbolic\n",
"len of python source: 2389\n"
]
},
{
"data": {
"text/plain": [
"'from random import Random\\nfrom typing import Dict, Any\\n\\ndef generate_from_variables(item: str, n1: int, p: int, c1: str, c2: str, c3: str) -> Dict[str, Any]:\\n more_cards = int(p/100 * n1)\\n n2 = n1 + more_cards\\n n3 = n1 + n2\\n total = n3 + n3\\n\\n question = f\"In a set of {item}\\'s cards, there are {n1} {c1} cards, and {p}% more {c2} cards. {c3} cards are as many as the sum of {c1} and {c2} cards. How many cards of all mentioned colors are there?\"\\n\\n answer_cot = f\"There are {p}/100 * {n1} = {more_cards} more {c2} cards than {c1} cards.\\\\n\" \\\\\\n f\"Which means there are {n1} + {more_cards} = {n2} {c2} cards.\\\\n\" \\\\\\n f\"{c3} cards make up to {n1} + {n2} = {n3} cards.\\\\n\" \\\\\\n f\"So in total, there are {n3} + {n3} = {total} cards of different colors.\\\\n\" \\\\\\n f\"#### {total}\"\\n\\n return {\\n \\'question\\': question,\\n \\'answer\\': str(total),\\n \\'answer_cot\\': answer_cot,\\n \\'answer_value\\': total,\\n \\'variables\\': {\\n \\'item\\': item,\\n \\'n1\\': n1,\\n \\'p\\': p,\\n \\'c1\\': c1,\\n \\'c2\\': c2, \\n \\'c3\\': c3,\\n \\'more_cards\\': more_cards,\\n \\'total\\': total\\n }\\n }\\n\\ndef generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:\\n items = [\"magician\", \"artist\", \"chef\", \"scientist\", \"athlete\"]\\n colors = [\"red\", \"blue\", \"green\", \"yellow\", \"purple\", \"orange\"]\\n \\n item = rng.choice(items)\\n c1, c2, c3 = rng.sample(colors, 3)\\n \\n n1 = int(rng.randint(20, int(81 * difficulty)))\\n \\n # Generate p ensuring p/100 * n1 is an integer\\n while True:\\n p = int(rng.randint(20, min(90, int(90 * difficulty))))\\n if (p/100 * n1).is_integer():\\n break\\n \\n result = generate_from_variables(item, n1, p, c1, c2, c3)\\n \\n return {\\n \\'question\\': result[\\'question\\'],\\n \\'answer\\': result[\\'answer\\'],\\n \\'metadata\\': {\\n \\'difficulty\\': difficulty,\\n \\'answer_value\\': result[\\'answer_value\\'],\\n \\'answer_cot\\': 
result[\\'answer_cot\\'],\\n \\'variables\\': result[\\'variables\\']\\n }\\n }\\n\\ndef original_example() -> Dict[str, Any]:\\n return generate_from_variables(\"magician\", 15, 60, \"red\", \"green\", \"yellow\")\\n'"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
@ -205,9 +200,9 @@
" return prompt\n",
" \n",
"\n",
"def produce_generator(json_path: Path):\n",
" json = json_path.read_text()\n",
" user_request = prompt_template.format(json)\n",
"def eval_prompt_template(input: str):\n",
" \n",
" user_request = prompt_template.format(input)\n",
" \n",
" input_messages = generate_simple_request(user_prompt=user_request)\n",
" output = llm_generate(open_router_client, input_messages, sampling_params)\n",
@ -224,13 +219,48 @@
"template_files = list(path_to_gsmsym.glob(\"*.json\"))\n",
"\n",
"# for testing just do it for the first entry\n",
"response_text = produce_generator(template_files[0])\n",
"response_text = eval_prompt_template(template_files[0].read_text())\n",
"\n",
"# extract python source section\n",
"result_match = re.search(r\"^```.*\\n((.*\\n)+)```\", response_text, flags=re.MULTILINE)\n",
"\n",
"pytho_source = result_match.group(1)\n",
"pytho_source"
"\n",
"python_source = result_match.group(1)\n",
"print(\"len of python source: \", len(python_source))"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"In a set of magician's cards, there are 15 red cards, and 60% more green cards. yellow cards are as many as the sum of red and green cards. How many cards of all mentioned colors are there?\n",
"In a set of magicians cards, there are 15 red cards, and 60% more green cards. Yellow cards are as many, as the sum of red and green cards. How many cards of all mentioned colors are there?\n"
]
}
],
"source": [
"# WARNING: We are now executing the llm response without sandbox environment!\n",
"\n",
"scope = {} # eval generated python code here\n",
"\n",
"try:\n",
" exec(python_source, scope, scope)\n",
"except Exception as err:\n",
" raise\n",
"\n",
"\n",
"exec(\"output = original_example()\", scope, scope)\n",
"generated_data = scope[\"output\"]\n",
"print(generated_data['question'])\n",
"\n",
"\n",
"original_data = json.loads(template_files[0].read_text())\n",
"print(original_data['question'])\n"
]
}
],