diff --git a/notebooks/gsm_symbolic.ipynb b/notebooks/gsm_symbolic.ipynb index b49da0de..2eb6b398 100644 --- a/notebooks/gsm_symbolic.ipynb +++ b/notebooks/gsm_symbolic.ipynb @@ -2,13 +2,13 @@ "cells": [ { "cell_type": "code", - "execution_count": 14, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "prompt_template = \"\"\"You need to generate python code for a synthetic procedural dataset. The dataset is similar to OpenAI's GSM8K which contains grade-school level math questions in natural language.\n", "\n", - "Here is a the SOURCE item from the dataset which you should translate into a python generator:\n", + "Here is a the SOURCE item which you should translate into a python generator:\n", "\n", "```json\n", "{0}\n", @@ -42,7 +42,7 @@ "Your task:\n", "\n", "- Generate reasonable random values for all the variables\n", - "- Ensure mathematical consistency (total distance is divisible by distance per interval)\n", + "- Ensure mathematical consistency (results of divisions need to be integers)\n", "- Create natural language question and answer texts\n", "- Include metadata about the variables and solution\n", "\n", @@ -118,7 +118,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 33, "metadata": {}, "outputs": [ { @@ -132,12 +132,16 @@ ], "source": [ "# create open-router client, place your OPENROUTER_API_KEY in .env file\n", + "# .env contents:\n", + "# OPENROUTER_API_KEY=sk-or-v1- ...\n", + "\n", "%load_ext dotenv\n", "%dotenv\n", "import os\n", "import re\n", "from pathlib import Path\n", "from typing import Any, Iterable, Optional\n", + "import json\n", "from openai import OpenAI\n", "from openai.types.chat import ChatCompletion, ChatCompletionMessageParam\n", "import time\n", @@ -174,25 +178,16 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "/home/koepf/code/open-thought/reasoning-gym/notebooks/../../../ml-gsm-symbolic/templates/symbolic\n" + "Reading templates from path: /home/koepf/code/open-thought/reasoning-gym/notebooks/../../../ml-gsm-symbolic/templates/symbolic\n", + "len of python source: 2389\n" ] - }, - { - "data": { - "text/plain": [ - "'from random import Random\\nfrom typing import Dict, Any\\n\\ndef generate_from_variables(item: str, n1: int, p: int, c1: str, c2: str, c3: str) -> Dict[str, Any]:\\n more_cards = int(p/100 * n1)\\n n2 = n1 + more_cards\\n n3 = n1 + n2\\n total = n3 + n3\\n\\n question = f\"In a set of {item}\\'s cards, there are {n1} {c1} cards, and {p}% more {c2} cards. {c3} cards are as many as the sum of {c1} and {c2} cards. How many cards of all mentioned colors are there?\"\\n\\n answer_cot = f\"There are {p}/100 * {n1} = {more_cards} more {c2} cards than {c1} cards.\\\\n\" \\\\\\n f\"Which means there are {n1} + {more_cards} = {n2} {c2} cards.\\\\n\" \\\\\\n f\"{c3} cards make up to {n1} + {n2} = {n3} cards.\\\\n\" \\\\\\n f\"So in total, there are {n3} + {n3} = {total} cards of different colors.\\\\n\" \\\\\\n f\"#### {total}\"\\n\\n return {\\n \\'question\\': question,\\n \\'answer\\': str(total),\\n \\'answer_cot\\': answer_cot,\\n \\'answer_value\\': total,\\n \\'variables\\': {\\n \\'item\\': item,\\n \\'n1\\': n1,\\n \\'p\\': p,\\n \\'c1\\': c1,\\n \\'c2\\': c2, \\n \\'c3\\': c3,\\n \\'more_cards\\': more_cards,\\n \\'total\\': total\\n }\\n }\\n\\ndef generate_example(rng: Random, difficulty: float = 1.0) -> Dict[str, Any]:\\n items = [\"magician\", \"artist\", \"chef\", \"scientist\", \"athlete\"]\\n colors = [\"red\", \"blue\", \"green\", \"yellow\", \"purple\", \"orange\"]\\n \\n item = rng.choice(items)\\n c1, c2, c3 = rng.sample(colors, 3)\\n \\n n1 = int(rng.randint(20, int(81 * difficulty)))\\n \\n # Generate p ensuring p/100 * n1 is an integer\\n while True:\\n p = int(rng.randint(20, min(90, int(90 * difficulty))))\\n if (p/100 * n1).is_integer():\\n break\\n \\n result = generate_from_variables(item, n1, p, c1, c2, c3)\\n \\n return {\\n \\'question\\': result[\\'question\\'],\\n \\'answer\\': result[\\'answer\\'],\\n \\'metadata\\': {\\n \\'difficulty\\': difficulty,\\n \\'answer_value\\': result[\\'answer_value\\'],\\n \\'answer_cot\\': result[\\'answer_cot\\'],\\n \\'variables\\': result[\\'variables\\']\\n }\\n }\\n\\ndef original_example() -> Dict[str, Any]:\\n return generate_from_variables(\"magician\", 15, 60, \"red\", \"green\", \"yellow\")\\n'" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ @@ -205,9 +200,9 @@ " return prompt\n", " \n", "\n", - "def produce_generator(json_path: Path):\n", - " json = json_path.read_text()\n", - " user_request = prompt_template.format(json)\n", + "def eval_prompt_template(input: str):\n", + " \n", + " user_request = prompt_template.format(input)\n", " \n", " input_messages = generate_simple_request(user_prompt=user_request)\n", " output = llm_generate(open_router_client, input_messages, sampling_params)\n", @@ -224,13 +219,48 @@ "template_files = list(path_to_gsmsym.glob(\"*.json\"))\n", "\n", "# for testing just do it for the first entry\n", - "response_text = produce_generator(template_files[0])\n", + "response_text = eval_prompt_template(template_files[0].read_text())\n", "\n", "# extract python source section\n", "result_match = re.search(r\"^```.*\\n((.*\\n)+)```\", response_text, flags=re.MULTILINE)\n", "\n", - "pytho_source = result_match.group(1)\n", - "pytho_source" + "\n", + "python_source = result_match.group(1)\n", + "print(\"len of python source: \", len(python_source))" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "In a set of magician's cards, there are 15 red cards, and 60% more green cards. yellow cards are as many as the sum of red and green cards. How many cards of all mentioned colors are there?\n", + "In a set of magicians cards, there are 15 red cards, and 60% more green cards. Yellow cards are as many, as the sum of red and green cards. How many cards of all mentioned colors are there?\n" + ] + } + ], + "source": [ + "# WARNING: We are now executing the llm response without sandbox environment!\n", + "\n", + "scope = {} # eval generated python code here\n", + "\n", + "try:\n", + " exec(python_source, scope, scope)\n", + "except Exception as err:\n", + " raise\n", + "\n", + "\n", + "exec(\"output = original_example()\", scope, scope)\n", + "generated_data = scope[\"output\"]\n", + "print(generated_data['question'])\n", + "\n", + "\n", + "original_data = json.loads(template_files[0].read_text())\n", + "print(original_data['question'])\n" ] } ],