reasoning-gym/notebooks/check-collisions-in-reasoning-gym-dataset.ipynb

345 lines
20 KiB
Text

{
"cells": [
{
"cell_type": "code",
"execution_count": 17,
"id": "42323371-404e-4e86-b8b8-a420b4c79303",
"metadata": {},
"outputs": [],
"source": [
"import reasoning_gym"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "f06e7932-6c77-4609-8a33-7c4d815841d6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total number of data: 56\n"
]
}
],
"source": [
"# Load the dataset names to scan: one name per line of data.txt.\n",
"with open(\"data.txt\") as f:\n",
"    # Strip whitespace and drop blank lines so that no empty string is later\n",
"    # passed to reasoning_gym.create_dataset as a dataset name.\n",
"    data_names = [name for name in (line.strip() for line in f) if name]\n",
"print(\"Total number of data: \", len(data_names))"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "d7a5a5bf-7428-46f5-a7f5-a46238df2543",
"metadata": {},
"outputs": [],
"source": [
"# Scratch list; none of the visible cells in this notebook read or append to it.\n",
"collisions = []\n",
"# Number of items requested from every dataset (passed as size= to create_dataset).\n",
"TOTAL = 10_000"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "7138aced-d61a-4e2a-9935-a9b251e6d554",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Count, 0 1\n",
"Count, 0 2\n",
"Count, 0 3\n",
"Count, 0 4\n",
"Count, 0 5\n",
"Count, 0 6\n",
"Count, 0 7\n",
"Count, 0 8\n",
"Count, 0 9\n",
"Count, 0 10\n",
"Count, 0 11\n",
"Count, 0 12\n",
"Count, 0 13\n",
"Count, 0 14\n",
"Count, 0 15\n",
"Count, 0 16\n",
"Count, 0 17\n",
"Count, 0 18\n",
"Count, 0 19\n",
"Count, 0 20\n",
"Count, 0 21\n",
"Count, 0 22\n",
"Count, 0 23\n",
"Count, 0 24\n",
"Count, 0 25\n",
"Count, 0 26\n",
"Count, 0 27\n",
"Count, 0 28\n",
"Count, 0 29\n",
"Count, 0 30\n",
"Count, 0 31\n",
"Count, 0 32\n",
"Count, 0 33\n",
"Count, 0 34\n",
"Count, 0 35\n",
"Count, 0 36\n",
"Count, 0 37\n",
"Count, 0 38\n",
"Count, 0 39\n",
"Count, 0 40\n",
"Count, 0 41\n",
"Count, 0 42\n",
"Count, 0 43\n",
"Count, 0 44\n",
"Count, 0 45\n",
"Count, 0 46\n",
"Count, 0 47\n",
"Count, 0 48\n",
"Count, 0 49\n",
"Count, 0 50\n",
"Count, 0 51\n",
"Count, 0 52\n",
"Count, 0 53\n",
"Count, 0 54\n",
"Count, 0 55\n",
"Count, 0 56\n",
"Count, 0 57\n",
"Count, 0 58\n",
"Count, 0 59\n",
"Count, 0 60\n",
"Count, 0 61\n",
"Count, 0 62\n",
"Count, 0 63\n",
"Count, 0 64\n",
"Count, 0 65\n",
"Count, 0 66\n",
"Count, 0 67\n",
"Count, 0 68\n",
"Count, 0 69\n",
"Count, 0 70\n",
"Count, 0 71\n",
"Count, 0 72\n",
"Count, 0 73\n",
"Count, 0 74\n",
"Count, 0 75\n",
"Count, 0 76\n",
"Count, 0 77\n",
"Count, 0 78\n",
"Count, 0 79\n",
"Count, 0 80\n",
"Count, 0 81\n",
"Count, 0 82\n",
"Count, 0 83\n",
"Count, 0 84\n",
"Count, 0 85\n",
"Count, 0 86\n",
"Count, 0 87\n",
"Count, 0 88\n",
"Count, 0 89\n",
"Count, 0 90\n",
"Count, 0 91\n",
"Count, 0 92\n",
"Count, 0 93\n",
"Count, 0 94\n",
"Count, 0 95\n",
"Count, 0 96\n",
"Count, 0 97\n",
"Count, 0 98\n",
"Count, 0 99\n",
"Count, 0 100\n",
"Count, 0 101\n",
"Count, 0 102\n",
"Count, 0 103\n",
"Count, 0 104\n"
]
},
{
"ename": "IndexError",
"evalue": "Cannot choose from an empty sequence",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
"File \u001b[0;32m<timed exec>:5\u001b[0m\n",
"File \u001b[0;32m~/Desktop/deep-learning/reasoning-gym/reasoning_gym/dataset.py:44\u001b[0m, in \u001b[0;36mProceduralDataset.__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_current_idx \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msize:\n\u001b[1;32m 43\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m\n\u001b[0;32m---> 44\u001b[0m item \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m[\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_current_idx]\n\u001b[1;32m 45\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_current_idx \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 46\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m item\n",
"File \u001b[0;32m~/Desktop/deep-learning/reasoning-gym/reasoning_gym/arithmetic/basic_arithmetic.py:90\u001b[0m, in \u001b[0;36mBasicArithmeticDataset.__getitem__\u001b[0;34m(self, idx)\u001b[0m\n\u001b[1;32m 87\u001b[0m num_digits \u001b[38;5;241m=\u001b[39m rng\u001b[38;5;241m.\u001b[39mrandint(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mmin_digits, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mmax_digits)\n\u001b[1;32m 89\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mallow_parentheses:\n\u001b[0;32m---> 90\u001b[0m expression, result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_generate_complex_task(rng, num_terms, num_digits)\n\u001b[1;32m 91\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 92\u001b[0m expression, result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_generate_simple_task(rng, num_terms, num_digits)\n",
"File \u001b[0;32m~/Desktop/deep-learning/reasoning-gym/reasoning_gym/arithmetic/basic_arithmetic.py:178\u001b[0m, in \u001b[0;36mBasicArithmeticDataset._generate_complex_task\u001b[0;34m(self, rng, num_terms, num_digits)\u001b[0m\n\u001b[1;32m 174\u001b[0m left_parts\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;28mstr\u001b[39m(divisor))\n\u001b[1;32m 176\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m left_parts\n\u001b[0;32m--> 178\u001b[0m parts \u001b[38;5;241m=\u001b[39m add_terms(num_terms)\n\u001b[1;32m 180\u001b[0m \u001b[38;5;66;03m# Add whitespace according to config\u001b[39;00m\n\u001b[1;32m 181\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mwhitespace \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mno_space\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n",
"File \u001b[0;32m~/Desktop/deep-learning/reasoning-gym/reasoning_gym/arithmetic/basic_arithmetic.py:126\u001b[0m, in \u001b[0;36mBasicArithmeticDataset._generate_complex_task.<locals>.add_terms\u001b[0;34m(remaining)\u001b[0m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 125\u001b[0m left_parts\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 126\u001b[0m left_parts\u001b[38;5;241m.\u001b[39mextend(add_terms(num_left))\n\u001b[1;32m 127\u001b[0m left_parts\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 128\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
"File \u001b[0;32m~/Desktop/deep-learning/reasoning-gym/reasoning_gym/arithmetic/basic_arithmetic.py:141\u001b[0m, in \u001b[0;36mBasicArithmeticDataset._generate_complex_task.<locals>.add_terms\u001b[0;34m(remaining)\u001b[0m\n\u001b[1;32m 139\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m op \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 140\u001b[0m left_parts\u001b[38;5;241m.\u001b[39mappend(op)\n\u001b[0;32m--> 141\u001b[0m left_parts\u001b[38;5;241m.\u001b[39mextend(add_terms(num_right))\n\u001b[1;32m 142\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 143\u001b[0m \u001b[38;5;66;03m# left part has parantheses or no division\u001b[39;00m\n\u001b[1;32m 144\u001b[0m dividend \u001b[38;5;241m=\u001b[39m eval_floordiv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(left_parts) \u001b[38;5;28;01mif\u001b[39;00m left_parts[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m left_parts[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m])\n",
"File \u001b[0;32m~/Desktop/deep-learning/reasoning-gym/reasoning_gym/arithmetic/basic_arithmetic.py:164\u001b[0m, in \u001b[0;36mBasicArithmeticDataset._generate_complex_task.<locals>.add_terms\u001b[0;34m(remaining)\u001b[0m\n\u001b[1;32m 161\u001b[0m right_parts\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 163\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 164\u001b[0m divisor \u001b[38;5;241m=\u001b[39m rng\u001b[38;5;241m.\u001b[39mchoice(find_common_divisors(dividend, \u001b[38;5;241m0\u001b[39m))\n\u001b[1;32m 165\u001b[0m left_parts\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;28mstr\u001b[39m(divisor))\n\u001b[1;32m 166\u001b[0m left_parts\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m+\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
"File \u001b[0;32m/opt/anaconda3/lib/python3.12/random.py:347\u001b[0m, in \u001b[0;36mRandom.choice\u001b[0;34m(self, seq)\u001b[0m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;66;03m# As an accommodation for NumPy, we don't use \"if not seq\"\u001b[39;00m\n\u001b[1;32m 345\u001b[0m \u001b[38;5;66;03m# because bool(numpy.array()) raises a ValueError.\u001b[39;00m\n\u001b[1;32m 346\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(seq):\n\u001b[0;32m--> 347\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mIndexError\u001b[39;00m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCannot choose from an empty sequence\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 348\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m seq[\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_randbelow(\u001b[38;5;28mlen\u001b[39m(seq))]\n",
"\u001b[0;31mIndexError\u001b[0m: Cannot choose from an empty sequence"
]
}
],
"source": [
"%%time\n",
"# For every dataset, build TOTAL items under seed 1 and seed 2 and count the\n",
"# positions at which both seeds produce the identical question text.\n",
"for name in data_names:\n",
"    try:\n",
"        data_1 = reasoning_gym.create_dataset(name, size=TOTAL, seed=1)\n",
"        data_2 = reasoning_gym.create_dataset(name, size=TOTAL, seed=2)\n",
"        # Same-index comparison only: a question that reappears at a different\n",
"        # index of the other seed is not counted.\n",
"        count = sum(\n",
"            item_1[\"question\"] == item_2[\"question\"]\n",
"            for item_1, item_2 in zip(data_1, data_2)\n",
"        )\n",
"        result = str(count)\n",
"    except Exception as exc:\n",
"        # One broken generator (basic_arithmetic raised IndexError in this\n",
"        # notebook) must not abort the scan of the remaining datasets, so the\n",
"        # failure is recorded in place of the count and the loop continues.\n",
"        result = f\"ERROR {type(exc).__name__}: {exc}\"\n",
"        print(f\"{name}: {result}\")\n",
"\n",
"    # Append after each dataset so partial results survive an interrupted run.\n",
"    with open('collisions_1.txt', 'a') as file:\n",
"        file.write(f\"{name}, {result}\\n\")"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "933d03a5-7d97-49fc-beea-bb7bfa5fe941",
"metadata": {},
"outputs": [],
"source": [
"# Build the seed-1 basic_arithmetic dataset on its own so it can be inspected directly.\n",
"data_1 = reasoning_gym.create_dataset(\n",
"    \"basic_arithmetic\",\n",
"    size=TOTAL,\n",
"    seed=1,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "897a0417-3211-4735-aafc-4c33b3c79d19",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"10000"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(data_1)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "9c8561c7-a2da-405f-976e-ffb929d54c73",
"metadata": {},
"outputs": [],
"source": [
"# Build the seed-2 basic_arithmetic dataset with the same size for comparison.\n",
"data_2 = reasoning_gym.create_dataset(\n",
"    \"basic_arithmetic\",\n",
"    size=TOTAL,\n",
"    seed=2,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "ba0dd690-8448-42ff-bb2c-55350928abd8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"10000"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(data_2)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "f3d1b1f6-bcf6-4f70-9168-33b3179ae171",
"metadata": {},
"outputs": [
{
"ename": "IndexError",
"evalue": "Cannot choose from an empty sequence",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[33], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m data_1[\u001b[38;5;241m105\u001b[39m]\n",
"File \u001b[0;32m~/Desktop/deep-learning/reasoning-gym/reasoning_gym/arithmetic/basic_arithmetic.py:90\u001b[0m, in \u001b[0;36mBasicArithmeticDataset.__getitem__\u001b[0;34m(self, idx)\u001b[0m\n\u001b[1;32m 87\u001b[0m num_digits \u001b[38;5;241m=\u001b[39m rng\u001b[38;5;241m.\u001b[39mrandint(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mmin_digits, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mmax_digits)\n\u001b[1;32m 89\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mallow_parentheses:\n\u001b[0;32m---> 90\u001b[0m expression, result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_generate_complex_task(rng, num_terms, num_digits)\n\u001b[1;32m 91\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 92\u001b[0m expression, result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_generate_simple_task(rng, num_terms, num_digits)\n",
"File \u001b[0;32m~/Desktop/deep-learning/reasoning-gym/reasoning_gym/arithmetic/basic_arithmetic.py:178\u001b[0m, in \u001b[0;36mBasicArithmeticDataset._generate_complex_task\u001b[0;34m(self, rng, num_terms, num_digits)\u001b[0m\n\u001b[1;32m 174\u001b[0m left_parts\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;28mstr\u001b[39m(divisor))\n\u001b[1;32m 176\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m left_parts\n\u001b[0;32m--> 178\u001b[0m parts \u001b[38;5;241m=\u001b[39m add_terms(num_terms)\n\u001b[1;32m 180\u001b[0m \u001b[38;5;66;03m# Add whitespace according to config\u001b[39;00m\n\u001b[1;32m 181\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mwhitespace \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mno_space\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n",
"File \u001b[0;32m~/Desktop/deep-learning/reasoning-gym/reasoning_gym/arithmetic/basic_arithmetic.py:126\u001b[0m, in \u001b[0;36mBasicArithmeticDataset._generate_complex_task.<locals>.add_terms\u001b[0;34m(remaining)\u001b[0m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 125\u001b[0m left_parts\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 126\u001b[0m left_parts\u001b[38;5;241m.\u001b[39mextend(add_terms(num_left))\n\u001b[1;32m 127\u001b[0m left_parts\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 128\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
"File \u001b[0;32m~/Desktop/deep-learning/reasoning-gym/reasoning_gym/arithmetic/basic_arithmetic.py:141\u001b[0m, in \u001b[0;36mBasicArithmeticDataset._generate_complex_task.<locals>.add_terms\u001b[0;34m(remaining)\u001b[0m\n\u001b[1;32m 139\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m op \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 140\u001b[0m left_parts\u001b[38;5;241m.\u001b[39mappend(op)\n\u001b[0;32m--> 141\u001b[0m left_parts\u001b[38;5;241m.\u001b[39mextend(add_terms(num_right))\n\u001b[1;32m 142\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 143\u001b[0m \u001b[38;5;66;03m# left part has parantheses or no division\u001b[39;00m\n\u001b[1;32m 144\u001b[0m dividend \u001b[38;5;241m=\u001b[39m eval_floordiv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(left_parts) \u001b[38;5;28;01mif\u001b[39;00m left_parts[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m left_parts[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m])\n",
"File \u001b[0;32m~/Desktop/deep-learning/reasoning-gym/reasoning_gym/arithmetic/basic_arithmetic.py:164\u001b[0m, in \u001b[0;36mBasicArithmeticDataset._generate_complex_task.<locals>.add_terms\u001b[0;34m(remaining)\u001b[0m\n\u001b[1;32m 161\u001b[0m right_parts\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 163\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 164\u001b[0m divisor \u001b[38;5;241m=\u001b[39m rng\u001b[38;5;241m.\u001b[39mchoice(find_common_divisors(dividend, \u001b[38;5;241m0\u001b[39m))\n\u001b[1;32m 165\u001b[0m left_parts\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;28mstr\u001b[39m(divisor))\n\u001b[1;32m 166\u001b[0m left_parts\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m+\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
"File \u001b[0;32m/opt/anaconda3/lib/python3.12/random.py:347\u001b[0m, in \u001b[0;36mRandom.choice\u001b[0;34m(self, seq)\u001b[0m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;66;03m# As an accommodation for NumPy, we don't use \"if not seq\"\u001b[39;00m\n\u001b[1;32m 345\u001b[0m \u001b[38;5;66;03m# because bool(numpy.array()) raises a ValueError.\u001b[39;00m\n\u001b[1;32m 346\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(seq):\n\u001b[0;32m--> 347\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mIndexError\u001b[39;00m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCannot choose from an empty sequence\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 348\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m seq[\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_randbelow(\u001b[38;5;28mlen\u001b[39m(seq))]\n",
"\u001b[0;31mIndexError\u001b[0m: Cannot choose from an empty sequence"
]
}
],
"source": [
"# Fetch index 105 of the seed-1 dataset, which raised IndexError when this\n",
"# notebook was last run. Capture the error text instead of letting the cell\n",
"# fail, so that the cells below still execute under Restart & Run All.\n",
"try:\n",
"    item_105 = data_1[105]\n",
"except IndexError as exc:\n",
"    item_105 = f\"IndexError: {exc}\"\n",
"item_105"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "74d31167-2d03-4844-b332-3e5cca5c0c23",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'question': 'Calculate -( -( -8861 * -813 ) ) * -3862.',\n",
" 'answer': '-27821820966',\n",
" 'metadata': {'source_dataset': 'basic_arithmetic',\n",
" 'source_index': 105,\n",
" 'expression': '-( -( -8861 * -813 ) ) * -3862',\n",
" 'num_terms': 3,\n",
" 'num_digits': 4,\n",
" 'difficulty': {'num_terms': (2, 6), 'num_digits': (1, 4)}}}"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_2[105]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "51263e94-0867-4b2a-b205-ebafb316f811",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}