Draft CodeIO-derived reasoning problems dataset

2026-04-19 12:58:07 +00:00 · 2025-02-22 00:56:52 +00:00 · 2025-02-22 00:56:52 +00:00 · e718168428
commit e718168428
parent 563480329e
2 changed files with 65 additions and 5 deletions
--- a/reasoning_gym/code/codeio.py
+++ b/reasoning_gym/code/codeio.py
@ -1,9 +1,34 @@
+# TODO: consider whether this belongs in the "code" directory
+import json
 from dataclasses import dataclass
+from pathlib import Path
 from random import Random
 from typing import Any, Optional

 from ..factory import ProceduralDataset, register_dataset

+OUTPUT_PREDICTION_PROMPT_TEMPLATE = """
+You are given a question that requires some input and output variables as follows:
+
+{0}
+
+The input and output requirements are as follows:
+
+{1}
+
+Given the following input:
+
+{2}
+
+Can you predict the output without writing any code? Please think and then provide only the exact output as your final answer, which should strictly match the output requirement as specified.
+
+Tip: Here is a reference code snippet for this question. You can refer to this code to guide your reasoning but not copy spans of code directly.
+
+{3}
+"""
+
+# TODO: also add input prediction prompt
+

@dataclass
 class CodeIOConfig:
@ -35,13 +60,46 @@ class CodeIODataset(ProceduralDataset):
        self._current_idx += 1
        return item

+    def _generate_io_pairs(self, main_code: str, input_generator_code: str, num_pairs: int = 1):
+        """Execute reference code and its input generator to sample input/output pairs.
+
+        Args:
+            main_code: Python source that must define a callable named ``main``.
+            input_generator_code: Python source that must define a zero-argument
+                callable named ``input_generator`` returning a dict of keyword
+                arguments for ``main``.
+            num_pairs: Number of ``(inputs, outputs)`` pairs to generate.
+
+        Returns:
+            A list of ``(inputs, outputs)`` tuples, one per generated pair.
+
+        Raises:
+            KeyError: If a pair is requested and either snippet failed to define
+                the expected callable.
+        """
+        # SECURITY: exec() runs the snippets with full interpreter privileges;
+        # only feed it code from a trusted data source.
+        #
+        # A single dict is used as both globals and locals. With separate dicts,
+        # names bound at the top level of a snippet (imports, helper functions,
+        # constants) land in the locals dict, while functions defined by the
+        # snippet resolve their globals through the other, empty dict and fail
+        # with NameError as soon as they reference one of those names.
+        namespace: dict = {}
+        exec(main_code, namespace)
+        exec(input_generator_code, namespace)
+        io_pairs = []
+        for _ in range(num_pairs):
+            inputs = namespace["input_generator"]()
+            outputs = namespace["main"](**inputs)
+            io_pairs.append((inputs, outputs))
+        return io_pairs
+
    def __getitem__(self, idx: int) -> dict:
-        """Generate a single mini sudoku puzzle"""
+        """Generate a single CodeI/O reasoning task"""
        rng = Random(self.seed + idx)

-        # TODO
-        question = ""
-        solution = ""
+        # TODO: load data from external source (HuggingFace dataset?)
+        jsonl_path = Path("data/codeio.jsonl")
+
+        # Avoid loading the entire file into memory in case it's large
+        with open(jsonl_path, "r", encoding="utf-8") as f:
+            num_lines = sum(1 for _ in f)
+            random_line_number = rng.randint(0, num_lines - 1)
+
+            f.seek(0)
+            for current_line_number, line in enumerate(f):
+                if current_line_number == random_line_number:
+                    json_data = json.loads(line.strip())
+
+        query = json_data["query"]
+        parameters = json_data["parameters"]
+        reference_code = json_data["reference_code"]
+        input_generator_code = json_data["input_generator"]
+
+        input_data, output_data = self._generate_io_pairs(reference_code, input_generator_code, num_pairs=1)[0]
+
+        # TODO: add a chance of generating an input-prediction task instead of output prediction
+
+        question = OUTPUT_PREDICTION_PROMPT_TEMPLATE.format(query, parameters, input_data, reference_code)
+        # TODO: consider changing format here
+        solution = output_data

        return {
            "question": question,
@ -50,6 +108,7 @@ class CodeIODataset(ProceduralDataset):
        }

    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
+        # TODO: better answer scoring
        oracle_answer = entry["answer"].strip()
        reward = 0.0
        if answer is not None and len(answer) > 0: