reasoning-gym/reasoning_gym/code/codeio.py

import gzip
import json
from dataclasses import dataclass
from pathlib import Path
from random import Random
from typing import Any, Optional

from ..factory import ProceduralDataset, register_dataset

# Prompt for the output-prediction task: the model is shown an input and must
# predict the output. CodeIODataset.__getitem__ fills it positionally via str.format:
#   {0} - the record's "query" field
#   {1} - the record's "parameters" field
#   {2} - the generated input
#   {3} - the record's "reference_code" field
OUTPUT_PREDICTION_PROMPT_TEMPLATE = """
You are given a question that requires some input and output variables as follows:

{0}

The input and output requirements are as follows:

{1}

Given the following input:

{2}

Can you predict the output without writing any code? Please think and then provide the exact output in the form of a JSON object as your final answer. The keys and values of the object should strictly match the output requirement as specified.

Tip: Here is a reference code snippet for this question. You can refer to this code to guide your reasoning but not copy spans of code directly.

{3}
"""

# Prompt for the input-prediction task: the model is shown an output and must
# propose a feasible input. CodeIODataset.__getitem__ fills it positionally via str.format:
#   {0} - the record's "query" field
#   {1} - the record's "parameters" field
#   {2} - the generated output
#   {3} - the record's "reference_code" field
# NOTE(review): "even if the there is only one input variable" below looks like a
# typo ("the there"); it is left untouched here because this text is emitted
# verbatim in generated questions - confirm before changing.
INPUT_PREDICTION_PROMPT_TEMPLATE = """
You are given a question that requires some input and output variables as follows:

{0}

The input and output requirements are as follows:

{1}

Given the following output:

{2}

Can you predict a feasible input without writing any code? Please reason and put your final answer in the form of a JSON object, even if the there is only one input variable, with keys strictly matching the input variables' names as specified.

Tip: Here is a reference code snippet for this question. You can refer to this code to guide your reasoning but not copy spans of code directly.

{3}
"""


@dataclass
class CodeIOConfig:
    """Settings for CodeI/O reasoning task generation.

    Attributes:
        seed: Optional integer seed; ``None`` by default.
        size: Number of items the dataset exposes; 500 by default.
        input_prediction_probability: Value in [0, 1] used to choose between
            the input-prediction and output-prediction prompts; 0.5 by default.
    """

    seed: Optional[int] = None
    size: int = 500
    input_prediction_probability: float = 0.5

    def validate(self) -> None:
        """Assert that ``input_prediction_probability`` lies in the closed range [0, 1]."""
        probability = self.input_prediction_probability
        within_unit_interval = 0.0 <= probability <= 1.0
        assert within_unit_interval, "input_prediction_probability must be in [0, 1]"


class CodeIODataset(ProceduralDataset):
    def __init__(self, config: CodeIOConfig):
        """Initialise the dataset and index the bundled CodeI/O data file.

        Args:
            config: Generation settings; its ``seed`` and ``size`` are forwarded
                to the base-class constructor.
        """
        super().__init__(config=config, seed=config.seed, size=config.size)

        self._data_path = Path(__file__).parent / "contrib/codeio/data.jsonl.gz"
        line_starts = self._offsets = []

        # Remember the stream position at the start of every line so that a
        # record can later be reached with a single seek(). Positions are taken
        # with tell() around explicit readline() calls rather than by iterating
        # over the file object.
        with gzip.open(self._data_path, "rt", encoding="utf-8") as handle:
            position = handle.tell()
            while handle.readline():
                line_starts.append(position)
                position = handle.tell()

    def __len__(self) -> int:
        return self.config.size

    def __iter__(self):
        self._current_idx = 0
        return self

    def __next__(self):
        if self._current_idx >= self.config.size:
            raise StopIteration
        item = self[self._current_idx]
        self._current_idx += 1
        return item

    def _generate_io_pairs(self, main_code: str, input_generator_code: str, num_pairs: int = 1):
        local_vars = {}
        exec(main_code, {}, local_vars)
        exec(input_generator_code, {}, local_vars)
        io_pairs = []
        for _ in range(num_pairs):
            inputs = local_vars["input_generator"]()
            outputs = local_vars["main"](**inputs)
            io_pairs.append((inputs, outputs))
        return io_pairs

    def __getitem__(self, idx: int) -> dict:
        """Generate a single CodeI/O reasoning task.

        A record is picked from the data file and its code is executed to
        obtain one ``(input, output)`` sample. The task is then phrased either
        as input prediction (given the output, propose an input) or as output
        prediction (given the input, predict the output).

        The record choice and the task type are drawn from a ``Random`` seeded
        with ``self.seed + idx``.

        Args:
            idx: Index of the item; combined with ``self.seed`` to seed the RNG.

        Returns:
            A dict with keys ``"question"`` (the formatted prompt), ``"answer"``
            (the expected value serialised with ``json.dumps``) and
            ``"metadata"`` (an empty dict).
        """
        rng = Random(self.seed + idx)

        # Jump straight to a randomly chosen record using the offsets indexed in __init__.
        random_offset = rng.choice(self._offsets)
        with gzip.open(self._data_path, "rt", encoding="utf-8") as f:
            f.seek(random_offset)
            json_data = json.loads(f.readline().strip())

        query = json_data["query"]
        parameters = json_data["parameters"]
        reference_code = json_data["reference_code"]
        input_generator_code = json_data["input_generator"]

        input_data, output_data = self._generate_io_pairs(reference_code, input_generator_code, num_pairs=1)[0]

        # input_prediction_probability is the chance of an *input*-prediction
        # task, so the "< probability" branch must build the input-prediction
        # prompt (previously the two branches were swapped).
        if rng.random() < self.config.input_prediction_probability:
            question = INPUT_PREDICTION_PROMPT_TEMPLATE.format(query, parameters, output_data, reference_code)
            solution = json.dumps(input_data)
        else:
            question = OUTPUT_PREDICTION_PROMPT_TEMPLATE.format(query, parameters, input_data, reference_code)
            solution = json.dumps(output_data)

        return {
            "question": question,
            "answer": solution,
            "metadata": {},
        }

    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
        # TODO: this scoring could definitely be refined
        oracle_answer = entry["answer"].strip()
        reward = 0.0
        if answer is not None and len(answer) > 0:
            answer = answer.strip()
            if answer == oracle_answer:
                reward = 1.0
            elif "{" in answer and "}" in answer:
                # Check if the answer contains a correct format JSON object somewhere
                # But penalise for length & accuracy
                ans_first_open, ans_last_close = answer.index("{"), answer.rindex("}")
                extra_chars = len(answer[:ans_first_open]) + len(answer[ans_last_close + 1 :])

                try:
                    answer_dict = json.loads(answer[ans_first_open : ans_last_close + 1])
                    oracle_dict = json.loads(oracle_answer)
                    if answer_dict == oracle_dict:
                        # 0.5 is arbitrary here, but the answers are very short so it seems harsh to penalize too much
                        # e.g. if oracle is {"steps": "3"} and answer is "The correct answer is: {"steps": "3"}"
                        reward = max(len(oracle_answer) / (len(oracle_answer) + 0.5 * extra_chars), 0.2)
                    elif answer_dict.keys() == oracle_dict.keys():
                        # Wrong answer, but at least the right format
                        reward = 0.1
                    else:
                        # At least we got a JSON object, I guess?
                        reward = 0.05
                except json.JSONDecodeError:
                    if oracle_answer in answer:
                        reward = len(oracle_answer) / len(answer)
            elif oracle_answer in answer:
                # max() to avoid penalising too heavily, since correct answers are short here
                reward = max(len(oracle_answer) / len(answer), 0.2)
            else:
                reward = 0.01

        return reward


# Register the dataset
# register_dataset("codeio", CodeIODataset, CodeIOConfig)