mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-19 12:57:58 +00:00
fix: correct dataset name to bigcode/humanevalpack
This commit is contained in:
parent
590e8a1ef2
commit
5c2afa8ea7
2 changed files with 6 additions and 6 deletions
|
|
@@ -4,7 +4,7 @@ An Atropos RL environment for training LLMs to debug and fix buggy Python code.
|
|||
|
||||
## Overview
|
||||
|
||||
This environment uses the [HumanEvalFix](https://huggingface.co/datasets/bigcode/humanevalfix-python) dataset, which contains 164 buggy Python functions with associated test suites. The model receives a buggy function and must output the corrected version inside `\boxed{}`. Scoring is done by executing the fixed code against the original test cases.
|
||||
This environment uses the [HumanEvalPack](https://huggingface.co/datasets/bigcode/humanevalpack) dataset (Python subset, HumanEvalFix task), which contains 164 buggy Python functions with associated test suites. The model receives a buggy function and must output the corrected version inside `\boxed{}`. Scoring is done by executing the fixed code against the original test cases.
|
||||
|
||||
## Architecture
|
||||
|
||||
|
|
@@ -69,7 +69,7 @@ python code_debug_env.py evaluate \
|
|||
|
||||
## Dataset
|
||||
|
||||
- **Source**: [bigcode/humanevalfix-python](https://huggingface.co/datasets/bigcode/humanevalfix-python)
|
||||
- **Source**: [bigcode/humanevalpack](https://huggingface.co/datasets/bigcode/humanevalpack) (Python subset)
|
||||
- **License**: Apache 2.0
|
||||
- **Size**: 164 problems
|
||||
- **Split**: 80% train / 20% test
|
||||
|
|
|
|||
|
|
@@ -2,7 +2,7 @@
|
|||
Code Debug Environment for Atropos
|
||||
|
||||
Trains LLMs to debug and fix buggy Python functions.
|
||||
Uses the HumanEvalFix dataset with execution-based verification
|
||||
Uses the HumanEvalPack dataset (HumanEvalFix subset) with execution-based verification
|
||||
against ground-truth test cases.
|
||||
|
||||
Environment pattern follows sql_query_env for consistency.
|
||||
|
|
@@ -162,11 +162,11 @@ class CodeDebugEnv(BaseEnv):
|
|||
await super().wandb_log(wandb_metrics)
|
||||
|
||||
async def setup(self):
|
||||
"""Load the HumanEvalFix dataset and prepare train/test splits."""
|
||||
"""Load the HumanEvalPack dataset (HumanEvalFix) and prepare train/test splits."""
|
||||
from datasets import load_dataset
|
||||
|
||||
print("Loading HumanEvalFix dataset...")
|
||||
dataset = load_dataset("bigcode/humanevalfix-python", split="test")
|
||||
print("Loading HumanEvalPack (python) dataset...")
|
||||
dataset = load_dataset("bigcode/humanevalpack", "python", split="test")
|
||||
|
||||
all_items: List[CodeDebugItem] = []
|
||||
for row in dataset:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue