mirror of
https://github.com/InternLM/InternBootcamp.git
synced 2026-04-19 12:58:04 +00:00
105 lines
No EOL
3.2 KiB
Python
Executable file
105 lines
No EOL
3.2 KiB
Python
Executable file
from internbootcamp.bootcamp.base import Basebootcamp
|
|
import traceback
|
|
import re
|
|
import json
|
|
from .codeio_utils import *
|
|
import os
|
|
from datasets import load_dataset
|
|
|
|
python_path = "python"
|
|
run_path = "./python_tmp"
|
|
|
|
|
|
def evaluate_codeio(completion, reference_answer):
|
|
# try to get code solution from completion. if the completion is pure code, this will not take effect.
|
|
|
|
# print("completion",completion)
|
|
# print("******************")
|
|
# print("reference_answer",reference_answer)
|
|
# print("******************")
|
|
if not os.path.exists(run_path):
|
|
#print(f"文件夹 '{run_path}' 不存在,正在创建...")
|
|
try:
|
|
os.makedirs(run_path)
|
|
#print(f"文件夹 '{run_path}' 已成功创建!")
|
|
except Exception as e:
|
|
print(f"创建文件夹时发生错误: {str(e)}")
|
|
assert type(reference_answer) == str
|
|
|
|
reference_answer = json.loads(reference_answer)
|
|
|
|
last_json = extract_last_complete_json(completion)
|
|
if last_json is None:
|
|
format_correctness = False
|
|
return False , format_correctness
|
|
|
|
if reference_answer['io_pred'] == "output":
|
|
if not isinstance(last_json, dict):
|
|
return False, False
|
|
if "output" not in last_json:
|
|
return False, False
|
|
pred_output = last_json["output"]
|
|
# print("pred_output",pred_output)
|
|
# print("reference_answer[output]",reference_answer["output"])
|
|
acc = is_close(pred_output, reference_answer["output"])
|
|
if acc:
|
|
return True, True
|
|
else:
|
|
return False, True
|
|
elif reference_answer['io_pred'] == "input":
|
|
if not isinstance(last_json, dict):
|
|
return False, False
|
|
if "input" not in last_json:
|
|
return False, False
|
|
pred_input = last_json["input"]
|
|
|
|
candio = {'input': pred_input, 'output': reference_answer['output']}
|
|
res = check_input(reference_answer['refcode'], candio, reference_answer['funcname'], solution_prefix=solution_prefix, runtime_limit=5, used_python_path = python_path, run_path=run_path)
|
|
if res['status'] == 'success':
|
|
return True, True
|
|
else:
|
|
return False, True
|
|
|
|
|
|
|
|
def compute_score(solution_str, ground_truth, method='strict'):
|
|
"""
|
|
Scoring function for instruction following task
|
|
"""
|
|
|
|
reference_answer = ground_truth['gt']
|
|
|
|
answer = solution_str
|
|
|
|
if answer is None:
|
|
correctness = False
|
|
return correctness
|
|
|
|
correctness , _ = evaluate_codeio(answer, reference_answer=reference_answer)
|
|
|
|
assert isinstance(correctness, bool), f"correctness is not bool, is {type(correctness)}"
|
|
|
|
return correctness
|
|
|
|
|
|
class CodeIObootcamp(Basebootcamp):
|
|
def __init__(self, **params):
|
|
super().__init__(**params)
|
|
|
|
|
|
# no case generator for instruction following
|
|
def case_generator(self):
|
|
pass
|
|
|
|
@staticmethod
|
|
def prompt_func(case) -> str:
|
|
return case['prompt']
|
|
|
|
@staticmethod
|
|
def extract_output(output):
|
|
return output
|
|
|
|
@classmethod
|
|
def _verify_correction(cls, solution, identity) -> bool:
|
|
correctness = compute_score(solution_str=solution, ground_truth=identity)
|
|
return float(correctness) |