mirror of
https://github.com/InternLM/InternBootcamp.git
synced 2026-04-28 17:29:37 +00:00
update to tech report version (#10)
* feat(run_eval): add checkpoint resume functionality and update example documentation; - update new bootcamp benchmark dataset * refactor(data_pipeline): optimize data generation pipeline; add multiple preset configurations for data generation * docs: update bootcamp list and add new scripts - Update Fulllist_InternBootcamp.md with new bootcamps and categories - Add new scripts to .gitignore: - examples/pipelines/filter_autogen_configs.py - examples/pipelines/quickgen_data_configs_from_eval_meta.py - Update dependencies in setup.py: - Add scipy and scikit-learn * refactor(internbootcamp): update bootcamp modules and improve error handling - Update import statements in __init__.py files - Add timestamp to target directory name in verl_data_preprocess.py - Improve error handling and scoring logic in bootcamp_judger.py - Remove unnecessary comments and update puzzle descriptions in multiple files
This commit is contained in:
parent
125a7818e0
commit
a8249acc18
2952 changed files with 105460 additions and 17649 deletions
|
|
@ -46,10 +46,12 @@ def last_boxed_only_string(string):
|
|||
return retval
|
||||
|
||||
class Medcalculatorbootcamp(Basebootcamp): # 医学计算器类
|
||||
def __init__(self, conf_file="./internbootcamp/libs/med_calculator/med_calculator.json", seed=None):
|
||||
def __init__(self, conf_file="./internbootcamp/libs/med_calculator/med_calculator.json", seed=None, add_rule_ratio=0.25):
    """Load the calculator configuration and precompute the rule texts.

    Args:
        conf_file: Path of the JSON configuration file to load.
        seed: Value handed to ``random.seed``.
        add_rule_ratio: Stored on the instance; ``_gen_a_case`` compares
            ``random.random()`` against it to set a case's ``add_rule`` flag.
    """
    self.add_rule_ratio = add_rule_ratio
    # This seeds the module-level generator of ``random``, not a private one.
    random.seed(seed)

    with open(conf_file, mode="r", encoding="utf-8") as config_fh:
        loaded_config = json.load(config_fh)
    self.config = loaded_config

    # _construct_rules reads self.config, so it has to run after the load.
    self.rules = self._construct_rules()
|
||||
|
||||
def _gen_a_case(self, category, name):
|
||||
details = self.config[category][name]
|
||||
|
|
@ -60,48 +62,59 @@ class Medcalculatorbootcamp(Basebootcamp): # 医学计算器类
|
|||
case 'equation':
|
||||
while 1:
|
||||
formula = details["formula"]
|
||||
for i in details["inputs"]:
|
||||
match indicators[i]["type"]:
|
||||
case "int":
|
||||
v = random.randint(*indicators[i]["range"])
|
||||
case "float":
|
||||
v = random.uniform(*indicators[i]["range"])
|
||||
if 'precision' in indicators[i]:
|
||||
v = round(v, indicators[i]["precision"])
|
||||
case "choice":
|
||||
v = random.choice(indicators[i]["range"])
|
||||
|
||||
t = i + str(v) + indicators[i].get("unit", "")
|
||||
inputs.append(t)
|
||||
formula = formula.replace(i, str(v))
|
||||
|
||||
try:
|
||||
for i in details["inputs"]:
|
||||
match indicators[i]["type"]:
|
||||
case "int":
|
||||
k = v = random.randint(*indicators[i]["range"])
|
||||
case "float":
|
||||
v = random.uniform(*indicators[i]["range"])
|
||||
if 'precision' in indicators[i]:
|
||||
v = round(v, indicators[i]["precision"])
|
||||
k = v
|
||||
case "choice":
|
||||
rg = indicators[i]["range"]
|
||||
if (t := type(rg)) is list:
|
||||
k = v = random.choice(indicators[i]["range"])
|
||||
elif t is dict:
|
||||
k = random.choice(list(rg.keys()))
|
||||
v = rg[k]
|
||||
|
||||
inp = i.split('[')[0] + str(k) + indicators[i].get("unit", "")
|
||||
inputs.append(inp)
|
||||
formula = formula.replace(i, str(v))
|
||||
|
||||
target = eval(formula)
|
||||
break
|
||||
except (ZeroDivisionError, ValueError):
|
||||
pass
|
||||
except Exception as e:
|
||||
e.args=(*e.args, name, '公式:'+details["formula"], '带入数值:'+formula)
|
||||
raise e
|
||||
# print(name, formula, details["formula"])
|
||||
# breakpoint()
|
||||
out_k = name.split('—')[-1]
|
||||
if 'precision' in indicators[out_k]:
|
||||
target = round(target, indicators[out_k]["precision"])
|
||||
case 'scale':
|
||||
target = 0
|
||||
for title, options in details["points"].items():
|
||||
if isinstance(options, dict):
|
||||
selected_option = random.choice(list(options.keys()))
|
||||
inputs.append(f'{title}: {selected_option}')
|
||||
target += options[selected_option]
|
||||
else:
|
||||
inputs.append(title)
|
||||
target += options
|
||||
try:
|
||||
for title, options in details["points"].items():
|
||||
if options['type'] == '单选':
|
||||
selected_option = random.choice(list(options['items'].keys()))
|
||||
inputs.append(f'{title}: {selected_option}')
|
||||
target += options['items'][selected_option]
|
||||
elif options['type'] == '多选':
|
||||
selected_options = random.sample(list(options['items'].keys()), k=random.randint(1, len(options['items'])))
|
||||
inputs.append(f'{title}: {"、".join(selected_options)}')
|
||||
target += sum(options['items'][opt] for opt in selected_options)
|
||||
except Exception as e:
|
||||
e.args=(*e.args, name)
|
||||
raise e
|
||||
|
||||
ret = {
|
||||
"category": category,
|
||||
"name": name,
|
||||
"inputs": inputs,
|
||||
"add_rule": random.random() < self.add_rule_ratio,
|
||||
"target": target,
|
||||
}
|
||||
return ret
|
||||
|
|
@ -111,8 +124,36 @@ class Medcalculatorbootcamp(Basebootcamp): # 医学计算器类
|
|||
name = random.choice(list(self.config[category].keys()))
|
||||
return self._gen_a_case(category, name)
|
||||
|
||||
def _construct_rules(self):
|
||||
rules = {}
|
||||
for name, details in self.config["equation"].items():
|
||||
formula = details["formula"]
|
||||
explanation_arr = []
|
||||
for i in details["inputs"]:
|
||||
indicator = self.config["indicator"][i]
|
||||
if indicator["type"] == "choice":
|
||||
j = i.split('[')[0]
|
||||
t = f'{j}:'+ ','.join([f'{k}:{v}' for k,v in indicator["range"].items()])
|
||||
formula = formula.replace(i, j)
|
||||
explanation_arr.append(t)
|
||||
formula = formula.replace('math.', '')
|
||||
|
||||
explanation = ''
|
||||
if explanation_arr:
|
||||
explanation = "公式说明:" + ';'.join(explanation_arr) + "\n"
|
||||
|
||||
rules[name] = f"{name}的计算公式:{formula}\n{explanation}\n"
|
||||
for name, details in self.config["scale"].items():
|
||||
rules[name] = f"{name}量表的评分标准:\n"
|
||||
for title, options in details["points"].items():
|
||||
rules[name] += f"[{title}][{options['type']}]\n"
|
||||
rules[name] += '\n'.join([f'{k}({v}分)' for k, v in options['items'].items()]) + "\n"
|
||||
rules[name] += "\n"
|
||||
return rules
|
||||
|
||||
def prompt_func(self, case):
|
||||
indicators = self.config["indicator"]
|
||||
random.shuffle(case["inputs"])
|
||||
inp_items = ','.join(case["inputs"])
|
||||
out_item = case["name"]
|
||||
|
||||
|
|
@ -123,7 +164,8 @@ class Medcalculatorbootcamp(Basebootcamp): # 医学计算器类
|
|||
if 'precision' in indicators[out_name]:
|
||||
other_item = f",保留{indicators[out_name]['precision']}位小数"
|
||||
|
||||
instruction = f"患者信息:{inp_items}。请计算{out_item}{other_item}。"
|
||||
rule = self.rules[case["name"]] if case["add_rule"] else ""
|
||||
instruction = f"{rule}患者信息:{inp_items}。请计算{out_item}{other_item}。"
|
||||
instruction_following = """Let's think step by step and output the final answer within \\boxed{xxx:xxx}. For example "\\boxed{BMI: 20.5}"."""
|
||||
prompt = instruction + '\n' + instruction_following
|
||||
return prompt
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue