Merge branch 'fix-new-bootcamps' into 'main'

refactor(internbootcamps): standardize naming conventions and improve code structure

See merge request lilinyang/internbootcamp!13
This commit is contained in:
陈永康 2025-06-16 12:54:23 +00:00
commit 29f36b433c
14 changed files with 120 additions and 59 deletions

View file

@ -2,7 +2,7 @@ from rdkit import Chem
from rdkit.Chem import Crippen
from .InChI2logPBootCamp import InChI2logPbootcamp
class InChI2MRBootCamp(InChI2logPbootcamp):
class InChI2MRbootCamp(InChI2logPbootcamp):
def prompt_func(self, InChI) -> str:

View file

@ -8,13 +8,16 @@ from rdkit.Chem import Crippen
class InChI2logPbootcamp(Basebootcamp):
def __init__(self, max_atoms=15, min_atoms=3, elements=None, seed=None):
# super.__init__()
self.InChIGenerator = InChIGenerator(max_atoms=max_atoms, min_atoms=min_atoms, elements=elements, seed=seed)
self.tolerance_factor = tolerance_factor # 1 for 1% error consider true, 0.1 for 0.1% error true, 10 for 10% error
self.max_atoms = max_atoms
self.min_atoms = min_atoms
# self.InChIGenerator = InChIGenerator(max_atoms=max_atoms, min_atoms=min_atoms, elements=elements, seed=seed)
# self.tolerance_factor = tolerance_factor # 1 for 1% error consider true, 0.1 for 0.1% error true, 10 for 10% error
def case_generator(self) -> str:
    """
    Produce one InChI string to serve as a task case.

    A new ``InChIGenerator`` is built on every call from the atom-count
    bounds stored on the instance, kept on ``self.InChIGenerator``, and
    asked for a single valid InChI.

    NOTE(review): ``elements`` and ``seed`` are fixed to ``None`` here, so
    any values given to the constructor do not reach the generator --
    confirm this is intended.
    """
    generator = InChIGenerator(
        max_atoms=self.max_atoms,
        min_atoms=self.min_atoms,
        elements=None,
        seed=None,
    )
    self.InChIGenerator = generator
    valid_inchis = generator.generate_n_valid_inchi(1)
    return valid_inchis[0]
def prompt_func(self, InChI) -> str:

View file

@ -4,9 +4,9 @@ from .utils import last_boxed_only_string, remove_boxed
from rdkit import Chem
from rdkit.Chem import Crippen
from .SMILES2logPBootCamp import SMILES2logPBootCamp
from .SMILES2logPBootCamp import SMILES2logPbootcamp
class SMILES2MRBootCamp(SMILES2logPBootCamp):
class SMILES2MRbootCamp(SMILES2logPbootcamp):
def prompt_func(self, SMILES) -> str:

View file

@ -6,16 +6,19 @@ from rdkit.Chem import Crippen
from .InChI2logPBootCamp import InChI2logPbootcamp
class SMILES2logPBootCamp(InChI2logPbootcamp):
class SMILES2logPbootcamp(InChI2logPbootcamp):
def __init__(self,min_len=5, max_len=25,
seed=None):
# super.__init__()
self.SMILESGenerator = SMILESGenerator(min_len=min_len, max_len=max_len, seed=seed)
self.min_len = min_len
self.max_len = max_len
# self.SMILESGenerator = SMILESGenerator(min_len=min_len, max_len=max_len, seed=seed)
def case_generator(self) -> str:
    """
    Produce one SMILES string to serve as a task case.

    A new ``SMILESGenerator`` is built on every call from the length bounds
    stored on the instance, kept on ``self.SMILESGenerator``, and asked for
    a single valid SMILES string.

    NOTE(review): ``seed`` is fixed to ``None`` here, so a seed given to the
    constructor does not reach the generator -- confirm this is intended.
    """
    generator = SMILESGenerator(
        min_len=self.min_len,
        max_len=self.max_len,
        seed=None,
    )
    self.SMILESGenerator = generator
    valid_smiles = generator.generate_n_valid_smiles(1)
    return valid_smiles[0]
def prompt_func(self, SMILES) -> str:

View file

@ -32,11 +32,14 @@ from .bbeh_boolean_expressions.bbeh_boolean_expressions import Bbehbooleanexpres
from .ChemStructure2Property.InChI2logPBootCamp import InChI2logPbootcamp
from .ChemStructure2Property.InChI2MRBootCamp import InChI2MRBootCamp
from .ChemStructure2Property.SMILES2logPBootCamp import SMILES2logPBootCamp
from .ChemStructure2Property.SMILES2MRBootCamp import SMILES2MRBootCamp
from .ChemStructure2Property.InChI2MRBootCamp import InChI2MRbootCamp
from .ChemStructure2Property.SMILES2logPBootCamp import SMILES2logPbootcamp
from .ChemStructure2Property.SMILES2MRBootCamp import SMILES2MRbootCamp
from .earth_dew2humidity.earth_dew2humidity import Earthdew2humiditybootcamp
from .linearODE.linearODE import LinearODEbootcamp
from .kakurasu.kakurasu import Kakurasubootcamp
from .nonograms.nonograms import Nonogramsbootcamp
from .hitori.hitori import Hitoribootcamp

View file

@ -4,7 +4,7 @@ import numpy as np
from internbootcamp.bootcamp.base import Basebootcamp
class earth_dew2humidity(Basebootcamp):
class Earthdew2humiditybootcamp(Basebootcamp):
def __init__(
self,
temperature_range=(-20, 40),
@ -37,49 +37,71 @@ class earth_dew2humidity(Basebootcamp):
"e = 611.2 * np.exp(17.67 * (dewpoint - 273.15) / (dewpoint - 29.65))\n"
"e_s = 611.2 * np.exp(17.67 * (temperature - 273.15) / (temperature - 29.65))\n"
"relative humidity = e / e_s * 100\n"
"只需返回 “relative humidity = ”"
"\\boxed{relative humidity = %} 格式输出你的最终答案,例如\\boxed{relative humidity = your answer%}"
)
@staticmethod
def extract_output(output: str) -> str:
# 用正则提取“relative humidity = …”右侧的表达式
m = re.search(r"relative humidity\s*=\s*([^\n\r]+)", output)
return m.group(1).strip() if m else None
def extract_output(output):
    """
    Extract the numeric answer from a ``\\boxed{...}`` expression.

    The span runs from the first ``\\boxed{`` to the last ``}`` that follows
    it; the last unsigned decimal number inside that span is returned.

    Args:
        output: Raw model response text.

    Returns:
        The extracted number as a ``float``, or ``None`` when the response
        is not a string, has no ``\\boxed{``, has no closing brace after it,
        or the boxed span contains no number.

    NOTE(review): the ``_verify_correction`` shown next to this function
    calls ``solution.replace(...)`` before converting to float, which a
    float return value does not support -- confirm the two agree.
    """
    if not isinstance(output, str):
        return None

    marker = '\\boxed{'
    start = output.find(marker)
    if start == -1:
        # Without this guard the slice below would start at index 6 and
        # numbers from unboxed text would be accepted as the answer.
        return None

    end = output.rfind('}', start)
    if end == -1:
        # Unterminated box: slicing up to -1 would silently drop a character.
        return None

    boxed = output[start + len(marker):end]
    # Unsigned integers or decimals only; the last one is the final answer.
    numbers = re.findall(r'\d+(?:\.\d+)?', boxed)
    if not numbers:
        return None
    return float(numbers[-1])
@classmethod
def _verify_correction(cls, solution: "float | str | None", identity: dict) -> bool:
    """
    Check an extracted humidity answer against ``identity["humidity"]``.

    Args:
        solution: The extracted answer. May be a float, a numeric string
            (spaces are ignored), or ``None`` when nothing was extracted.
        identity: Case description; ``identity["humidity"]`` holds the
            reference value.

    Returns:
        ``True`` when the answer is numeric and differs from the reference
        by less than 0.1, otherwise ``False``.
    """
    if solution is None:
        return False
    if isinstance(solution, str):
        # Only strings carry removable spaces; calling .replace on a float
        # would raise AttributeError.
        solution = solution.replace(" ", "")
    try:
        value = float(solution)
    except (TypeError, ValueError):
        return False
    # The source shows two thresholds in sequence (1e-2, then 1e-1); the
    # later one is the value kept here.
    return abs(value - identity["humidity"]) < 1e-1
if __name__ == "__main__":
bootcamp = earth_dew2humidity(seed=123)
# 生成几个样例
examples = [bootcamp.case_generator() for _ in range(3)]
print(examples)
print(bootcamp.prompt_func(examples[0]))
print(bootcamp.extract_output("xxxxx relative humidity = 111222 "))
solution = bootcamp.extract_output("xxxxx relative humidity = 84.79 ")
print(bootcamp._verify_correction(solution, examples[0]))
solution = bootcamp.extract_output("xxxxx relative humidity = 83.79 ")
print(bootcamp._verify_correction(solution, examples[0]))
# bootcamp = Earthdew2humiditybootcamp(seed=123)
# # 生成几个样例
# examples = [bootcamp.case_generator() for _ in range(3)]
# print(examples)
# print(bootcamp.prompt_func(examples[0]))
# print(bootcamp.extract_output("xxxxx relative humidity = 111222 "))
# solution = bootcamp.extract_output("xxxxx relative humidity = 84.79 ")
# print(bootcamp._verify_correction(solution, examples[0]))
# solution = bootcamp.extract_output("[answer]relative humidity = relative humidity = 83.79%[/answer]")
# print(bootcamp._verify_correction(solution, examples[0]))
for identity in examples:
# 构造“模型”返回答案,模拟 LLM 的输出
humidity = identity["humidity"]
sol = f"{humidity:.4f}"
# 调用 Basebootcamp 提供的 verify_score 接口进行验证
score = bootcamp.verify_score(sol, identity, short_threshold=1e-2)
# 打印结果
print(json.dumps({
"identity": identity,
"solution": sol,
"verify_score": score
}, ensure_ascii=False, indent=2))
# for identity in examples:
# # 构造“模型”返回答案,模拟 LLM 的输出
# humidity = identity["humidity"]
# sol = f"\\boxed{{relative humidity = {humidity:.4f}%}}"
# # 调用 Basebootcamp 提供的 verify_score 接口进行验证
# score = bootcamp.verify_score(sol, identity, short_penalty=False,format_penalty=False)
# # 打印结果
# print(json.dumps({
# "identity": identity,
# "solution": sol,
# "extract_output": Earthdew2humiditybootcamp.extract_output(sol),
# "verify_score": score
# }, ensure_ascii=False, indent=2))
case = {
"id": 49,
"prompt": "下面给出露点温度dewpoint=13.225845924995145 (摄氏度)\n温度temperature=13.427933720529904 (摄氏度)\n请计算湿度,计算公式为:\ndewpoint = dewpoint + 273.15temperature = temperature + 273.15\ne = 611.2 * np.exp(17.67 * (dewpoint - 273.15) / (dewpoint - 29.65))\ne_s = 611.2 * np.exp(17.67 * (temperature - 273.15) / (temperature - 29.65))\nrelative humidity = e / e_s * 100\n\\boxed{relative humidity = %} 格式输出你的最终答案,例如 \\boxed{relative humidity = your answer%}。",
"output_len": 130,
"score": 0,
"ground_truth": {
"dewpoint": 13.225845924995145,
"temperature": 13.427933720529904,
"humidity": 98.69041013549538
},
"output": "首先将露点温度和温度转换为绝对温度。露点温度为13.225845924995145摄氏度加上273.15得到露点温度的绝对温度即为286.37584592499516 K。同样地温度13.427933720529904摄氏度加上273.15得到绝对温度286.5779337205299 K。\n\n接下来计算露点温度对应的饱和水蒸气压。使用公式e = 611.2 * np.exp(17.67 * (dewpoint - 273.15) / (dewpoint - 29.65))其中dewpoint为286.37584592499516 K。计算得到e的值约为654.54 hPa。\n\n然后计算温度对应的饱和水蒸气压。使用公式e_s = 611.2 * np.exp(17.67 * (temperature - 273.15) / (temperature - 29.65))其中temperature为286.5779337205299 K。计算得到e_s的值约为656.53 hPa。\n\n最后计算相对湿度。相对湿度等于e除以e_s再乘以100即(654.54 / 656.53) * 100 ≈ 99.69%\n</think>\n\n好的,我们按照给定的公式计算相对湿度。\n\n**步骤如下:**\n\n1. **将露点温度和温度转换为绝对温度:**\n \n \\[\n \\text{dewpoint} = 13.225845924995145\\,^\\circ\\text{C} + 273.15 = 286.37584592499516\\,\\text{K}\n \\]\n \n \\[\n \\text{temperature} = 13.427933720529904\\,^\\circ\\text{C} + 273.15 = 286.5779337205299\\,\\text{K}\n \\]\n\n2. **计算露点温度对应的饱和水蒸气压 \\( e \\)**\n \n \\[\n e = 611.2 \\times \\exp\\left(\\frac{17.67 \\times (286.37584592499516 - 273.15)}{286.37584592499516 - 29.65}\\right) \\approx 654.54\\,\\text{hPa}\n \\]\n\n3. **计算温度对应的饱和水蒸气压 \\( e_s \\)**\n \n \\[\n e_s = 611.2 \\times \\exp\\left(\\frac{17.67 \\times (286.5779337205299 - 273.15)}{286.5779337205299 - 29.65}\\right) \\approx 656.53\\,\\text{hPa}\n \\]\n\n4. **计算相对湿度:**\n \n \\[\n \\text{relative humidity} = \\left( \\frac{e}{e_s} \\right) \\times 100 = \\left( \\frac{654.54}{656.53} \\right) \\times 100 \\approx 99.69\\%\n \\]\n\n**最终答案:**\n\n\\[\n\\boxed{\\text{relative humidity} = 99.69\\%}\n\\]"
}
print(Earthdew2humiditybootcamp.extract_output(case['output']))

View file

@ -5,7 +5,7 @@ from scipy.integrate import odeint
from internbootcamp.bootcamp.base import Basebootcamp
class LinearODEBootcamp(Basebootcamp):
class LinearODEbootcamp(Basebootcamp):
def __init__(
self,
k_range=(0.1, 1.0),
@ -38,41 +38,70 @@ class LinearODEBootcamp(Basebootcamp):
return (
f"下面给出变量 x(t) 的观测数据点:\n{points}\n\n"
"请找出其满足的微分方程形式为dx/dt = f(x)。\n"
"只需返回 “dx/dt = <表达式>”。"
"以dx/dt = <表达式>格式表示你的答案。"
"并且使用[answer]标签包裹你的最终答案, 例如[answer]dx/dt = <表达式>[/answer]."
)
@staticmethod
def extract_output(output: str) -> str:
# 用正则提取“dx/dt = …”右侧的表达式
m = re.search(r"dx/dt\s*=\s*([^\n\r]+)", output)
return m.group(1).strip() if m else None
def extract_output(output):
    """
    Extract the coefficient ``c`` from an answer of the form ``dx/dt = c*x``.

    Only the last ``[answer]...[/answer]`` span is considered. Every
    ``dx/dt =`` prefix inside it is removed (whitespace around ``=`` is
    optional), and the remainder must be an optional signed coefficient,
    an optional ``*``, and ``x`` or ``X``.

    Args:
        output: Raw model response text.

    Returns:
        The coefficient as a ``float`` (``1.0`` / ``-1.0`` for a bare ``x``,
        ``+x`` or ``-x``), or ``None`` when the response is not a string,
        has no answer tags, or the expression is not of the form ``c*x``.
    """
    import re

    if not isinstance(output, str):
        return None

    answers = re.findall(r'\[answer\](.*?)\[/answer\]', output, re.DOTALL)
    if not answers:
        return None

    # Accept "dx/dt = ", "dx/dt=" and repeated prefixes alike.
    expr = re.sub(r'dx/dt\s*=\s*', '', answers[-1]).strip()

    match = re.fullmatch(
        r"""
        ([+-]?\s*                 # optional sign, may be followed by spaces
         (?:\d+(?:\.\d*)?         # integer part with optional fraction
           |\.\d+)?               # or a fraction-only number
         (?:[eE][+-]?\d+)?        # optional exponent
        )?                        # whole coefficient is optional (x, -x)
        \s*\*?\s*                 # optional multiplication sign
        [xX]                      # the variable
        """,
        expr,
        re.VERBOSE,
    )
    if match is None:
        return None

    # The pattern allows spaces between sign and digits, but float() does
    # not, so remove them before converting.
    coeff = re.sub(r'\s+', '', match.group(1) or '')
    if coeff in ('', '+'):
        return 1.0
    if coeff == '-':
        return -1.0
    try:
        return float(coeff)
    except ValueError:
        # e.g. an exponent without a mantissa ("e5x").
        return None
@classmethod
def _verify_correction(cls, solution: str, identity: dict) -> bool:
# 解析 LLM 给出的系数 c形如 “c*x”
sol = solution.replace(" ", "")
match = re.fullmatch(r"([\-0-9\.eE]+)\*x", sol)
if not match:
return False
c = float(match.group(1))
# 验证 c ≈ -k
return abs(c + identity["k"]) < 1e-2
def _verify_correction(cls, solution: float, identity: dict) -> bool:
    """
    Check an extracted coefficient against the case's decay constant.

    The expected coefficient is ``-identity["k"]``, so the answer is
    accepted when ``solution + k`` is smaller than 0.01 in magnitude.

    Args:
        solution: Coefficient returned by ``extract_output``; ``None``
            when no answer could be parsed.
        identity: Case description; ``identity["k"]`` holds the constant.

    Returns:
        ``True`` when the coefficient matches within tolerance, else
        ``False`` (including when ``solution`` is ``None``).
    """
    if solution is None:
        # extract_output signals a parse failure with None; treat it as a
        # wrong answer rather than raising TypeError.
        return False
    delta = abs(solution + identity["k"])
    return delta < 1e-2
if __name__ == "__main__":
bootcamp = LinearODEBootcamp(seed=123)
bootcamp = LinearODEbootcamp(seed=123)
# 生成几个样例
examples = [bootcamp.case_generator() for _ in range(3)]
for identity in examples:
# 构造“模型”返回答案,模拟 LLM 的输出
coeff = -identity["k"]
sol = f"{coeff:.4f}*x"
sol = f"[answer]dx/dt = {coeff:.2f}*x[/answer]"
# 调用 Basebootcamp 提供的 verify_score 接口进行验证
score = bootcamp.verify_score(sol, identity, short_threshold=1e-2)
score = bootcamp.verify_score(sol, identity, short_penalty=False,format_penalty=False)
# 打印结果
print(json.dumps({
"identity": identity,
"solution": sol,
"extract_output": LinearODEbootcamp.extract_output(sol),
"verify_score": score
}, ensure_ascii=False, indent=2))

View file

@ -19,7 +19,8 @@ setuptools.setup(
"transformers",
"langdetect",
"pympler",
"shortuuid"
"shortuuid",
"rdkit"
],
package_data={