mirror of
https://github.com/InternLM/InternBootcamp.git
synced 2026-04-19 12:58:04 +00:00
refactor(internbootcamp): standardize naming conventions and improve code structure
- Rename puzzle configuration files for consistency (e.g., InChI2logP_test.json)
- Standardize class names to use a lowercase `bootcamp`-style suffix rather than `BootCamp`/`Bootcamp` (e.g., InChI2MRBootCamp -> InChI2MRbootCamp, LinearODEBootcamp -> LinearODEbootcamp)
- Improve code structure in various bootcamp modules for better readability and maintainability
- Update import statements and file references to reflect the new naming conventions
- Enhance setup.py to include the rdkit dependency
This commit is contained in:
parent
fdf5d19170
commit
324d068f8d
14 changed files with 120 additions and 59 deletions
|
|
@ -2,7 +2,7 @@ from rdkit import Chem
|
|||
from rdkit.Chem import Crippen
|
||||
from .InChI2logPBootCamp import InChI2logPbootcamp
|
||||
|
||||
class InChI2MRBootCamp(InChI2logPbootcamp):
|
||||
class InChI2MRbootCamp(InChI2logPbootcamp):
|
||||
|
||||
def prompt_func(self, InChI) -> str:
|
||||
|
||||
|
|
|
|||
|
|
@ -8,13 +8,16 @@ from rdkit.Chem import Crippen
|
|||
class InChI2logPbootcamp(Basebootcamp):
|
||||
def __init__(self, max_atoms=15, min_atoms=3, elements=None, seed=None):
|
||||
# super.__init__()
|
||||
self.InChIGenerator = InChIGenerator(max_atoms=max_atoms, min_atoms=min_atoms, elements=elements, seed=seed)
|
||||
self.tolerance_factor = tolerance_factor # 1 for 1% error consider true, 0.1 for 0.1% error true, 10 for 10% error
|
||||
self.max_atoms = max_atoms
|
||||
self.min_atoms = min_atoms
|
||||
# self.InChIGenerator = InChIGenerator(max_atoms=max_atoms, min_atoms=min_atoms, elements=elements, seed=seed)
|
||||
# self.tolerance_factor = tolerance_factor # 1 for 1% error consider true, 0.1 for 0.1% error true, 10 for 10% error
|
||||
|
||||
def case_generator(self) -> str:
|
||||
"""
|
||||
Build a fresh InChIGenerator and return the first InChI string it generates.
|
||||
"""
|
||||
self.InChIGenerator = InChIGenerator(max_atoms=self.max_atoms, min_atoms=self.min_atoms, elements=None, seed=None)
|
||||
return self.InChIGenerator.generate_n_valid_inchi(1)[0]
|
||||
|
||||
def prompt_func(self, InChI) -> str:
|
||||
|
|
|
|||
|
|
@ -4,9 +4,9 @@ from .utils import last_boxed_only_string, remove_boxed
|
|||
from rdkit import Chem
|
||||
from rdkit.Chem import Crippen
|
||||
|
||||
from .SMILES2logPBootCamp import SMILES2logPBootCamp
|
||||
from .SMILES2logPBootCamp import SMILES2logPbootcamp
|
||||
|
||||
class SMILES2MRBootCamp(SMILES2logPBootCamp):
|
||||
class SMILES2MRbootCamp(SMILES2logPbootcamp):
|
||||
|
||||
def prompt_func(self, SMILES) -> str:
|
||||
|
||||
|
|
|
|||
|
|
@ -6,16 +6,19 @@ from rdkit.Chem import Crippen
|
|||
|
||||
from .InChI2logPBootCamp import InChI2logPbootcamp
|
||||
|
||||
class SMILES2logPBootCamp(InChI2logPbootcamp):
|
||||
class SMILES2logPbootcamp(InChI2logPbootcamp):
|
||||
def __init__(self,min_len=5, max_len=25,
|
||||
seed=None):
|
||||
# super.__init__()
|
||||
self.SMILESGenerator = SMILESGenerator(min_len=min_len, max_len=max_len, seed=seed)
|
||||
self.min_len = min_len
|
||||
self.max_len = max_len
|
||||
# self.SMILESGenerator = SMILESGenerator(min_len=min_len, max_len=max_len, seed=seed)
|
||||
|
||||
def case_generator(self) -> str:
|
||||
"""
|
||||
Build a fresh SMILESGenerator and return the first SMILES string it generates.
|
||||
"""
|
||||
self.SMILESGenerator = SMILESGenerator(min_len=self.min_len, max_len=self.max_len, seed=None)
|
||||
return self.SMILESGenerator.generate_n_valid_smiles(1)[0]
|
||||
|
||||
def prompt_func(self, SMILES) -> str:
|
||||
|
|
|
|||
|
|
@ -32,11 +32,14 @@ from .bbeh_boolean_expressions.bbeh_boolean_expressions import Bbehbooleanexpres
|
|||
|
||||
|
||||
from .ChemStructure2Property.InChI2logPBootCamp import InChI2logPbootcamp
|
||||
from .ChemStructure2Property.InChI2MRBootCamp import InChI2MRBootCamp
|
||||
from .ChemStructure2Property.SMILES2logPBootCamp import SMILES2logPBootCamp
|
||||
from .ChemStructure2Property.SMILES2MRBootCamp import SMILES2MRBootCamp
|
||||
from .ChemStructure2Property.InChI2MRBootCamp import InChI2MRbootCamp
|
||||
from .ChemStructure2Property.SMILES2logPBootCamp import SMILES2logPbootcamp
|
||||
from .ChemStructure2Property.SMILES2MRBootCamp import SMILES2MRbootCamp
|
||||
|
||||
|
||||
from .earth_dew2humidity.earth_dew2humidity import Earthdew2humiditybootcamp
|
||||
from .linearODE.linearODE import LinearODEbootcamp
|
||||
|
||||
from .kakurasu.kakurasu import Kakurasubootcamp
|
||||
from .nonograms.nonograms import Nonogramsbootcamp
|
||||
from .hitori.hitori import Hitoribootcamp
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import numpy as np
|
|||
from internbootcamp.bootcamp.base import Basebootcamp
|
||||
|
||||
|
||||
class earth_dew2humidity(Basebootcamp):
|
||||
class Earthdew2humiditybootcamp(Basebootcamp):
|
||||
def __init__(
|
||||
self,
|
||||
temperature_range=(-20, 40),
|
||||
|
|
@ -37,49 +37,71 @@ class earth_dew2humidity(Basebootcamp):
|
|||
"e = 611.2 * np.exp(17.67 * (dewpoint - 273.15) / (dewpoint - 29.65))\n"
|
||||
"e_s = 611.2 * np.exp(17.67 * (temperature - 273.15) / (temperature - 29.65))\n"
|
||||
"relative humidity = e / e_s * 100\n"
|
||||
"只需返回 “relative humidity = ”。"
|
||||
"以\\boxed{relative humidity = ?%} 格式输出你的最终答案,例如\\boxed{relative humidity = your answer%}。"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def extract_output(output: str) -> str:
|
||||
# 用正则提取“relative humidity = …”右侧的表达式
|
||||
m = re.search(r"relative humidity\s*=\s*([^\n\r]+)", output)
|
||||
return m.group(1).strip() if m else None
|
||||
def extract_output(output):
    r"""Extract the numeric relative-humidity answer from a model response.

    The answer is expected inside a ``\boxed{...}`` expression, for example
    ``\boxed{relative humidity = 84.79%}``.  The inspected span runs from the
    first ``\boxed{`` to the last ``}`` that follows it, and the last
    unsigned decimal number inside that span is returned.

    Args:
        output: Raw text produced by the model.

    Returns:
        The extracted number as a ``float``, or ``None`` when ``output`` is
        not a string, contains no ``\boxed{``, has no closing ``}``, or the
        boxed span contains no number.
    """
    marker = '\\boxed{'
    if not isinstance(output, str):
        return None

    start = output.find(marker)
    if start == -1:
        # Without this guard the slice below would start at index 6 and
        # parse unrelated text.
        return None

    end = output.rfind('}', start)
    if end == -1:
        # Unterminated box: rfind's -1 would silently drop the last character.
        return None

    boxed = output[start + len(marker):end]

    # Unsigned integers or decimals; the final one is taken as the answer so
    # that intermediate values inside the box are ignored.
    numbers = re.findall(r'\d+(?:\.\d+)?', boxed)
    if not numbers:
        # e.g. the template "\boxed{relative humidity = ?%}" echoed back.
        return None
    return float(numbers[-1])
|
||||
|
||||
@classmethod
|
||||
def _verify_correction(cls, solution: str, identity: dict) -> bool:
|
||||
# 解析 LLM 给出的系数 c,形如 “c*x”
|
||||
solution = solution.replace(" ", "")
|
||||
try:
|
||||
c = float(solution)
|
||||
except:
|
||||
return False
|
||||
# print(c)
|
||||
# 验证 c ≈ k
|
||||
return abs(c - identity["humidity"]) < 1e-2
|
||||
return abs(c - identity["humidity"]) < 1e-1
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
bootcamp = earth_dew2humidity(seed=123)
|
||||
# 生成几个样例
|
||||
examples = [bootcamp.case_generator() for _ in range(3)]
|
||||
print(examples)
|
||||
print(bootcamp.prompt_func(examples[0]))
|
||||
print(bootcamp.extract_output("xxxxx relative humidity = 111222 "))
|
||||
solution = bootcamp.extract_output("xxxxx relative humidity = 84.79 ")
|
||||
print(bootcamp._verify_correction(solution, examples[0]))
|
||||
solution = bootcamp.extract_output("xxxxx relative humidity = 83.79 ")
|
||||
print(bootcamp._verify_correction(solution, examples[0]))
|
||||
# bootcamp = Earthdew2humiditybootcamp(seed=123)
|
||||
# # 生成几个样例
|
||||
# examples = [bootcamp.case_generator() for _ in range(3)]
|
||||
# print(examples)
|
||||
# print(bootcamp.prompt_func(examples[0]))
|
||||
# print(bootcamp.extract_output("xxxxx relative humidity = 111222 "))
|
||||
# solution = bootcamp.extract_output("xxxxx relative humidity = 84.79 ")
|
||||
# print(bootcamp._verify_correction(solution, examples[0]))
|
||||
# solution = bootcamp.extract_output("[answer]relative humidity = relative humidity = 83.79%[/answer]")
|
||||
# print(bootcamp._verify_correction(solution, examples[0]))
|
||||
|
||||
for identity in examples:
|
||||
# 构造“模型”返回答案,模拟 LLM 的输出
|
||||
humidity = identity["humidity"]
|
||||
sol = f"{humidity:.4f}"
|
||||
# 调用 Basebootcamp 提供的 verify_score 接口进行验证
|
||||
score = bootcamp.verify_score(sol, identity, short_threshold=1e-2)
|
||||
# 打印结果
|
||||
print(json.dumps({
|
||||
"identity": identity,
|
||||
"solution": sol,
|
||||
"verify_score": score
|
||||
}, ensure_ascii=False, indent=2))
|
||||
# for identity in examples:
|
||||
# # 构造“模型”返回答案,模拟 LLM 的输出
|
||||
# humidity = identity["humidity"]
|
||||
# sol = f"\\boxed{{relative humidity = {humidity:.4f}%}}"
|
||||
# # 调用 Basebootcamp 提供的 verify_score 接口进行验证
|
||||
# score = bootcamp.verify_score(sol, identity, short_penalty=False,format_penalty=False)
|
||||
# # 打印结果
|
||||
# print(json.dumps({
|
||||
# "identity": identity,
|
||||
# "solution": sol,
|
||||
# "extract_output": Earthdew2humiditybootcamp.extract_output(sol),
|
||||
# "verify_score": score
|
||||
# }, ensure_ascii=False, indent=2))
|
||||
case = {
|
||||
"id": 49,
|
||||
"prompt": "下面给出露点温度(dewpoint)=13.225845924995145 (摄氏度)\n温度(temperature)=13.427933720529904 (摄氏度)\n请计算湿度,计算公式为:\ndewpoint = dewpoint + 273.15,temperature = temperature + 273.15\ne = 611.2 * np.exp(17.67 * (dewpoint - 273.15) / (dewpoint - 29.65))\ne_s = 611.2 * np.exp(17.67 * (temperature - 273.15) / (temperature - 29.65))\nrelative humidity = e / e_s * 100\n以 \\boxed{relative humidity = ?%} 格式输出你的最终答案,例如 \\boxed{relative humidity = your answer%}。",
|
||||
"output_len": 130,
|
||||
"score": 0,
|
||||
"ground_truth": {
|
||||
"dewpoint": 13.225845924995145,
|
||||
"temperature": 13.427933720529904,
|
||||
"humidity": 98.69041013549538
|
||||
},
|
||||
"output": "首先,将露点温度和温度转换为绝对温度。露点温度为13.225845924995145摄氏度,加上273.15得到露点温度的绝对温度,即为286.37584592499516 K。同样地,温度13.427933720529904摄氏度加上273.15,得到绝对温度286.5779337205299 K。\n\n接下来,计算露点温度对应的饱和水蒸气压。使用公式e = 611.2 * np.exp(17.67 * (dewpoint - 273.15) / (dewpoint - 29.65)),其中dewpoint为286.37584592499516 K。计算得到e的值约为654.54 hPa。\n\n然后,计算温度对应的饱和水蒸气压。使用公式e_s = 611.2 * np.exp(17.67 * (temperature - 273.15) / (temperature - 29.65)),其中temperature为286.5779337205299 K。计算得到e_s的值约为656.53 hPa。\n\n最后,计算相对湿度。相对湿度等于e除以e_s,再乘以100,即(654.54 / 656.53) * 100 ≈ 99.69%。\n</think>\n\n好的,我们按照给定的公式计算相对湿度。\n\n**步骤如下:**\n\n1. **将露点温度和温度转换为绝对温度:**\n \n \\[\n \\text{dewpoint} = 13.225845924995145\\,^\\circ\\text{C} + 273.15 = 286.37584592499516\\,\\text{K}\n \\]\n \n \\[\n \\text{temperature} = 13.427933720529904\\,^\\circ\\text{C} + 273.15 = 286.5779337205299\\,\\text{K}\n \\]\n\n2. **计算露点温度对应的饱和水蒸气压 \\( e \\):**\n \n \\[\n e = 611.2 \\times \\exp\\left(\\frac{17.67 \\times (286.37584592499516 - 273.15)}{286.37584592499516 - 29.65}\\right) \\approx 654.54\\,\\text{hPa}\n \\]\n\n3. **计算温度对应的饱和水蒸气压 \\( e_s \\):**\n \n \\[\n e_s = 611.2 \\times \\exp\\left(\\frac{17.67 \\times (286.5779337205299 - 273.15)}{286.5779337205299 - 29.65}\\right) \\approx 656.53\\,\\text{hPa}\n \\]\n\n4. **计算相对湿度:**\n \n \\[\n \\text{relative humidity} = \\left( \\frac{e}{e_s} \\right) \\times 100 = \\left( \\frac{654.54}{656.53} \\right) \\times 100 \\approx 99.69\\%\n \\]\n\n**最终答案:**\n\n\\[\n\\boxed{\\text{relative humidity} = 99.69\\%}\n\\]"
|
||||
}
|
||||
print(Earthdew2humiditybootcamp.extract_output(case['output']))
|
||||
|
|
@ -5,7 +5,7 @@ from scipy.integrate import odeint
|
|||
from internbootcamp.bootcamp.base import Basebootcamp
|
||||
|
||||
|
||||
class LinearODEBootcamp(Basebootcamp):
|
||||
class LinearODEbootcamp(Basebootcamp):
|
||||
def __init__(
|
||||
self,
|
||||
k_range=(0.1, 1.0),
|
||||
|
|
@ -38,41 +38,70 @@ class LinearODEBootcamp(Basebootcamp):
|
|||
return (
|
||||
f"下面给出变量 x(t) 的观测数据点:\n{points}\n\n"
|
||||
"请找出其满足的微分方程,形式为:dx/dt = f(x)。\n"
|
||||
"只需返回 “dx/dt = <表达式>”。"
|
||||
"以dx/dt = <表达式>格式表示你的答案。"
|
||||
"并且使用[answer]标签包裹你的最终答案, 例如[answer]dx/dt = <表达式>[/answer]."
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def extract_output(output: str) -> str:
|
||||
# 用正则提取“dx/dt = …”右侧的表达式
|
||||
m = re.search(r"dx/dt\s*=\s*([^\n\r]+)", output)
|
||||
return m.group(1).strip() if m else None
|
||||
def extract_output(output):
    """Extract the coefficient ``c`` from an answer of the form ``dx/dt = c*x``.

    Only the last ``[answer]...[/answer]`` section of ``output`` is
    inspected.  Any ``dx/dt =`` prefix is removed (whatever the spacing
    around ``=``) and the remainder must be an optional signed coefficient,
    an optional ``*``, and ``x`` or ``X``.

    Args:
        output: Raw text produced by the model.

    Returns:
        The coefficient as a ``float`` (``1.0`` for a bare ``x`` or ``+x``,
        ``-1.0`` for ``-x``), or ``None`` when ``output`` is not a string,
        has no answer section, or the expression does not have the expected
        form.
    """
    import re

    if not isinstance(output, str):
        return None

    answers = re.findall(r'\[answer\](.*?)\[/answer\]', output, re.DOTALL)
    if not answers:
        return None

    # Strip the left-hand side regardless of spacing ("dx/dt=", "dx/dt = ").
    expr = re.sub(r'dx/dt\s*=\s*', '', answers[-1]).strip()

    # Group 1 is the optional coefficient: sign, then an integer/decimal
    # mantissa, then an exponent.  It is followed by an optional "*" and the
    # variable name.
    matched = re.fullmatch(
        r"""
        ([+-]?\s* # 可选的正负号,后可带空格
        (?:\d+(?:\.\d*)? # 整数或小数点后数字
        |\.\d+)? # 或只有小数部分
        (?:[eE][+-]?\d+)? # 可选的科学计数部分
        )? # 整个系数是可选的(允许直接 x 或 -x)
        \s*\*?\s* # 可选乘号,前后允许空格
        [xX] # x 或 X
        """,
        expr,
        re.VERBOSE
    )
    if matched is None:
        return None

    # The pattern tolerates spaces between the sign and the digits, but
    # float() does not, so remove all whitespace before converting.
    coefficient = ''.join((matched.group(1) or '').split())
    if coefficient in ('', '+'):
        return 1.0
    if coefficient == '-':
        return -1.0
    try:
        return float(coefficient)
    except ValueError:
        # e.g. an exponent with no mantissa ("e5"), which the pattern allows.
        return None
|
||||
|
||||
@classmethod
|
||||
def _verify_correction(cls, solution: str, identity: dict) -> bool:
|
||||
# 解析 LLM 给出的系数 c,形如 “c*x”
|
||||
sol = solution.replace(" ", "")
|
||||
match = re.fullmatch(r"([\-0-9\.eE]+)\*x", sol)
|
||||
if not match:
|
||||
return False
|
||||
c = float(match.group(1))
|
||||
# 验证 c ≈ -k
|
||||
return abs(c + identity["k"]) < 1e-2
|
||||
def _verify_correction(cls, solution: float, identity: dict) -> bool:
|
||||
delta = abs(solution + identity["k"])
|
||||
return delta < 1e-2
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
bootcamp = LinearODEBootcamp(seed=123)
|
||||
bootcamp = LinearODEbootcamp(seed=123)
|
||||
# 生成几个样例
|
||||
examples = [bootcamp.case_generator() for _ in range(3)]
|
||||
|
||||
for identity in examples:
|
||||
# 构造“模型”返回答案,模拟 LLM 的输出
|
||||
coeff = -identity["k"]
|
||||
sol = f"{coeff:.4f}*x"
|
||||
sol = f"[answer]dx/dt = {coeff:.2f}*x[/answer]"
|
||||
# 调用 Basebootcamp 提供的 verify_score 接口进行验证
|
||||
score = bootcamp.verify_score(sol, identity, short_threshold=1e-2)
|
||||
score = bootcamp.verify_score(sol, identity, short_penalty=False,format_penalty=False)
|
||||
# 打印结果
|
||||
print(json.dumps({
|
||||
"identity": identity,
|
||||
"solution": sol,
|
||||
"extract_output": LinearODEbootcamp.extract_output(sol),
|
||||
"verify_score": score
|
||||
}, ensure_ascii=False, indent=2))
|
||||
3
setup.py
3
setup.py
|
|
@ -19,7 +19,8 @@ setuptools.setup(
|
|||
"transformers",
|
||||
"langdetect",
|
||||
"pympler",
|
||||
"shortuuid"
|
||||
"shortuuid",
|
||||
"rdkit"
|
||||
],
|
||||
|
||||
package_data={
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue