Merge branch 'dev_chemStructure2Property' into 'main'

Dev chem structure2 property

See merge request lilinyang/internbootcamp!7
This commit is contained in:
李林阳 2025-06-16 07:26:36 +00:00
commit 058cde1791
11 changed files with 512 additions and 0 deletions

View file

@ -0,0 +1,29 @@
from rdkit import Chem
from rdkit.Chem import Crippen
from .InChI2logPBootCamp import InChI2logPbootcamp
class InChI2MRBootCamp(InChI2logPbootcamp):
    """Task asking for the Molar Refractivity (MR) of a molecule given as InChI.

    Reuses the parent's initializer, case generation and answer extraction;
    only the prompt text and the reference property differ.
    """

    def prompt_func(self, InChI) -> str:
        """Build the question prompt for one InChI string.

        Args:
            InChI: The InChI string to embed in the prompt.

        Returns:
            The instruction followed by the answer-format requirement.
        """
        instruction = f"Given the InChI, determine the Molar Refractivity (MR) value of the material. The InChI is: {InChI}"
        instruction_following = """Let's think step by step and output the final answer within \\boxed{}.The final answer should be one float number. For example "Final Answer: \\boxed{afloat}"."""
        prompt = instruction + '\n' + instruction_following
        return prompt

    @classmethod
    def _verify_correction(cls, solution, InChI) -> bool:
        """Check a candidate answer against RDKit's ``Crippen.MolMR``.

        Args:
            solution: The extracted answer; must be convertible with ``float``.
            InChI: The InChI string the question was about.

        Returns:
            True when the answer is within 1% relative error of the reference
            value (or within 0.01 absolute when the reference is exactly 0).
            False when the answer is not numeric or the InChI cannot be parsed.
        """
        mol = Chem.MolFromInchi(InChI)
        if mol is None:
            # An unparsable structure has no reference value to compare with.
            return False
        true_MR = Crippen.MolMR(mol)
        try:
            solution_float = float(solution)
        except (TypeError, ValueError):
            # Boxed content such as "12.3 cm^3/mol" is a wrong answer, not a crash.
            return False
        # Relative error is undefined when the reference MR is 0, so fall
        # back to an absolute check in that case.
        if true_MR == 0:
            return abs(solution_float) <= 0.01
        return abs(true_MR - solution_float) / abs(true_MR) <= 0.01

View file

@ -0,0 +1,57 @@
from internbootcamp.bootcamp.base import Basebootcamp
from internbootcamp.libs.chemStructure2Property.ChemStructureGenerator import InChIGenerator
from .utils import last_boxed_only_string, remove_boxed
from rdkit import Chem
from rdkit.Chem import Crippen
class InChI2logPbootcamp(Basebootcamp):
    """Task asking for the lipophilicity (logP) of a molecule given as InChI."""

    def __init__(self, max_atoms=15, min_atoms=3, elements=None, seed=None,
                 tolerance_factor=1):
        """Create the InChI generator used to produce cases.

        Args:
            max_atoms: Forwarded to ``InChIGenerator``.
            min_atoms: Forwarded to ``InChIGenerator``.
            elements: Forwarded to ``InChIGenerator``.
            seed: Forwarded to ``InChIGenerator``.
            tolerance_factor: Stored on the instance. 1 means 1% error counts
                as correct, 0.1 means 0.1%, 10 means 10%. Previously this name
                was read without being defined, so every instantiation raised
                ``NameError``.
        """
        # NOTE(review): the base-class initializer is not invoked (the call was
        # commented out in the original) -- confirm Basebootcamp needs no setup.
        self.InChIGenerator = InChIGenerator(
            max_atoms=max_atoms, min_atoms=min_atoms, elements=elements, seed=seed
        )
        # NOTE: ``_verify_correction`` is a classmethod and cannot see this
        # per-instance value; it applies a fixed 1% threshold.
        self.tolerance_factor = tolerance_factor

    def case_generator(self) -> str:
        """Return one valid InChI string produced by the generator."""
        return self.InChIGenerator.generate_n_valid_inchi(1)[0]

    def prompt_func(self, InChI) -> str:
        """Build the question prompt for one InChI string.

        Args:
            InChI: The InChI string to embed in the prompt.

        Returns:
            The instruction followed by the answer-format requirement.
        """
        instruction = f"Given the InChI, determine the lipophilicity (logP) value of the material. The InChI is: {InChI}"
        instruction_following = """Let's think step by step and output the final answer within \\boxed{}.The final answer should be one float number. For example "Final Answer: \\boxed{afloat}"."""
        prompt = instruction + '\n' + instruction_following
        return prompt

    @staticmethod
    def extract_output(output):
        """Extract the content of the last boxed expression in a model output.

        Args:
            output: Model output to be processed.

        Returns:
            The text inside the last box, or None when no box is found.
        """
        output = last_boxed_only_string(output)
        if output is None:
            return None
        return remove_boxed(output)

    @classmethod
    def _verify_correction(cls, solution, InChI) -> bool:
        """Check a candidate answer against RDKit's ``Crippen.MolLogP``.

        Args:
            solution: The extracted answer; must be convertible with ``float``.
            InChI: The InChI string the question was about.

        Returns:
            True when the answer is within 1% relative error of the reference
            value (or within 0.01 absolute when the reference is exactly 0).
            False when the answer is not numeric or the InChI cannot be parsed.
        """
        mol = Chem.MolFromInchi(InChI)
        if mol is None:
            # An unparsable structure has no reference value to compare with.
            return False
        true_logp = Crippen.MolLogP(mol)
        try:
            solution_float = float(solution)
        except (TypeError, ValueError):
            # Boxed content that is not a plain number is a wrong answer,
            # not a reason to crash the scorer.
            return False
        # Relative error is undefined when the reference logP is 0, so fall
        # back to an absolute check in that case.
        if true_logp == 0:
            return abs(solution_float) <= 0.01
        return abs(true_logp - solution_float) / abs(true_logp) <= 0.01

View file

@ -0,0 +1,32 @@
from internbootcamp.bootcamp.base import Basebootcamp
from internbootcamp.libs.chemStructure2Property.ChemStructureGenerator import SMILESGenerator
from .utils import last_boxed_only_string, remove_boxed
from rdkit import Chem
from rdkit.Chem import Crippen
from .SMILES2logPBootCamp import SMILES2logPBootCamp
class SMILES2MRBootCamp(SMILES2logPBootCamp):
    """Task asking for the Molar Refractivity (MR) of a molecule given as SMILES.

    Reuses the parent's initializer, case generation and answer extraction;
    only the prompt text and the reference property differ.
    """

    def prompt_func(self, SMILES) -> str:
        """Build the question prompt for one SMILES string.

        Args:
            SMILES: The SMILES string to embed in the prompt.

        Returns:
            The instruction followed by the answer-format requirement.
        """
        instruction = f"Given the SMILES, determine the Molar Refractivity (MR) value of the material. The SMILES is: {SMILES}"
        instruction_following = """Let's think step by step and output the final answer within \\boxed{}.The final answer should be one float number. For example "Final Answer: \\boxed{afloat}"."""
        prompt = instruction + '\n' + instruction_following
        return prompt

    @classmethod
    def _verify_correction(cls, solution, SMILES) -> bool:
        """Check a candidate answer against RDKit's ``Crippen.MolMR``.

        Args:
            solution: The extracted answer; must be convertible with ``float``.
            SMILES: The SMILES string the question was about.

        Returns:
            True when the answer is within 1% relative error of the reference
            value (or within 0.01 absolute when the reference is exactly 0).
            False when the answer is not numeric or the SMILES cannot be parsed.
        """
        mol = Chem.MolFromSmiles(SMILES)
        if mol is None:
            # MolFromSmiles yields None for unparsable input; there is no
            # reference value to compare with.
            return False
        true_MR = Crippen.MolMR(mol)
        try:
            solution_float = float(solution)
        except (TypeError, ValueError):
            # Boxed content that is not a plain number is a wrong answer.
            return False
        # Relative error is undefined when the reference MR is 0, so fall
        # back to an absolute check in that case.
        if true_MR == 0:
            return abs(solution_float) <= 0.01
        return abs(true_MR - solution_float) / abs(true_MR) <= 0.01

View file

@ -0,0 +1,41 @@
from internbootcamp.bootcamp.base import Basebootcamp
from internbootcamp.libs.chemStructure2Property.ChemStructureGenerator import SMILESGenerator
from .utils import last_boxed_only_string, remove_boxed
from rdkit import Chem
from rdkit.Chem import Crippen
from .InChI2logPBootCamp import InChI2logPbootcamp
class SMILES2logPBootCamp(InChI2logPbootcamp):
    """Task asking for the lipophilicity (logP) of a molecule given as SMILES."""

    def __init__(self, min_len=5, max_len=25,
                 seed=None):
        """Create the SMILES generator used to produce cases.

        Args:
            min_len: Forwarded to ``SMILESGenerator``.
            max_len: Forwarded to ``SMILESGenerator``.
            seed: Forwarded to ``SMILESGenerator``.
        """
        # NOTE(review): the parent initializer is not invoked (the call was
        # commented out in the original), so no InChI generator is created on
        # this instance -- confirm that is intended.
        self.SMILESGenerator = SMILESGenerator(min_len=min_len, max_len=max_len, seed=seed)

    def case_generator(self) -> str:
        """Return one valid SMILES string produced by the generator."""
        return self.SMILESGenerator.generate_n_valid_smiles(1)[0]

    def prompt_func(self, SMILES) -> str:
        """Build the question prompt for one SMILES string.

        Args:
            SMILES: The SMILES string to embed in the prompt.

        Returns:
            The instruction followed by the answer-format requirement.
        """
        instruction = f"Given the SMILES, determine the lipophilicity (logP) value of the material. The SMILES is: {SMILES}"
        instruction_following = """Let's think step by step and output the final answer within \\boxed{}.The final answer should be one float number. For example "Final Answer: \\boxed{afloat}"."""
        prompt = instruction + '\n' + instruction_following
        return prompt

    @classmethod
    def _verify_correction(cls, solution, SMILES) -> bool:
        """Check a candidate answer against RDKit's ``Crippen.MolLogP``.

        Args:
            solution: The extracted answer; must be convertible with ``float``.
            SMILES: The SMILES string the question was about.

        Returns:
            True when the answer is within 1% relative error of the reference
            value (or within 0.01 absolute when the reference is exactly 0).
            False when the answer is not numeric or the SMILES cannot be parsed.
        """
        mol = Chem.MolFromSmiles(SMILES)
        if mol is None:
            # MolFromSmiles yields None for unparsable input; there is no
            # reference value to compare with.
            return False
        true_logp = Crippen.MolLogP(mol)
        try:
            solution_float = float(solution)
        except (TypeError, ValueError):
            # Boxed content that is not a plain number is a wrong answer.
            return False
        # Relative error is undefined when the reference logP is 0, so fall
        # back to an absolute check in that case.
        if true_logp == 0:
            return abs(solution_float) <= 0.01
        return abs(true_logp - solution_float) / abs(true_logp) <= 0.01

View file

@ -0,0 +1,43 @@
def remove_boxed(s):
    """Strip the box wrapper from a boxed expression and return its content.

    Accepted forms are ``\\boxed <text>``, ``\\boxed{<text>}`` and
    ``\\fbox{<text>}``. The last one is accepted because
    ``last_boxed_only_string`` may return it.

    Args:
        s: A string that starts with one of the accepted wrappers.

    Returns:
        The text inside the wrapper.

    Raises:
        AssertionError: If ``s`` is not in one of the accepted forms. Raised
            explicitly rather than via ``assert`` so that the validation is
            not stripped when Python runs with ``-O``.
    """
    spaced = "\\boxed "
    if spaced in s:
        if not s.startswith(spaced):
            raise AssertionError(f"expected string to start with {spaced!r}")
        return s[len(spaced):]

    for left in ("\\boxed{", "\\fbox{"):
        if s.startswith(left):
            if not s.endswith("}"):
                raise AssertionError("boxed expression is missing its closing brace")
            return s[len(left):-1]

    raise AssertionError("string is not a boxed expression")
def last_boxed_only_string(string):
    """Return the last boxed expression found in ``string``, wrapper included.

    The last ``\\boxed`` occurrence decides the form. If it is followed by a
    space, the result is ``\\boxed `` plus the text up to the next ``$``.
    Otherwise the result spans from the wrapper to the brace that balances
    its first ``{``. ``\\fbox`` is used only when no ``\\boxed`` is present.

    Previously any ``\\boxed `` anywhere in the text triggered the space form,
    even when a later ``\\boxed{...}`` was the real final answer.

    Args:
        string: Text to search.

    Returns:
        The boxed expression, or None when there is no box or its braces
        never balance.
    """
    spaced = "\\boxed "
    idx = string.rfind("\\boxed")
    if idx < 0:
        idx = string.rfind("\\fbox")
        if idx < 0:
            return None
    elif string.startswith(spaced, idx):
        # Space form: the content runs until the closing "$" of inline math.
        return spaced + string[idx + len(spaced):].split("$")[0]

    # Brace form: walk forward until the first "{" is balanced.
    depth = 0
    for pos in range(idx, len(string)):
        ch = string[pos]
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return string[idx:pos + 1]
    return None

View file

@ -30,6 +30,13 @@ from .bbeh_hyperbaton.hyperbaton_default import BBEHHyperbatonbootcamp
from .bbeh_boardgame_qa.bbeh_boardgame_qa import Bbehboardgameqabootcamp
from .bbeh_boolean_expressions.bbeh_boolean_expressions import Bbehbooleanexpressionsbootcamp
from .ChemStructure2Property.InChI2logPBootCamp import InChI2logPbootcamp
from .ChemStructure2Property.InChI2MRBootCamp import InChI2MRBootCamp
from .ChemStructure2Property.SMILES2logPBootCamp import SMILES2logPBootCamp
from .ChemStructure2Property.SMILES2MRBootCamp import SMILES2MRBootCamp
from .kakurasu.kakurasu import Kakurasubootcamp
from .nonograms.nonograms import Nonogramsbootcamp
from .hitori.hitori import Hitoribootcamp