mirror of
https://github.com/InternLM/InternBootcamp.git
synced 2026-04-25 17:10:49 +00:00
adding MR and LogP Prediction tasks
This commit is contained in:
parent
c2dad02fe4
commit
91972f43ea
7 changed files with 490 additions and 0 deletions
28
internbootcamp/bootcamp/ChemStructure2Property/InChI2MRBootCamp.py
Executable file
28
internbootcamp/bootcamp/ChemStructure2Property/InChI2MRBootCamp.py
Executable file
|
|
@ -0,0 +1,28 @@
|
|||
from rdkit import Chem
|
||||
from rdkit.Chem import Crippen
|
||||
from .InChI2logPBootCamp import InChI2logPbootcamp
|
||||
|
||||
class InChI2MRBootCamp(InChI2logPbootcamp):
    """Bootcamp task: predict the molar refractivity (MR) of a molecule from its InChI.

    Construction, case generation and answer extraction are inherited from
    ``InChI2logPbootcamp``; this class only overrides the prompt text and the
    verifier so that the reference value is ``Crippen.MolMR`` instead of logP.
    """

    def prompt_func(self, InChI) -> str:
        """Build the question prompt for one generated case.

        Args:
            InChI: The InChI string of the molecule to ask about.

        Returns:
            The task instruction followed by the answer-format instruction,
            separated by a newline.
        """
        instruction = f"Given the InChI, determine the Molar Refractivity (MR) value of the material. The InChI is: {InChI}"
        instruction_following = """Let's think step by step and output the final answer within \\boxed{}.The final answer should be one float number. For example "Final Answer: \\boxed{afloat}"."""

        prompt = instruction + '\n' + instruction_following
        return prompt

    @classmethod
    def _verify_correction(cls, solution, InChI) -> bool:
        """Check a predicted MR value against the RDKit Crippen reference.

        Args:
            solution: The extracted answer (text expected to parse as a float).
            InChI: The InChI string the question was generated from.

        Returns:
            True when ``solution`` parses as a float and lies within an
            absolute tolerance of 0.01 of ``Crippen.MolMR``; False otherwise,
            including when ``solution`` is not a number.
        """
        # NOTE(review): the parse result is passed straight to Crippen.MolMR
        # with no check that parsing succeeded -- confirm generated InChIs are
        # always valid.
        mol = Chem.MolFromInchi(InChI)
        true_MR = Crippen.MolMR(mol)
        print(f"Comparing pred: {solution}, ground_truth: {true_MR}")

        # Model output is untrusted: a non-numeric answer is simply wrong, it
        # should not make a bool-returning verifier raise.
        try:
            predicted = float(solution)
        except (TypeError, ValueError):
            return False
        return abs(true_MR - predicted) <= 0.01  # maybe mse or mae better?
|
||||
|
||||
|
||||
|
||||
|
||||
56
internbootcamp/bootcamp/ChemStructure2Property/InChI2logPBootCamp.py
Executable file
56
internbootcamp/bootcamp/ChemStructure2Property/InChI2logPBootCamp.py
Executable file
|
|
@ -0,0 +1,56 @@
|
|||
from internbootcamp.bootcamp.base import Basebootcamp
|
||||
from internbootcamp.libs.chemStructure2Property.ChemStructureGenerator import InChIGenerator
|
||||
from .utils import last_boxed_only_string, remove_boxed
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem import Crippen
|
||||
|
||||
|
||||
class InChI2logPbootcamp(Basebootcamp):
    """Bootcamp task: predict the lipophilicity (logP) of a molecule from its InChI.

    Cases are InChI strings drawn from an ``InChIGenerator``; the reference
    answer is computed with ``Crippen.MolLogP``.
    """

    def __init__(self, num_numbers=4, max_atoms=15, min_atoms=3, elements=None, seed=None):
        """Configure the InChI generator used to produce cases.

        Args:
            num_numbers: Stored on the instance; not read by any method of
                this class.
            max_atoms: Forwarded to ``InChIGenerator``.
            min_atoms: Forwarded to ``InChIGenerator``.
            elements: Forwarded to ``InChIGenerator``.
            seed: Forwarded to ``InChIGenerator``.
        """
        # NOTE(review): the base-class initialiser is not invoked here (the
        # call was commented out in the original) -- confirm Basebootcamp
        # needs no initialisation.
        self.num_numbers = num_numbers
        self.InChIGenerator = InChIGenerator(max_atoms=max_atoms, min_atoms=min_atoms, elements=elements, seed=seed)

    def case_generator(self) -> str:
        """Return one InChI string produced by the configured generator."""
        return self.InChIGenerator.generate_n_valid_inchi(1)[0]

    def prompt_func(self, InChI) -> str:
        """Build the question prompt for one generated case.

        Args:
            InChI: The InChI string of the molecule to ask about.

        Returns:
            The task instruction followed by the answer-format instruction,
            separated by a newline.
        """
        instruction = f"Given the InChI, determine the lipophilicity (logP) value of the material. The InChI is: {InChI}"
        instruction_following = """Let's think step by step and output the final answer within \\boxed{}.The final answer should be one float number. For example "Final Answer: \\boxed{afloat}"."""

        prompt = instruction + '\n' + instruction_following
        return prompt

    @staticmethod
    def extract_output(output):
        """Extract the final boxed answer from a model response.

        Args:
            output: Model output to be processed.

        Returns:
            The text inside the boxed expression located by
            ``last_boxed_only_string``, or None when that helper finds none.
        """
        output = last_boxed_only_string(output)
        if output is None:
            return None
        return remove_boxed(output)

    @classmethod
    def _verify_correction(cls, solution, InChI) -> bool:
        """Check a predicted logP value against the RDKit Crippen reference.

        Args:
            solution: The extracted answer (text expected to parse as a float).
            InChI: The InChI string the question was generated from.

        Returns:
            True when ``solution`` parses as a float and lies within an
            absolute tolerance of 0.01 of ``Crippen.MolLogP``; False
            otherwise, including when ``solution`` is not a number.
        """
        # NOTE(review): the parse result is passed straight to
        # Crippen.MolLogP with no check that parsing succeeded -- confirm
        # generated InChIs are always valid.
        mol = Chem.MolFromInchi(InChI)
        true_logp = Crippen.MolLogP(mol)
        print(f"Comparing pred: {solution}, ground_truth: {true_logp}")

        # Model output is untrusted: a non-numeric answer is simply wrong, it
        # should not make a bool-returning verifier raise.
        try:
            predicted = float(solution)
        except (TypeError, ValueError):
            return False
        return abs(true_logp - predicted) <= 0.01  # maybe mse or mae better?
|
||||
|
||||
|
||||
|
||||
|
||||
34
internbootcamp/bootcamp/ChemStructure2Property/SMILES2MRBootCamp.py
Executable file
34
internbootcamp/bootcamp/ChemStructure2Property/SMILES2MRBootCamp.py
Executable file
|
|
@ -0,0 +1,34 @@
|
|||
from internbootcamp.bootcamp.base import Basebootcamp
|
||||
from internbootcamp.libs.chemStructure2Property.ChemStructureGenerator import SMILESGenerator
|
||||
from .utils import last_boxed_only_string, remove_boxed
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem import Crippen
|
||||
|
||||
from .SMILES2logPBootCamp import SMILES2logPBootCamp
|
||||
|
||||
class SMILES2MRBootCamp(SMILES2logPBootCamp):
    """Bootcamp task: predict the molar refractivity (MR) of a molecule from its SMILES.

    Construction and case generation are inherited from
    ``SMILES2logPBootCamp``; this class only overrides the prompt text and the
    verifier so that the reference value is ``Crippen.MolMR`` instead of logP.
    """

    def prompt_func(self, SMILES) -> str:
        """Build the question prompt for one generated case.

        Args:
            SMILES: The SMILES string of the molecule to ask about.

        Returns:
            The task instruction followed by the answer-format instruction,
            separated by a newline.
        """
        instruction = f"Given the SMILES, determine the Molar Refractivity (MR) value of the material. The SMILES is: {SMILES}"
        instruction_following = """Let's think step by step and output the final answer within \\boxed{}.The final answer should be one float number. For example "Final Answer: \\boxed{afloat}"."""

        prompt = instruction + '\n' + instruction_following
        return prompt

    @classmethod
    def _verify_correction(cls, solution, SMILES) -> bool:
        """Check a predicted MR value against the RDKit Crippen reference.

        Args:
            solution: The extracted answer (text expected to parse as a float).
            SMILES: The SMILES string the question was generated from.

        Returns:
            True when ``solution`` parses as a float and lies within an
            absolute tolerance of 0.01 of ``Crippen.MolMR``; False otherwise,
            including when ``solution`` is not a number.
        """
        # NOTE(review): the parse result is passed straight to Crippen.MolMR
        # with no check that parsing succeeded -- confirm generated SMILES are
        # always valid.
        mol = Chem.MolFromSmiles(SMILES)
        true_MR = Crippen.MolMR(mol)
        print(f"Comparing pred: {solution}, ground_truth: {true_MR}")

        # Model output is untrusted: a non-numeric answer is simply wrong, it
        # should not make a bool-returning verifier raise.
        try:
            predicted = float(solution)
        except (TypeError, ValueError):
            return False
        return abs(true_MR - predicted) <= 0.01  # maybe mse or mae better?
|
||||
|
||||
|
||||
|
||||
|
||||
43
internbootcamp/bootcamp/ChemStructure2Property/SMILES2logPBootCamp.py
Executable file
43
internbootcamp/bootcamp/ChemStructure2Property/SMILES2logPBootCamp.py
Executable file
|
|
@ -0,0 +1,43 @@
|
|||
from internbootcamp.bootcamp.base import Basebootcamp
|
||||
from internbootcamp.libs.chemStructure2Property.ChemStructureGenerator import SMILESGenerator
|
||||
from .utils import last_boxed_only_string, remove_boxed
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem import Crippen
|
||||
|
||||
from .InChI2logPBootCamp import InChI2logPbootcamp
|
||||
|
||||
class SMILES2logPBootCamp(InChI2logPbootcamp):
    """Bootcamp task: predict the lipophilicity (logP) of a molecule from its SMILES.

    Cases are SMILES strings drawn from a ``SMILESGenerator``; the reference
    answer is computed with ``Crippen.MolLogP``. Answer extraction is
    inherited from ``InChI2logPbootcamp``.
    """

    def __init__(self, num_numbers=4, min_len=5, max_len=25,
                 seed=None):
        """Configure the SMILES generator used to produce cases.

        Args:
            num_numbers: Stored on the instance; not read by any method of
                this class.
            min_len: Forwarded to ``SMILESGenerator``.
            max_len: Forwarded to ``SMILESGenerator``.
            seed: Forwarded to ``SMILESGenerator``.
        """
        # NOTE(review): neither parent initialiser is invoked here (the call
        # was commented out in the original); the InChI generator of the
        # parent class is therefore never created for this task.
        self.num_numbers = num_numbers
        # Forward the caller's settings. Previously the literals 5 / 25 / None
        # were passed, silently ignoring min_len, max_len and seed.
        self.SMILESGenerator = SMILESGenerator(min_len=min_len, max_len=max_len, seed=seed)

    def case_generator(self) -> str:
        """Return one SMILES string produced by the configured generator."""
        return self.SMILESGenerator.generate_n_valid_smiles(1)[0]

    def prompt_func(self, SMILES) -> str:
        """Build the question prompt for one generated case.

        Args:
            SMILES: The SMILES string of the molecule to ask about.

        Returns:
            The task instruction followed by the answer-format instruction,
            separated by a newline.
        """
        instruction = f"Given the SMILES, determine the lipophilicity (logP) value of the material. The SMILES is: {SMILES}"
        instruction_following = """Let's think step by step and output the final answer within \\boxed{}.The final answer should be one float number. For example "Final Answer: \\boxed{afloat}"."""

        prompt = instruction + '\n' + instruction_following
        return prompt

    @classmethod
    def _verify_correction(cls, solution, SMILES) -> bool:
        """Check a predicted logP value against the RDKit Crippen reference.

        Args:
            solution: The extracted answer (text expected to parse as a float).
            SMILES: The SMILES string the question was generated from.

        Returns:
            True when ``solution`` parses as a float and lies within an
            absolute tolerance of 0.01 of ``Crippen.MolLogP``; False
            otherwise, including when ``solution`` is not a number.
        """
        # NOTE(review): the parse result is passed straight to
        # Crippen.MolLogP with no check that parsing succeeded -- confirm
        # generated SMILES are always valid.
        mol = Chem.MolFromSmiles(SMILES)
        true_logp = Crippen.MolLogP(mol)
        print(f"Comparing pred: {solution}, ground_truth: {true_logp}")

        # Model output is untrusted: a non-numeric answer is simply wrong, it
        # should not make a bool-returning verifier raise.
        try:
            predicted = float(solution)
        except (TypeError, ValueError):
            return False
        return abs(true_logp - predicted) <= 0.01  # maybe mse or mae better?
|
||||
|
||||
|
||||
|
||||
|
||||
43
internbootcamp/bootcamp/ChemStructure2Property/utils.py
Normal file
43
internbootcamp/bootcamp/ChemStructure2Property/utils.py
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
|
||||
def remove_boxed(s):
    """Strip the LaTeX box wrapper from ``s`` and return the inner text.

    ``s`` must *start* with the wrapper. Accepted forms:

    * ``\\boxed <text>``   -> ``<text>``
    * ``\\boxed{<text>}``  -> ``<text>``
    * ``\\fbox{<text>}``   -> ``<text>``

    Args:
        s: A boxed expression, e.g. as returned by ``last_boxed_only_string``.

    Returns:
        The text inside the box.

    Raises:
        AssertionError: If ``s`` is in none of the accepted forms.
    """
    spaced = "\\boxed "
    # Prefix test, not a substring test: a brace-form box may legitimately
    # contain "\\boxed " somewhere inside its content.
    if s.startswith(spaced):
        return s[len(spaced):]

    for left in ("\\boxed{", "\\fbox{"):
        if s.startswith(left) and s.endswith("}"):
            return s[len(left):-1]

    # Raised explicitly (rather than via ``assert``) so the validation is not
    # stripped when Python runs with -O; the exception type is unchanged.
    raise AssertionError(f"not a boxed expression: {s!r}")
|
||||
|
||||
|
||||
def last_boxed_only_string(string):
    """Return the last boxed expression in ``string``, wrapper included.

    The last ``\\boxed`` occurrence decides the form:

    * followed by a space: the result is ``"\\boxed "`` plus the text up to
      the next ``$`` (or the end of the string);
    * otherwise: the result runs from ``\\boxed`` through the brace that
      closes its first ``{``.

    When there is no ``\\boxed`` at all, the last ``\\fbox`` is used with the
    brace rule.

    Args:
        string: Text to search (typically a model response).

    Returns:
        The boxed substring, or None when no box is found or its braces never
        balance.
    """
    spaced = "\\boxed "
    idx = string.rfind("\\boxed")

    # Dispatch on the form of the LAST box only. An earlier "\\boxed " (e.g.
    # "put it in \\boxed as asked: \\boxed{1.23}") must not hijack the result.
    if idx >= 0 and string.startswith(spaced, idx):
        return spaced + string[idx + len(spaced):].split("$")[0]

    if idx < 0:
        idx = string.rfind("\\fbox")
        if idx < 0:
            return None

    # Scan forward for the brace that closes the first one opened after idx.
    depth = 0
    for pos in range(idx, len(string)):
        ch = string[pos]
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return string[idx:pos + 1]

    # Ran off the end without the braces balancing.
    return None
|
||||
Loading…
Add table
Add a link
Reference in a new issue