mirror of
https://github.com/InternLM/InternBootcamp.git
synced 2026-04-28 17:29:37 +00:00
Merge branch 'dev_chemStructure2Property' into 'main'
Dev chem structure2 property See merge request lilinyang/internbootcamp!7
This commit is contained in:
commit
058cde1791
11 changed files with 512 additions and 0 deletions
29
internbootcamp/bootcamp/ChemStructure2Property/InChI2MRBootCamp.py
Executable file
29
internbootcamp/bootcamp/ChemStructure2Property/InChI2MRBootCamp.py
Executable file
|
|
@ -0,0 +1,29 @@
|
|||
from rdkit import Chem
|
||||
from rdkit.Chem import Crippen
|
||||
from .InChI2logPBootCamp import InChI2logPbootcamp
|
||||
|
||||
class InChI2MRBootCamp(InChI2logPbootcamp):
    """Bootcamp task: predict a molecule's molar refractivity (MR) from its InChI.

    Only the prompt and the verifier are defined here; everything else is
    inherited from ``InChI2logPbootcamp``.
    """

    def prompt_func(self, InChI) -> str:
        """Build the question text for one InChI.

        Args:
            InChI: The InChI of the molecule to ask about; it is interpolated
                into the prompt as-is.

        Returns:
            The task instruction and the answer-format instruction, joined by
            a newline.
        """
        instruction = f"Given the InChI, determine the Molar Refractivity (MR) value of the material. The InChI is: {InChI}"
        instruction_following = """Let's think step by step and output the final answer within \\boxed{}.The final answer should be one float number. For example "Final Answer: \\boxed{afloat}"."""

        prompt = instruction + '\n' + instruction_following
        return prompt

    @classmethod
    def _verify_correction(cls, solution, InChI) -> bool:
        """Check an answer against ``Crippen.MolMR`` computed for ``InChI``.

        Args:
            solution: The extracted answer; anything ``float()`` accepts.
            InChI: The InChI the question was generated from.

        Returns:
            True when the answer is within 1% relative error of the reference
            MR (or within 0.01 absolute when the reference is exactly 0).
            False when the answer is not a number or RDKit cannot parse the
            InChI.
        """
        # A verifier should grade, not crash: a non-numeric answer is simply wrong.
        try:
            solution_float = float(solution)
        except (TypeError, ValueError):
            return False

        mol = Chem.MolFromInchi(InChI)
        if mol is None:
            # RDKit signals an unparsable structure with None; no reference
            # value exists, so the answer cannot be confirmed.
            return False
        true_MR = Crippen.MolMR(mol)

        # A relative error is undefined when the reference MR is 0, so fall
        # back to an absolute tolerance in that case.
        if true_MR == 0:
            return abs(solution_float) <= 0.01
        return abs(true_MR - solution_float) / abs(true_MR) <= 0.01
|
||||
|
||||
57
internbootcamp/bootcamp/ChemStructure2Property/InChI2logPBootCamp.py
Executable file
57
internbootcamp/bootcamp/ChemStructure2Property/InChI2logPBootCamp.py
Executable file
|
|
@ -0,0 +1,57 @@
|
|||
from internbootcamp.bootcamp.base import Basebootcamp
|
||||
from internbootcamp.libs.chemStructure2Property.ChemStructureGenerator import InChIGenerator
|
||||
from .utils import last_boxed_only_string, remove_boxed
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem import Crippen
|
||||
|
||||
|
||||
class InChI2logPbootcamp(Basebootcamp):
    """Bootcamp task: predict a molecule's lipophilicity (logP) from its InChI.

    Cases come from an ``InChIGenerator``; answers are graded against
    ``Crippen.MolLogP``.
    """

    def __init__(self, max_atoms=15, min_atoms=3, elements=None, seed=None, tolerance_factor=1):
        """Create the case generator and store the tolerance setting.

        Args:
            max_atoms: Forwarded to ``InChIGenerator``.
            min_atoms: Forwarded to ``InChIGenerator``.
            elements: Forwarded to ``InChIGenerator``.
            seed: Forwarded to ``InChIGenerator``.
            tolerance_factor: Accepted error in percent (1 for 1%, 0.1 for
                0.1%, 10 for 10%). Previously this name was read without being
                a parameter, so constructing the class raised ``NameError``.
        """
        # NOTE(review): Basebootcamp.__init__ is not called here (the original
        # had the call commented out); confirm the base class needs no setup.
        self.InChIGenerator = InChIGenerator(max_atoms=max_atoms, min_atoms=min_atoms, elements=elements, seed=seed)
        # NOTE(review): _verify_correction is a classmethod and cannot read
        # this per-instance value; it applies a fixed 1% tolerance.
        self.tolerance_factor = tolerance_factor

    def case_generator(self) -> str:
        """Return one case: the first item of a one-element batch from the generator."""
        return self.InChIGenerator.generate_n_valid_inchi(1)[0]

    def prompt_func(self, InChI) -> str:
        """Build the question text for one InChI.

        Args:
            InChI: The InChI of the molecule to ask about; it is interpolated
                into the prompt as-is.

        Returns:
            The task instruction and the answer-format instruction, joined by
            a newline.
        """
        instruction = f"Given the InChI, determine the lipophilicity (logP) value of the material. The InChI is: {InChI}"
        instruction_following = """Let's think step by step and output the final answer within \\boxed{}.The final answer should be one float number. For example "Final Answer: \\boxed{afloat}"."""

        prompt = instruction + '\n' + instruction_following
        return prompt

    @staticmethod
    def extract_output(output):
        """Extract the final boxed answer from a model response.

        Args:
            output: Model output to be processed.

        Returns:
            The text inside the boxed expression selected by
            ``last_boxed_only_string``, or None when that helper finds none.
        """
        output = last_boxed_only_string(output)
        if output is None:
            return None
        return remove_boxed(output)

    @classmethod
    def _verify_correction(cls, solution, InChI) -> bool:
        """Check an answer against ``Crippen.MolLogP`` computed for ``InChI``.

        Args:
            solution: The extracted answer; anything ``float()`` accepts.
            InChI: The InChI the question was generated from.

        Returns:
            True when the answer is within 1% relative error of the reference
            logP (or within 0.01 absolute when the reference is exactly 0).
            False when the answer is not a number or RDKit cannot parse the
            InChI.
        """
        # A verifier should grade, not crash: a non-numeric answer is simply wrong.
        try:
            solution_float = float(solution)
        except (TypeError, ValueError):
            return False

        mol = Chem.MolFromInchi(InChI)
        if mol is None:
            # RDKit signals an unparsable structure with None; no reference
            # value exists, so the answer cannot be confirmed.
            return False
        true_logp = Crippen.MolLogP(mol)

        # A relative error is undefined when the reference logP is 0, so fall
        # back to an absolute tolerance in that case.
        if true_logp == 0:
            return abs(solution_float) <= 0.01
        return abs(true_logp - solution_float) / abs(true_logp) <= 0.01
|
||||
32
internbootcamp/bootcamp/ChemStructure2Property/SMILES2MRBootCamp.py
Executable file
32
internbootcamp/bootcamp/ChemStructure2Property/SMILES2MRBootCamp.py
Executable file
|
|
@ -0,0 +1,32 @@
|
|||
from internbootcamp.bootcamp.base import Basebootcamp
|
||||
from internbootcamp.libs.chemStructure2Property.ChemStructureGenerator import SMILESGenerator
|
||||
from .utils import last_boxed_only_string, remove_boxed
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem import Crippen
|
||||
|
||||
from .SMILES2logPBootCamp import SMILES2logPBootCamp
|
||||
|
||||
class SMILES2MRBootCamp(SMILES2logPBootCamp):
    """Bootcamp task: predict a molecule's molar refractivity (MR) from its SMILES.

    Only the prompt and the verifier are defined here; everything else is
    inherited from ``SMILES2logPBootCamp``.
    """

    def prompt_func(self, SMILES) -> str:
        """Build the question text for one SMILES.

        Args:
            SMILES: The SMILES of the molecule to ask about; it is
                interpolated into the prompt as-is.

        Returns:
            The task instruction and the answer-format instruction, joined by
            a newline.
        """
        instruction = f"Given the SMILES, determine the Molar Refractivity (MR) value of the material. The SMILES is: {SMILES}"
        instruction_following = """Let's think step by step and output the final answer within \\boxed{}.The final answer should be one float number. For example "Final Answer: \\boxed{afloat}"."""

        prompt = instruction + '\n' + instruction_following
        return prompt

    @classmethod
    def _verify_correction(cls, solution, SMILES) -> bool:
        """Check an answer against ``Crippen.MolMR`` computed for ``SMILES``.

        Args:
            solution: The extracted answer; anything ``float()`` accepts.
            SMILES: The SMILES the question was generated from.

        Returns:
            True when the answer is within 1% relative error of the reference
            MR (or within 0.01 absolute when the reference is exactly 0).
            False when the answer is not a number or RDKit cannot parse the
            SMILES.
        """
        # A verifier should grade, not crash: a non-numeric answer is simply wrong.
        try:
            solution_float = float(solution)
        except (TypeError, ValueError):
            return False

        mol = Chem.MolFromSmiles(SMILES)
        if mol is None:
            # RDKit signals an unparsable structure with None; no reference
            # value exists, so the answer cannot be confirmed.
            return False
        true_MR = Crippen.MolMR(mol)

        # A relative error is undefined when the reference MR is 0, so fall
        # back to an absolute tolerance in that case.
        if true_MR == 0:
            return abs(solution_float) <= 0.01
        return abs(true_MR - solution_float) / abs(true_MR) <= 0.01
|
||||
|
||||
41
internbootcamp/bootcamp/ChemStructure2Property/SMILES2logPBootCamp.py
Executable file
41
internbootcamp/bootcamp/ChemStructure2Property/SMILES2logPBootCamp.py
Executable file
|
|
@ -0,0 +1,41 @@
|
|||
from internbootcamp.bootcamp.base import Basebootcamp
|
||||
from internbootcamp.libs.chemStructure2Property.ChemStructureGenerator import SMILESGenerator
|
||||
from .utils import last_boxed_only_string, remove_boxed
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem import Crippen
|
||||
|
||||
from .InChI2logPBootCamp import InChI2logPbootcamp
|
||||
|
||||
class SMILES2logPBootCamp(InChI2logPbootcamp):
    """Bootcamp task: predict a molecule's lipophilicity (logP) from its SMILES.

    Cases come from a ``SMILESGenerator``; answers are graded against
    ``Crippen.MolLogP``.
    """

    def __init__(self, min_len=5, max_len=25,
                 seed=None):
        """Create the SMILES case generator.

        The parent constructor is not called, so this instance has only the
        ``SMILESGenerator`` attribute set here.

        Args:
            min_len: Forwarded to ``SMILESGenerator``.
            max_len: Forwarded to ``SMILESGenerator``.
            seed: Forwarded to ``SMILESGenerator``.
        """
        self.SMILESGenerator = SMILESGenerator(min_len=min_len, max_len=max_len, seed=seed)

    def case_generator(self) -> str:
        """Return one case: the first item of a one-element batch from the generator."""
        return self.SMILESGenerator.generate_n_valid_smiles(1)[0]

    def prompt_func(self, SMILES) -> str:
        """Build the question text for one SMILES.

        Args:
            SMILES: The SMILES of the molecule to ask about; it is
                interpolated into the prompt as-is.

        Returns:
            The task instruction and the answer-format instruction, joined by
            a newline.
        """
        instruction = f"Given the SMILES, determine the lipophilicity (logP) value of the material. The SMILES is: {SMILES}"
        instruction_following = """Let's think step by step and output the final answer within \\boxed{}.The final answer should be one float number. For example "Final Answer: \\boxed{afloat}"."""

        prompt = instruction + '\n' + instruction_following
        return prompt

    @classmethod
    def _verify_correction(cls, solution, SMILES) -> bool:
        """Check an answer against ``Crippen.MolLogP`` computed for ``SMILES``.

        Args:
            solution: The extracted answer; anything ``float()`` accepts.
            SMILES: The SMILES the question was generated from.

        Returns:
            True when the answer is within 1% relative error of the reference
            logP (or within 0.01 absolute when the reference is exactly 0).
            False when the answer is not a number or RDKit cannot parse the
            SMILES.
        """
        # A verifier should grade, not crash: a non-numeric answer is simply wrong.
        try:
            solution_float = float(solution)
        except (TypeError, ValueError):
            return False

        mol = Chem.MolFromSmiles(SMILES)
        if mol is None:
            # RDKit signals an unparsable structure with None; no reference
            # value exists, so the answer cannot be confirmed.
            return False
        true_logp = Crippen.MolLogP(mol)

        # A relative error is undefined when the reference logP is 0, so fall
        # back to an absolute tolerance in that case.
        if true_logp == 0:
            return abs(solution_float) <= 0.01
        return abs(true_logp - solution_float) / abs(true_logp) <= 0.01
|
||||
43
internbootcamp/bootcamp/ChemStructure2Property/utils.py
Normal file
43
internbootcamp/bootcamp/ChemStructure2Property/utils.py
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
|
||||
def remove_boxed(s):
    """Strip the box wrapper from a boxed expression and return its content.

    Accepted forms are ``\\boxed <content>``, ``\\boxed{<content>}`` and
    ``\\fbox{<content>}``. The last form is accepted because
    ``last_boxed_only_string`` can return it.

    Args:
        s: A string that starts with one of the box wrappers above.

    Returns:
        The content with the wrapper removed.

    Raises:
        AssertionError: If ``s`` does not start with a supported wrapper, or a
            braced wrapper is not closed by a final ``}``. It is raised
            explicitly so the check still runs under ``python -O``.
    """
    spaced_prefix = "\\boxed "
    if spaced_prefix in s:
        if not s.startswith(spaced_prefix):
            raise AssertionError(f"expected string to start with {spaced_prefix!r}: {s!r}")
        return s[len(spaced_prefix):]

    for braced_prefix in ("\\boxed{", "\\fbox{"):
        if s.startswith(braced_prefix):
            if not s.endswith("}"):
                raise AssertionError(f"expected string to end with '}}': {s!r}")
            return s[len(braced_prefix):-1]

    raise AssertionError(f"expected a \\boxed or \\fbox expression: {s!r}")
|
||||
|
||||
|
||||
def last_boxed_only_string(string):
    """Return the last boxed expression in ``string``, wrapper included.

    If the space-delimited form ``\\boxed <content>`` occurs anywhere, the
    text after its last occurrence, cut at the first ``$``, is returned with
    the ``\\boxed `` prefix restored. Otherwise the last ``\\boxed`` (or,
    failing that, the last ``\\fbox``) is located and the text from there to
    the brace that balances its first opening brace is returned.

    Args:
        string: The text to search.

    Returns:
        The boxed expression, or None when there is no box or its braces
        never balance.
    """
    start = string.rfind("\\boxed")

    spaced_marker = "\\boxed "
    if spaced_marker in string:
        after_marker = string.split(spaced_marker)[-1]
        return spaced_marker + after_marker.split("$")[0]

    if start < 0:
        start = string.rfind("\\fbox")
    if start < 0:
        return None

    # Track brace depth from the wrapper onward; the expression ends at the
    # closing brace that brings the depth back to zero.
    depth = 0
    for pos in range(start, len(string)):
        char = string[pos]
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                return string[start:pos + 1]

    return None
|
||||
|
|
@ -30,6 +30,13 @@ from .bbeh_hyperbaton.hyperbaton_default import BBEHHyperbatonbootcamp
|
|||
from .bbeh_boardgame_qa.bbeh_boardgame_qa import Bbehboardgameqabootcamp
|
||||
from .bbeh_boolean_expressions.bbeh_boolean_expressions import Bbehbooleanexpressionsbootcamp
|
||||
|
||||
|
||||
from .ChemStructure2Property.InChI2logPBootCamp import InChI2logPbootcamp
|
||||
from .ChemStructure2Property.InChI2MRBootCamp import InChI2MRBootCamp
|
||||
from .ChemStructure2Property.SMILES2logPBootCamp import SMILES2logPBootCamp
|
||||
from .ChemStructure2Property.SMILES2MRBootCamp import SMILES2MRBootCamp
|
||||
|
||||
|
||||
from .kakurasu.kakurasu import Kakurasubootcamp
|
||||
from .nonograms.nonograms import Nonogramsbootcamp
|
||||
from .hitori.hitori import Hitoribootcamp
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue