Skip to content

Commit

Permalink
add L to fragment with fewest Ls and R to fragment to fewest Rs where…
Browse files Browse the repository at this point in the history
… possible, try to cut fragments in near equal size if possible, else default to size_threshold=5
  • Loading branch information
donerancl committed May 20, 2024
1 parent 6a5ebf4 commit 61ffa80
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 52 deletions.
124 changes: 77 additions & 47 deletions rmgpy/molecule/fragment.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
from rmgpy.molecule.molecule import Atom, Bond, Molecule
from rmgpy.molecule.atomtype import get_atomtype, AtomTypeError, ATOMTYPES, AtomType
from rdkit import Chem

from numpy.random import randint
# this variable is used to name atom IDs so that there are as few conflicts by
# using the entire space of integer objects
ATOM_ID_COUNTER = -(2**15)
Expand Down Expand Up @@ -888,15 +888,10 @@ def cut_molecule(self, output_smiles=False, cut_through=True, size_threshold=Non
frag_list.append(res_frag)
return frag_list

def sliceitup_arom(self, molecule, size_threshold=None):
def sliceitup_arom(self, molecule, size_threshold=5):
"""
Several specified aromatic patterns
"""
# set min size for each aliphatic fragment size
if size_threshold:
size_threshold = size_threshold
else:
size_threshold = 5
# if input is smiles string, output smiles
if isinstance(molecule, str):
molecule_smiles = molecule
Expand Down Expand Up @@ -950,27 +945,47 @@ def sliceitup_arom(self, molecule, size_threshold=None):
# mol_set contains new set of fragments
mol_set = Chem.GetMolFrags(new_mol, asMols=True)
# check all fragments' size
if all(
sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6)
>= size_threshold
for mol in mol_set
):
# replace * at cutting position with cutting label
for ind, rdmol in enumerate(mol_set):
frag = Chem.MolToSmiles(rdmol)
if len(mol_set) > 2: # means it cut into 3 fragments
if all(sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6) >= size_threshold for mol in mol_set):
if len(mol_set) == 2:
frag1 = Chem.MolToSmiles(mol_set[0])
frag2 = Chem.MolToSmiles(mol_set[1])

frag1_R = frag1.count("Na")
frag1_L = frag1.count("K")
frag2_R = frag2.count("Na")
frag2_L = frag2.count("K")

if frag1_R > frag2_R and frag1_L <= frag2_L:
frag1_smi = frag1.replace("*", "L")
frag2_smi = frag2.replace("*", "R")
elif frag1_L > frag2_L and frag1_R <= frag2_R:
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")
elif frag2_L > frag1_L and frag2_R <= frag1_R:
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")
elif frag2_R > frag1_R and frag2_L <= frag1_L:
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")
elif randint(0,1)==1:
frag1_smi = frag1.replace("*", "L")
frag2_smi = frag2.replace("*", "R")
else:
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")

frag_list = [frag1_smi, frag2_smi]

elif len(mol_set) > 2: # means it cut into 3 fragments
frag_list = []
for ind, rdmol in enumerate(mol_set):
frag = Chem.MolToSmiles(rdmol)
if frag.count("*") > 1:
# replace both with R
frag_smi = frag.replace("*", "R")
else:
frag_smi = frag.replace("*", "L")
else: # means it only cut once, generate 2 fragments
if ind == 0:
frag_smi = frag.replace("*", "R")
else:
frag_smi = frag.replace("*", "L")
frag_list.append(frag_smi)
break
frag_list.append(frag_smi)
break
else:
# turn to next matched_atom_map
continue
Expand Down Expand Up @@ -1014,15 +1029,10 @@ def sliceitup_arom(self, molecule, size_threshold=None):
frag_list_new.append(res_frag)
return frag_list_new

def sliceitup_aliph(self, molecule, size_threshold=None):
def sliceitup_aliph(self, molecule, size_threshold=5):
"""
Several specified aliphatic patterns
"""
# set min size for each aliphatic fragment size
if size_threshold:
size_threshold = size_threshold
else:
size_threshold = 5
# if input is smiles string, output smiles
if isinstance(molecule, str):
molecule_smiles = molecule
Expand Down Expand Up @@ -1079,27 +1089,47 @@ def sliceitup_aliph(self, molecule, size_threshold=None):
# mol_set contains new set of fragments
mol_set = Chem.GetMolFrags(new_mol, asMols=True)
# check all fragments' size
if all(
sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6)
>= size_threshold
for mol in mol_set
):
# replace * at cutting position with cutting label
for ind, rdmol in enumerate(mol_set):
frag = Chem.MolToSmiles(rdmol)
if len(mol_set) > 2: # means it cut into 3 fragments
if all(sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6) >= size_threshold for mol in mol_set):
if len(mol_set) == 2:
frag1 = Chem.MolToSmiles(mol_set[0])
frag2 = Chem.MolToSmiles(mol_set[1])

frag1_R = frag1.count("Na")
frag1_L = frag1.count("K")
frag2_R = frag2.count("Na")
frag2_L = frag2.count("K")

if frag1_R > frag2_R and frag1_L <= frag2_L:
frag1_smi = frag1.replace("*", "L")
frag2_smi = frag2.replace("*", "R")
elif frag1_L > frag2_L and frag1_R <= frag2_R:
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")
elif frag2_L > frag1_L and frag2_R <= frag1_R:
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")
elif frag2_R > frag1_R and frag2_L <= frag1_L:
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")
elif randint(0,1)==1:
frag1_smi = frag1.replace("*", "L")
frag2_smi = frag2.replace("*", "R")
else:
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")

frag_list = [frag1_smi, frag2_smi]

elif len(mol_set) > 2: # means it cut into 3 fragments
frag_list = []
for ind, rdmol in enumerate(mol_set):
frag = Chem.MolToSmiles(rdmol)
if frag.count("*") > 1:
# replace both with R
frag_smi = frag.replace("*", "R")
else:
frag_smi = frag.replace("*", "L")
else: # means it only cut once, generate 2 fragments
if ind == 0:
frag_smi = frag.replace("*", "R")
else:
frag_smi = frag.replace("*", "L")
frag_list.append(frag_smi)
break
frag_list.append(frag_smi)
break
else:
# turn to next matched_atom_map
continue
Expand Down
15 changes: 10 additions & 5 deletions rmgpy/rmg/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
from rmgpy.rmg.decay import decay_species
from rmgpy.rmg.reactors import PhaseSystem, Phase, Interface, Reactor
from rmgpy.molecule.fragment import Fragment

from rmgpy.molecule.molecule import Molecule
################################################################################


Expand Down Expand Up @@ -303,12 +303,17 @@ def make_new_species(self, object, label="", reactive=True, check_existing=True,

# If we're here then we're ready to make the new species
if check_cut:
try:
mols = molecule.cut_molecule(cut_through=False)
except AttributeError:
# it's Molecule object, change it to Fragment and then cut
# set size_threshold to about half the molecule size
size_threshold = int((molecule.smiles.count("C")+molecule.smiles.count("c"))/2)
# if the molecule type is Molecule, change type to Fragment before cutting
if type(molecule) == Molecule:
molecule = Fragment().from_adjacency_list(molecule.to_adjacency_list())
# try to cut_molecule with size threshold set above
mols = molecule.cut_molecule(cut_through=False,size_threshold=size_threshold)
# if cut above can't be made try with default size_threshold = 5
if len(mols) == 1:
mols = molecule.cut_molecule(cut_through=False)
# if cut above can't be made, don't cut
if len(mols) == 1:
molecule = mols[0]
else:
Expand Down

0 comments on commit 61ffa80

Please sign in to comment.