Skip to content

Commit

Permalink
add fragmenter
Browse files Browse the repository at this point in the history
  • Loading branch information
jeff231li committed Sep 18, 2023
1 parent 553e023 commit faa8924
Show file tree
Hide file tree
Showing 2 changed files with 360 additions and 0 deletions.
2 changes: 2 additions & 0 deletions devtools/conda-envs/test_env.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ dependencies:
- pyyaml
- plumed
- intermol
- openff-interchange >=0.3.7
- openff-toolkit >=0.11
- openff-units >=0.2.0
- openff-utilities

Expand Down
358 changes: 358 additions & 0 deletions paprika/build/system/fragmenter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,358 @@
import os
import subprocess
from typing import List

import openmm.unit as openmm_unit
import parmed
from openff.toolkit.topology import Molecule
from openff.units import unit
from rdkit.Chem import AllChem as Chem


class CyclodextrinFragmenter:
"""
Class that defines a Cyclodextrin molecule with methods to assign partial charges
using a fragment-based approach. This adapted from Tobias Hüfner's code:
https://github.com/wutobias/collection/blob/master/python/am1bcc-glyco-fragmenter.py
"""

@property
def monomers(self) -> List[Chem.Mol]:
"""list: A list of monomers."""
return self._monomers

@monomers.setter
def monomers(self, value: List[Chem.Mol]):
self._monomers = value

@property
def input_molecule(self) -> Chem.Mol:
"""Chem.Mol: The Cyclodextrin molecule."""
return self._input_molecule

@input_molecule.setter
def input_molecule(self, value: Chem.Mol):
self._input_molecule = value

@property
def dummy_atom_labels(self) -> List[int]:
"""list: List of dummy atom labels -- for O1 and C4 bridge."""
return self._dummy_atom_labels

@property
def label_O1(self) -> int:
"""int: Integer label for atom O1"""
return self._label_O1

@label_O1.setter
def label_O1(self, value: int):
self._label_O1 = value
self._dummy_atom_labels[0] = value

@property
def label_C4(self) -> int:
"""int: Integer label for atom C4"""
return self._label_C4

@label_C4.setter
def label_C4(self, value: int):
self._label_C4 = value
self._dummy_atom_labels[1] = value

def __init__(self, input_molecule: Chem.Mol, monomers: List[Chem.Mol] = None):
self._input_molecule = input_molecule
self._monomers = [] if monomers is None else monomers
self._capped_monomers = []
self._capped_monomers_atom_indices = []
self._capped_monomers_smiles = []
self._capped_monomers_index = []
self._capped_monomers_charges = {}
self._label_O1 = 1
self._label_C4 = 4
self._dummy_atom_labels = [self._label_O1, self._label_C4]
self._partial_charge_list = []
self._partial_charge_dictionary = {}

def add_monomer(self, molecule: Chem.Mol):
"""Utility function that adds monomers to the list."""
self.monomers.append(molecule)

def assign_partial_charges(self, partial_charge_method="am1bcc"):
"""
Assign partial charges to cyclodextrin molecule using a fragment-based approach
Parameters
----------
partial_charge_method: str
The partial charge method (see OpenFF-Toolkit documentation)
"""

# Generate charges for each capped monomer
openff_list = dict()
self._capped_monomers_charges = dict()
self._get_capped_monomer()

for fragment_index in list(set(self._capped_monomers_index)):
# Generate Conformers
molecule = Chem.AddHs(self._capped_monomers[fragment_index])
Chem.EmbedMultipleConfs(molecule)
Chem.MMFFOptimizeMoleculeConfs(molecule)

# Write SDF file for methyl-capped fragment
writer = Chem.SDWriter(f"./frag{fragment_index}.sdf")
writer.write(molecule, confId=1)
writer.close()

# Assign partial charges using OFF-Toolkit with Antechamber Backend
molecule_openff = Molecule.from_file(f"./frag{fragment_index}.sdf")
molecule_openff.assign_partial_charges(
partial_charge_method=partial_charge_method,
normalize_partial_charges=True,
)

# Add molecule and partial charges to list
openff_list[fragment_index] = molecule_openff
try:
self._capped_monomers_charges[
fragment_index
] = molecule_openff.partial_charges.m_as(unit.elementary_charge)
except AttributeError:
self._capped_monomers_charges[
fragment_index
] = molecule_openff.partial_charges.value_in_unit(
openmm_unit.elementary_charge
)

# Clean up
os.remove(f"./frag{fragment_index}.sdf")

# Add monomer charges to cyclodextrin molecule
self._partial_charge_dictionary = dict()
current_charge = 0.0

# Loop over monomers
for monomer_index, fragment_index in enumerate(self._capped_monomers_index):
molecule_atom_indices = self._capped_monomers_atom_indices[monomer_index]
molecule = self._capped_monomers[monomer_index]
self_match = (
openff_list[fragment_index].to_rdkit().GetSubstructMatch(molecule)
)
for (
fragment_atom_index,
molecule_atom_index,
) in enumerate(molecule_atom_indices):
partial_charge = self._capped_monomers_charges[fragment_index][
self_match[fragment_atom_index]
]
self._partial_charge_dictionary[molecule_atom_index] = partial_charge
current_charge += partial_charge

# Normalize partial charges
expected_charge = float(Chem.GetFormalCharge(self.input_molecule))
charge_offset = (current_charge - expected_charge) / float(
len(self._partial_charge_dictionary)
)
self._partial_charge_list = list()

for atom_index in range(self.input_molecule.GetNumAtoms()):
# Skip capped atoms
if atom_index not in self._partial_charge_dictionary:
continue

charge = self._partial_charge_dictionary[atom_index] - charge_offset
self._partial_charge_list.append(str(charge))

# Add partial charges to RDKit Molecule instance
self.input_molecule.SetProp(
"atom.dprop.PartialCharge", "\n".join(self._partial_charge_list)
)

def to_file(self, file, file_format, residue_name="MGO", atom_type="sybyl"):
"""
Save the cyclodextrin molecule to file.
Parameters
----------
file: str
File name
file_format: str
File format -- either SDF, MOL2 and PDB (through parmed)
residue_name: str
Residue name -- mainly for MOL2 files
atom_type: str
The atom type in MOL2 files -- gaff, gaff2, amber, bcc or sybyl
"""
off_molecule = Molecule.from_rdkit(self.input_molecule)

if file_format == "SDF":
off_molecule.to_file(file, file_format=file_format)

elif file_format == "MOL2" or file_format == "PDB":
structure = parmed.openmm.load_topology(
off_molecule.to_topology().to_openmm(), xyz=off_molecule.conformers[0]
)
# Add charges to molecule
for atom, charge in zip(structure.atoms, self._partial_charge_list):
atom.charge = float(charge)

# Save current structure to MOL2 file
structure.save(file, overwrite=True)

# Rewrite with residue name and atom type using Antechamber
output = subprocess.Popen(
[
"antechamber",
"-fi",
"mol2",
"-fo",
file_format.lower(),
"-i",
file,
"-o",
file,
"-rn",
residue_name,
"-at",
atom_type,
"-pf",
"y",
],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
cwd="./",
)
output = output.stdout.read().decode().splitlines()

@staticmethod
def _get_atom_map_number(atom: Chem.Atom) -> int:
"""
Given a RDKit Atom, return the atom map number for an atom.
For cyclodextrin, I labelled `1` and `4` for O1 and C4 atoms, respectively.
"""
if "molAtomMapNumber" in atom.GetPropNames():
i = int(atom.GetProp("molAtomMapNumber"))
else:
i = -1

return i

def _get_capped_monomer(self):
"""
Get matched monomers and cap terminals with methyl.
"""
self._capped_monomers = list()
self._capped_monomers_atom_indices = list()

# Get bond list
bond_list, dummy_labels = self._get_bond_list()

# Bond break based on bond list and dummy labels
input_molecule_fragment = Chem.FragmentOnBonds(
self.input_molecule,
bond_list,
dummyLabels=dummy_labels,
)

# Loop over fragments
for fragment_molecule, fragment_atom_indices in zip(
Chem.GetMolFrags(input_molecule_fragment, asMols=True),
Chem.GetMolFrags(input_molecule_fragment, asMols=False),
):
atom_index_list = list()
rwmol = Chem.RWMol(fragment_molecule)

for atom, atom_index in zip(
fragment_molecule.GetAtoms(), fragment_atom_indices
):
# Get label
label = atom.GetIsotope()

# Add Methyl to O1
if label == self.label_O1:
rwmol.ReplaceAtom(atom.GetIdx(), Chem.Atom(8))
rwmol.AddAtom(Chem.Atom(6))
rwmol.AddBond(
atom.GetIdx(),
fragment_molecule.GetNumAtoms(),
order=Chem.BondType.SINGLE,
)
fragment_molecule = rwmol.GetMol()

# Add Methyl to C4
elif label == self.label_C4:
rwmol.ReplaceAtom(atom.GetIdx(), Chem.Atom(6))

# Add atom index to list
else:
atom_index_list.append(atom_index)

# Generate 2D coordinates
fragment_molecule = rwmol.GetMol()
Chem.SanitizeMol(fragment_molecule)
fragment_molecule.Compute2DCoords()

# Add capped fragment molecule to list
self._capped_monomers.append(fragment_molecule)
self._capped_monomers_atom_indices.append(atom_index_list)

# Generate unique smiles and index
smiles_list = [
Chem.CanonSmiles(Chem.MolToSmiles(mol)) for mol in self._capped_monomers
]
self._capped_monomers_smiles = list(set(smiles_list))
self._capped_monomers_index = [
smiles_list.index(smiles) for smiles in smiles_list
]

def _get_bond_list(self):
"""
Get bond index between monomers - used to cut the macrocylic molecule
"""
bond_list = list()
dummy_labels = list()

# Loop over monomers
for monomer in self.monomers:
# Find monomer in molecule
input_matches = self.input_molecule.GetSubstructMatches(monomer)

# Loop over monomer matches
for match in input_matches:
# Loop over atom indices in `match`
for query_index, atom_index in enumerate(match):
# Select only O1 atom.
if (
self._get_atom_map_number(monomer.GetAtomWithIdx(query_index))
!= 1
):
continue

atom = self.input_molecule.GetAtomWithIdx(atom_index)

# Loop over neighbors
for neighbor_atom in atom.GetNeighbors():
# Select neighboring atom not in `SubstructMatch` (or in monomer)
if neighbor_atom.GetIdx() in match:
continue

# Get bond between atom O1 and its neighbor (i.e. C4)
bond = self.input_molecule.GetBondBetweenAtoms(
atom_index, neighbor_atom.GetIdx()
)
bond_index = bond.GetIdx()

# Add to list unique bond_index
if bond_index in bond_list:
continue

bond_list.append(bond_index)

# Set labels for dummy
if bond.GetBeginAtomIdx() == atom_index:
dummy_labels.append(self.dummy_atom_labels)
else:
dummy_labels.append(self.dummy_atom_labels[::-1])

return bond_list, dummy_labels

0 comments on commit faa8924

Please sign in to comment.