add fragmenter

GilsonLabUCSD · Sep 18, 2023 · faa8924 · faa8924
1 parent 553e023
commit faa8924
Show file tree

Hide file tree

Showing 2 changed files with 360 additions and 0 deletions.
diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml
@@ -16,6 +16,8 @@ dependencies:
   - pyyaml
   - plumed
   - intermol
+  - openff-interchange >=0.3.7
+  - openff-toolkit >=0.11
   - openff-units >=0.2.0
   - openff-utilities
 

diff --git a/paprika/build/system/fragmenter.py b/paprika/build/system/fragmenter.py
@@ -0,0 +1,358 @@
+import os
+import subprocess
+from typing import List
+
+import openmm.unit as openmm_unit
+import parmed
+from openff.toolkit.topology import Molecule
+from openff.units import unit
+from rdkit.Chem import AllChem as Chem
+
+
+class CyclodextrinFragmenter:
+    """
+    Class that defines a Cyclodextrin molecule with methods to assign partial charges
+    using a fragment-based approach. This adapted from Tobias Hüfner's code:
+
+    https://github.com/wutobias/collection/blob/master/python/am1bcc-glyco-fragmenter.py
+    """
+
+    @property
+    def monomers(self) -> List[Chem.Mol]:
+        """list: A list of monomers."""
+        return self._monomers
+
+    @monomers.setter
+    def monomers(self, value: List[Chem.Mol]):
+        self._monomers = value
+
+    @property
+    def input_molecule(self) -> Chem.Mol:
+        """Chem.Mol: The Cyclodextrin molecule."""
+        return self._input_molecule
+
+    @input_molecule.setter
+    def input_molecule(self, value: Chem.Mol):
+        self._input_molecule = value
+
+    @property
+    def dummy_atom_labels(self) -> List[int]:
+        """list: List of dummy atom labels -- for O1 and C4 bridge."""
+        return self._dummy_atom_labels
+
+    @property
+    def label_O1(self) -> int:
+        """int: Integer label for atom O1"""
+        return self._label_O1
+
+    @label_O1.setter
+    def label_O1(self, value: int):
+        self._label_O1 = value
+        self._dummy_atom_labels[0] = value
+
+    @property
+    def label_C4(self) -> int:
+        """int: Integer label for atom C4"""
+        return self._label_C4
+
+    @label_C4.setter
+    def label_C4(self, value: int):
+        self._label_C4 = value
+        self._dummy_atom_labels[1] = value
+
+    def __init__(self, input_molecule: Chem.Mol, monomers: List[Chem.Mol] = None):
+        self._input_molecule = input_molecule
+        self._monomers = [] if monomers is None else monomers
+        self._capped_monomers = []
+        self._capped_monomers_atom_indices = []
+        self._capped_monomers_smiles = []
+        self._capped_monomers_index = []
+        self._capped_monomers_charges = {}
+        self._label_O1 = 1
+        self._label_C4 = 4
+        self._dummy_atom_labels = [self._label_O1, self._label_C4]
+        self._partial_charge_list = []
+        self._partial_charge_dictionary = {}
+
+    def add_monomer(self, molecule: Chem.Mol):
+        """Utility function that adds monomers to the list."""
+        self.monomers.append(molecule)
+
+    def assign_partial_charges(self, partial_charge_method="am1bcc"):
+        """
+        Assign partial charges to cyclodextrin molecule using a fragment-based approach
+
+        Parameters
+        ----------
+        partial_charge_method: str
+            The partial charge method (see OpenFF-Toolkit documentation)
+
+        """
+
+        # Generate charges for each capped monomer
+        openff_list = dict()
+        self._capped_monomers_charges = dict()
+        self._get_capped_monomer()
+
+        for fragment_index in list(set(self._capped_monomers_index)):
+            # Generate Conformers
+            molecule = Chem.AddHs(self._capped_monomers[fragment_index])
+            Chem.EmbedMultipleConfs(molecule)
+            Chem.MMFFOptimizeMoleculeConfs(molecule)
+
+            # Write SDF file for methyl-capped fragment
+            writer = Chem.SDWriter(f"./frag{fragment_index}.sdf")
+            writer.write(molecule, confId=1)
+            writer.close()
+
+            # Assign partial charges using OFF-Toolkit with Antechamber Backend
+            molecule_openff = Molecule.from_file(f"./frag{fragment_index}.sdf")
+            molecule_openff.assign_partial_charges(
+                partial_charge_method=partial_charge_method,
+                normalize_partial_charges=True,
+            )
+
+            # Add molecule and partial charges to list
+            openff_list[fragment_index] = molecule_openff
+            try:
+                self._capped_monomers_charges[
+                    fragment_index
+                ] = molecule_openff.partial_charges.m_as(unit.elementary_charge)
+            except AttributeError:
+                self._capped_monomers_charges[
+                    fragment_index
+                ] = molecule_openff.partial_charges.value_in_unit(
+                    openmm_unit.elementary_charge
+                )
+
+            # Clean up
+            os.remove(f"./frag{fragment_index}.sdf")
+
+        # Add monomer charges to cyclodextrin molecule
+        self._partial_charge_dictionary = dict()
+        current_charge = 0.0
+
+        # Loop over monomers
+        for monomer_index, fragment_index in enumerate(self._capped_monomers_index):
+            molecule_atom_indices = self._capped_monomers_atom_indices[monomer_index]
+            molecule = self._capped_monomers[monomer_index]
+            self_match = (
+                openff_list[fragment_index].to_rdkit().GetSubstructMatch(molecule)
+            )
+            for (
+                fragment_atom_index,
+                molecule_atom_index,
+            ) in enumerate(molecule_atom_indices):
+                partial_charge = self._capped_monomers_charges[fragment_index][
+                    self_match[fragment_atom_index]
+                ]
+                self._partial_charge_dictionary[molecule_atom_index] = partial_charge
+                current_charge += partial_charge
+
+        # Normalize partial charges
+        expected_charge = float(Chem.GetFormalCharge(self.input_molecule))
+        charge_offset = (current_charge - expected_charge) / float(
+            len(self._partial_charge_dictionary)
+        )
+        self._partial_charge_list = list()
+
+        for atom_index in range(self.input_molecule.GetNumAtoms()):
+            # Skip capped atoms
+            if atom_index not in self._partial_charge_dictionary:
+                continue
+
+            charge = self._partial_charge_dictionary[atom_index] - charge_offset
+            self._partial_charge_list.append(str(charge))
+
+        # Add partial charges to RDKit Molecule instance
+        self.input_molecule.SetProp(
+            "atom.dprop.PartialCharge", "\n".join(self._partial_charge_list)
+        )
+
+    def to_file(self, file, file_format, residue_name="MGO", atom_type="sybyl"):
+        """
+        Save the cyclodextrin molecule to file.
+
+        Parameters
+        ----------
+        file: str
+            File name
+        file_format: str
+            File format -- either SDF, MOL2 and PDB (through parmed)
+        residue_name: str
+            Residue name -- mainly for MOL2 files
+        atom_type: str
+            The atom type in MOL2 files -- gaff, gaff2, amber, bcc or sybyl
+        """
+        off_molecule = Molecule.from_rdkit(self.input_molecule)
+
+        if file_format == "SDF":
+            off_molecule.to_file(file, file_format=file_format)
+
+        elif file_format == "MOL2" or file_format == "PDB":
+            structure = parmed.openmm.load_topology(
+                off_molecule.to_topology().to_openmm(), xyz=off_molecule.conformers[0]
+            )
+            # Add charges to molecule
+            for atom, charge in zip(structure.atoms, self._partial_charge_list):
+                atom.charge = float(charge)
+
+            # Save current structure to MOL2 file
+            structure.save(file, overwrite=True)
+
+            # Rewrite with residue name and atom type using Antechamber
+            output = subprocess.Popen(
+                [
+                    "antechamber",
+                    "-fi",
+                    "mol2",
+                    "-fo",
+                    file_format.lower(),
+                    "-i",
+                    file,
+                    "-o",
+                    file,
+                    "-rn",
+                    residue_name,
+                    "-at",
+                    atom_type,
+                    "-pf",
+                    "y",
+                ],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                cwd="./",
+            )
+            output = output.stdout.read().decode().splitlines()
+
+    @staticmethod
+    def _get_atom_map_number(atom: Chem.Atom) -> int:
+        """
+        Given a RDKit Atom, return the atom map number for an atom.
+        For cyclodextrin, I labelled `1` and `4` for O1 and C4 atoms, respectively.
+        """
+        if "molAtomMapNumber" in atom.GetPropNames():
+            i = int(atom.GetProp("molAtomMapNumber"))
+        else:
+            i = -1
+
+        return i
+
+    def _get_capped_monomer(self):
+        """
+        Get matched monomers and cap terminals with methyl.
+        """
+        self._capped_monomers = list()
+        self._capped_monomers_atom_indices = list()
+
+        # Get bond list
+        bond_list, dummy_labels = self._get_bond_list()
+
+        # Bond break based on bond list and dummy labels
+        input_molecule_fragment = Chem.FragmentOnBonds(
+            self.input_molecule,
+            bond_list,
+            dummyLabels=dummy_labels,
+        )
+
+        # Loop over fragments
+        for fragment_molecule, fragment_atom_indices in zip(
+            Chem.GetMolFrags(input_molecule_fragment, asMols=True),
+            Chem.GetMolFrags(input_molecule_fragment, asMols=False),
+        ):
+            atom_index_list = list()
+            rwmol = Chem.RWMol(fragment_molecule)
+
+            for atom, atom_index in zip(
+                fragment_molecule.GetAtoms(), fragment_atom_indices
+            ):
+                # Get label
+                label = atom.GetIsotope()
+
+                # Add Methyl to O1
+                if label == self.label_O1:
+                    rwmol.ReplaceAtom(atom.GetIdx(), Chem.Atom(8))
+                    rwmol.AddAtom(Chem.Atom(6))
+                    rwmol.AddBond(
+                        atom.GetIdx(),
+                        fragment_molecule.GetNumAtoms(),
+                        order=Chem.BondType.SINGLE,
+                    )
+                    fragment_molecule = rwmol.GetMol()
+
+                # Add Methyl to C4
+                elif label == self.label_C4:
+                    rwmol.ReplaceAtom(atom.GetIdx(), Chem.Atom(6))
+
+                # Add atom index to list
+                else:
+                    atom_index_list.append(atom_index)
+
+            # Generate 2D coordinates
+            fragment_molecule = rwmol.GetMol()
+            Chem.SanitizeMol(fragment_molecule)
+            fragment_molecule.Compute2DCoords()
+
+            # Add capped fragment molecule to list
+            self._capped_monomers.append(fragment_molecule)
+            self._capped_monomers_atom_indices.append(atom_index_list)
+
+        # Generate unique smiles and index
+        smiles_list = [
+            Chem.CanonSmiles(Chem.MolToSmiles(mol)) for mol in self._capped_monomers
+        ]
+        self._capped_monomers_smiles = list(set(smiles_list))
+        self._capped_monomers_index = [
+            smiles_list.index(smiles) for smiles in smiles_list
+        ]
+
+    def _get_bond_list(self):
+        """
+        Get bond index between monomers - used to cut the macrocylic molecule
+        """
+        bond_list = list()
+        dummy_labels = list()
+
+        # Loop over monomers
+        for monomer in self.monomers:
+            # Find monomer in molecule
+            input_matches = self.input_molecule.GetSubstructMatches(monomer)
+
+            # Loop over monomer matches
+            for match in input_matches:
+                # Loop over atom indices in `match`
+                for query_index, atom_index in enumerate(match):
+                    # Select only O1 atom.
+                    if (
+                        self._get_atom_map_number(monomer.GetAtomWithIdx(query_index))
+                        != 1
+                    ):
+                        continue
+
+                    atom = self.input_molecule.GetAtomWithIdx(atom_index)
+
+                    # Loop over neighbors
+                    for neighbor_atom in atom.GetNeighbors():
+                        # Select neighboring atom not in `SubstructMatch` (or in monomer)
+                        if neighbor_atom.GetIdx() in match:
+                            continue
+
+                        # Get bond between atom O1 and its neighbor (i.e. C4)
+                        bond = self.input_molecule.GetBondBetweenAtoms(
+                            atom_index, neighbor_atom.GetIdx()
+                        )
+                        bond_index = bond.GetIdx()
+
+                        # Add to list unique bond_index
+                        if bond_index in bond_list:
+                            continue
+
+                        bond_list.append(bond_index)
+
+                        # Set labels for dummy
+                        if bond.GetBeginAtomIdx() == atom_index:
+                            dummy_labels.append(self.dummy_atom_labels)
+                        else:
+                            dummy_labels.append(self.dummy_atom_labels[::-1])
+
+        return bond_list, dummy_labels