From 08918c7860e41d75fa397970187a64fa43c14222 Mon Sep 17 00:00:00 2001 From: Chris Iacovella Date: Fri, 28 Apr 2023 12:46:03 -0700 Subject: [PATCH] speeding up add function for large compounds (#1107) * updated add routine to compose bond graphs of a list together before composing with main compound * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * change flattening routine for list, as it was causing a failure in the add remove bond test * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * some how left a conflict statement in...not sure why it is causing a conflict to begin with * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * added in sped up routine for loading mdtraj trajectories * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * added in sped up routine for loading parmed trajectories * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * added new tests * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * added tests for helper functions * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * added condense function for Compounds * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * added tests to condense function * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * addressed comments from cal * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * minor changes in codes and arrangmenet * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix style * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix unit test --------- Co-authored-by: Christopher Iacovella Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Christopher Iacovella Co-authored-by: Co Quach --- mbuild/compound.py | 133 +++++++++++++++++++++++++++++++--- mbuild/conversion.py | 45 ++++++++++-- mbuild/tests/test_compound.py | 113 +++++++++++++++++++++++++++++ 3 files changed, 274 insertions(+), 17 deletions(-) diff --git a/mbuild/compound.py b/mbuild/compound.py index e39c03ac8..b09569b3f 100644 --- a/mbuild/compound.py +++ b/mbuild/compound.py @@ -13,6 +13,7 @@ import ele import networkx as nx import numpy as np +from boltons.setutils import IndexedSet from ele.element import Element, element_from_name, element_from_symbol from ele.exceptions import ElementError from treelib import Tree @@ -811,8 +812,8 @@ def add( ---------- new_child : mb.Compound or list-like of mb.Compound The object(s) to be added to this Compound. - label : str, optional, default None - A descriptive string for the part. + label : str, or list-like of str, optional, default None + A descriptive string for the part; if a list, must be the same length/shape as new_child. containment : bool, optional, default=True Add the part to self.children. replace : bool, optional, default=True @@ -829,11 +830,56 @@ def add( to add Compounds to an existing rigid body. """ # Support batch add via lists, tuples and sets. + # If iterable, we will first compose all the bondgraphs of individual + # Compounds in the list for efficiency from mbuild.port import Port if isinstance(new_child, Iterable) and not isinstance(new_child, str): - for child in new_child: - self.add(child, reset_rigid_ids=reset_rigid_ids) + compound_list = [c for c in _flatten_list(new_child)] + if label is not None and isinstance(label, (list, tuple)): + label_list = [c for c in _flatten_list(label)] + if len(label_list) != len(compound_list): + raise ValueError( + "The list-like object for label must be the same length as" + "the list-like object of child Compounds. " + f"total length of labels: {len(label_list)}, new_child: {len(new_child)}." + ) + temp_bond_graphs = [] + for child in compound_list: + # create a list of bond graphs of the children to add + if containment: + if child.bond_graph and not isinstance(self, Port): + temp_bond_graphs.append(child.bond_graph) + + # compose children bond_graphs; make sure we actually have graphs to compose + children_bond_graph = None + if len(temp_bond_graphs) != 0: + children_bond_graph = nx.compose_all(temp_bond_graphs) + + if ( + temp_bond_graphs + and not isinstance(self, Port) + and children_bond_graph is not None + ): + # If anything is added at self level, it is no longer a particle + # search for self in self.root.bond_graph and remove self + if self.root.bond_graph.has_node(self): + self.root.bond_graph.remove_node(self) + # compose the bond graph of all the children with the root + self.root.bond_graph = nx.compose( + self.root.bond_graph, children_bond_graph + ) + for i, child in enumerate(compound_list): + child.bond_graph = None + if label is not None: + self.add( + child, + label=label_list[i], + reset_rigid_ids=reset_rigid_ids, + ) + else: + self.add(child, reset_rigid_ids=reset_rigid_ids) + return if not isinstance(new_child, Compound): @@ -1859,9 +1905,9 @@ def _visualize_nglview(self, show_ports=False, color_scheme={}): mdtraj = import_("mdtraj") from mdtraj.geometry.sasa import _ATOMIC_RADII - remove_digits = lambda x: "".join( - i for i in x if not i.isdigit() or i == "_" - ) + def remove_digits(x): + return "".join(i for i in x if not i.isdigit() or i == "_") + for particle in self.particles(): particle.name = remove_digits(particle.name).upper() if not particle.name: @@ -1899,6 +1945,58 @@ def _visualize_nglview(self, show_ports=False, color_scheme={}): overwrite_nglview_default(widget) return widget + def condense(self, inplace=True): + """Condense the hierarchical structure of the Compound to the level of molecules. + + Modify the mBuild Compound to become a Compound with 3 distinct levels in the hierarchy. + The top level container (self), contains molecules (i.e., connected Compounds) and the + third level represents Particles (i.e., Compounds with no children). + If the system contains a Particle(s) without any connections to other Compounds, it will + appear in the 2nd level (with the top level self as a parent). + + Parameter + --------- + inplace : bool, optional, default=True + Option to perform the condense operation inplace or return a copy + + Return + ------ + self : mb.Compound or None + return a condensed Compound if inplace is False. + """ + # temporary list of components + comp_list = [] + connected_subgraph = self.root.bond_graph.connected_components() + + for molecule in connected_subgraph: + if len(molecule) == 1: + ancestors = [molecule[0]] + else: + ancestors = IndexedSet(molecule[0].ancestors()) + for particle in molecule[1:]: + # This works because the way in which particle.ancestors is + # traversed, the lower level will be in the front. + # The intersection will be left at the end, + # ancestor of the first particle is used as reference. + # Hence, this called will return the lowest-level Compound + # that is a molecule + ancestors = ancestors.intersection( + IndexedSet(particle.ancestors()) + ) + + """Parse molecule information""" + molecule_tag = ancestors[0] + comp_list.append(clone(molecule_tag)) + if inplace: + for child in [self.children]: + # Need to handle the case when child is a port + self.remove(child) + self.add(comp_list) + else: + new_compound = Compound(name=self.name) + new_compound.add(comp_list) + return new_compound + def flatten(self, inplace=True): """Flatten the hierarchical structure of the Compound. @@ -1913,7 +2011,7 @@ def flatten(self, inplace=True): Return ------ self : mb.Compound or None - return a flatten Compound if inplace is False. + return a flattened Compound if inplace is False. """ ports_list = list(self.all_ports()) children_list = list(self.children) @@ -2554,8 +2652,10 @@ def _energy_minimize_openbabel( f"Cannot create a constraint between a Particle and itself: {p1} {p2} ." ) - pid_1 = particle_idx[id(p1)] + 1 # openbabel indices start at 1 - pid_2 = particle_idx[id(p2)] + 1 # openbabel indices start at 1 + # openbabel indices start at 1 + pid_1 = particle_idx[id(p1)] + 1 + # openbabel indices start at 1 + pid_2 = particle_idx[id(p2)] + 1 dist = ( con_temp[1] * 10.0 ) # obenbabel uses angstroms, not nm, convert to angstroms @@ -3421,3 +3521,16 @@ def _clone_bonds(self, clone_of=None): Particle = Compound + + +def _flatten_list(c_list): + """Flatten a list. + + Helper function to flatten a list that may be nested, e.g. [comp1, [comp2, comp3]]. + """ + if isinstance(c_list, Iterable) and not isinstance(c_list, str): + for c in c_list: + if isinstance(c, Iterable) and not isinstance(c, str): + yield from _flatten_list(c) + else: + yield c diff --git a/mbuild/conversion.py b/mbuild/conversion.py index e2341b349..50e0f2bb8 100644 --- a/mbuild/conversion.py +++ b/mbuild/conversion.py @@ -554,19 +554,23 @@ def from_parmed( chains[residue.chain].append(residue) # Build up compound + chain_list = [] for chain, residues in chains.items(): if len(chain) > 1: chain_compound = mb.Compound() - compound.add(chain_compound, chain_id) + chain_list.append(chain_compound) else: chain_compound = compound + res_list = [] for residue in residues: if infer_hierarchy: residue_compound = mb.Compound(name=residue.name) - chain_compound.add(residue_compound) parent_compound = residue_compound + res_list.append(residue_compound) else: parent_compound = chain_compound + atom_list = [] + atom_label_list = [] for atom in residue.atoms: # Angstrom to nm pos = np.array([atom.xx, atom.xy, atom.xz]) / 10 @@ -577,8 +581,14 @@ def from_parmed( new_atom = mb.Particle( name=str(atom.name), pos=pos, element=element ) - parent_compound.add(new_atom, label="{0}[$]".format(atom.name)) + atom_list.append(new_atom) + atom_label_list.append("{0}[$]".format(atom.name)) atom_mapping[atom] = new_atom + parent_compound.add(atom_list, label=atom_label_list) + if infer_hierarchy: + chain_compound.add(res_list) + if len(chain) > 1: + compound.add(chain_list) # Infer bonds information for bond in structure.bonds: @@ -650,19 +660,29 @@ def from_trajectory( compound = mb.Compound() atom_mapping = dict() + # temporary lists to speed up add to the compound + chains_list = [] + chains_list_label = [] + for chain in traj.topology.chains: if traj.topology.n_chains > 1: chain_compound = mb.Compound() - compound.add(chain_compound, "chain[$]") + chains_list.append(chain_compound) + chains_list_label.append("chain[$]") else: chain_compound = compound + + res_list = [] for res in chain.residues: if infer_hierarchy: res_compound = mb.Compound(name=res.name) - chain_compound.add(res_compound) parent_cmpd = res_compound + res_list.append(res_compound) else: parent_cmpd = chain_compound + + atom_list = [] + atom_label_list = [] for atom in res.atoms: try: element = element_from_atomic_number( @@ -675,9 +695,17 @@ def from_trajectory( pos=traj.xyz[frame, atom.index], element=element, ) - parent_cmpd.add(new_atom, label="{0}[$]".format(atom.name)) + atom_list.append(new_atom) + atom_label_list.append("{0}[$]".format(atom.name)) atom_mapping[atom] = new_atom + parent_cmpd.add(atom_list, label=atom_label_list) + + if infer_hierarchy: + chain_compound.add(res_list) + if traj.topology.n_chains > 1: + compound.add(chains_list, label=chains_list_label) + for mdtraj_atom1, mdtraj_atom2 in traj.topology.bonds: atom1 = atom_mapping[mdtraj_atom1] atom2 = atom_mapping[mdtraj_atom2] @@ -846,13 +874,16 @@ def from_rdkit(rdkit_mol, compound=None, coords_only=False, smiles_seed=0): else: comp = compound + part_list = [] for i, atom in enumerate(mymol.GetAtoms()): part = mb.Particle( name=atom.GetSymbol(), element=element_from_atomic_number(atom.GetAtomicNum()), pos=xyz[i], ) - comp.add(part) + part_list.append(part) + + comp.add(part_list) for bond in mymol.GetBonds(): comp.add_bond( diff --git a/mbuild/tests/test_compound.py b/mbuild/tests/test_compound.py index d82a88714..99c19e1a5 100644 --- a/mbuild/tests/test_compound.py +++ b/mbuild/tests/test_compound.py @@ -621,6 +621,51 @@ def test_add_label_exists(self, ethane, h2o): with pytest.raises(MBuildError): ethane.add(mb.clone(h2o), label="water") + def test_list_flatten(self, h2o): + from mbuild.compound import _flatten_list + + out = [a for a in _flatten_list(["one", "two", ["three", "four"]])] + assert out == ["one", "two", "three", "four"] + + one = mb.clone(h2o) + one.name = "one" + two = mb.clone(h2o) + two.name = "two" + three = mb.clone(h2o) + three.name = "three" + four = mb.clone(h2o) + four.name = "four" + out = [a.name for a in _flatten_list([one, two, [three, four]])] + + assert out == ["one", "two", "three", "four"] + + def test_add_by_list(self, h2o): + temp_comp = mb.Compound() + comp_list = [] + label_list = [] + for j in range(0, 5): + comp_list.append(mb.clone(h2o)) + label_list.append("water[$]") + temp_comp.add(comp_list, label=label_list) + a = [k for k, v in temp_comp.labels.items()] + assert a == [ + "water", + "water[0]", + "water[1]", + "water[2]", + "water[3]", + "water[4]", + ] + + temp_comp = mb.Compound() + comp_list = [] + label_list = ["water"] + for j in range(0, 5): + comp_list.append(mb.clone(h2o)) + + with pytest.raises(ValueError): + temp_comp.add(comp_list, label=label_list) + def test_set_pos(self, ethane): with pytest.raises(MBuildError): ethane.pos = [0, 0, 0] @@ -877,6 +922,74 @@ def test_particle_in_particle(self): assert len(list(parent.ancestors())) == 0 assert next(parent.particles_by_name("A")) == part + def test_condense(self, ethane): + ethanes = mb.Compound() + ethanes.add(mb.clone(ethane)) + ethanes.add(mb.clone(ethane)) + system = mb.Compound() + system.add(ethanes) + system_hierarchy = system.print_hierarchy(show_tree=False) + + # Before we condese + assert len(system.children) == 1 + assert len(ethanes.children) == 2 + assert system.n_bonds == 14 + assert system.n_particles == 16 + assert system_hierarchy.depth() == 4 + + # Condense the Compound, returning a copy + condensed = system.condense(inplace=False) + condensed_hierarchy = condensed.print_hierarchy(show_tree=False) + + assert len(condensed.children) == 2 + assert condensed.n_bonds == 14 + assert condensed.n_particles == 16 + + assert condensed_hierarchy.depth() == 3 + assert ( + condensed_hierarchy.to_json(with_data=False) + == '{"Compound, 16 particles, 14 bonds, 2 children": {"children": [{"[Ethane x 2], 8 particles, 7 bonds, 2 children": {"children": [{"[CH3 x 2], 4 particles, 3 bonds, 4 children": {"children": ["[C x 1], 1 particles, 4 bonds, 0 children", "[H x 3], 1 particles, 1 bonds, 0 children"]}}]}}]}}' + ) + + # Condense the Compound in place + system_copy = mb.clone(system) + system_copy.condense(inplace=True) + condensed_hierarchy2 = system_copy.print_hierarchy(show_tree=False) + + assert len(system_copy.children) == 2 + assert system_copy.n_bonds == 14 + assert system_copy.n_particles == 16 + assert condensed_hierarchy2.depth() == 3 + assert ( + condensed_hierarchy2.to_json(with_data=False) + == '{"Compound, 16 particles, 14 bonds, 2 children": {"children": [{"[Ethane x 2], 8 particles, 7 bonds, 2 children": {"children": [{"[CH3 x 2], 4 particles, 3 bonds, 4 children": {"children": ["[C x 1], 1 particles, 4 bonds, 0 children", "[H x 3], 1 particles, 1 bonds, 0 children"]}}]}}]}}' + ) + + # add two particles that aren't bonded + system.add(mb.Compound(name="C")) + system.add(mb.Compound(name="C")) + system_hierarchy = system.print_hierarchy(show_tree=False) + + assert len(system.children) == 3 + assert len(ethanes.children) == 2 + assert system.n_bonds == 14 + assert system.n_particles == 18 + assert system_hierarchy.depth() == 4 + + # condense the system + condensed = system.condense(inplace=False) + condensed_hierarchy = condensed.print_hierarchy(show_tree=False) + + assert len(condensed.children) == 4 + assert condensed.n_bonds == 14 + assert condensed.n_particles == 18 + + assert condensed_hierarchy.depth() == 3 + assert ( + condensed_hierarchy.to_json(with_data=False) + == '{"Compound, 18 particles, 14 bonds, 4 children": {"children": ["[C x 2], 1 particles, 0 bonds, 0 children", {"[Ethane x 2], 8 particles, 7 bonds, 2 children": {"children": [{"[CH3 x 2], 4 particles, 3 bonds, 4 children": {"children": ["[C x 1], 1 particles, 4 bonds, 0 children", "[H x 3], 1 particles, 1 bonds, 0 children"]}}]}}]}}' + ) + def test_flatten_eth(self, ethane): # Before flattening assert len(ethane.children) == 2