From b00d502d90dbb98be5b7e7ce9f17d82ae5d4d49f Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Mon, 7 Oct 2024 22:34:16 +0200 Subject: [PATCH 01/11] Update nucleicacids.py added backend and aggregators --- package/MDAnalysis/analysis/nucleicacids.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/package/MDAnalysis/analysis/nucleicacids.py b/package/MDAnalysis/analysis/nucleicacids.py index 0eccd039ba4..69e79b383e9 100644 --- a/package/MDAnalysis/analysis/nucleicacids.py +++ b/package/MDAnalysis/analysis/nucleicacids.py @@ -161,6 +161,12 @@ class NucPairDist(AnalysisBase): helper for selecting atom pairs for distance analysis. """ + _analysis_algorithm_is_parallelizable = True + + @classmethod + def get_supported_backends(cls): + return ('serial', 'multiprocessing', 'dask',) + _s1: mda.AtomGroup _s2: mda.AtomGroup _n_sel: int @@ -292,6 +298,11 @@ def _conclude(self) -> None: self.results['pair_distances'] = self.results['distances'] # TODO: remove pair_distances in 3.0.0 + def _get_aggregator(self): + return ResultsGroup(lookup={ + 'distances': ResultsGroup.ndarray_vstack, + 'pair_distances': ResultsGroup.ndarray_vstack,} + ) class WatsonCrickDist(NucPairDist): r""" From dc03a0248f2d27445fe22d207ebfe113570d4e2c Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Mon, 7 Oct 2024 22:36:08 +0200 Subject: [PATCH 02/11] Update conftest.py added NucPairDist to the conftest.py --- testsuite/MDAnalysisTests/analysis/conftest.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/testsuite/MDAnalysisTests/analysis/conftest.py b/testsuite/MDAnalysisTests/analysis/conftest.py index fc3c8a480c7..6cada0d6d6d 100644 --- a/testsuite/MDAnalysisTests/analysis/conftest.py +++ b/testsuite/MDAnalysisTests/analysis/conftest.py @@ -14,6 +14,7 @@ from MDAnalysis.analysis.hydrogenbonds.hbond_analysis import ( HydrogenBondAnalysis, ) +from 
MDAnalysis.analysis.nucleicacids import NucPairDist from MDAnalysis.lib.util import is_installed @@ -141,3 +142,10 @@ def client_DSSP(request): @pytest.fixture(scope='module', params=params_for_cls(HydrogenBondAnalysis)) def client_HydrogenBondAnalysis(request): return request.param + + +# MDAnalysis.analysis.nucleicacids + +@pytest.fixture(scope="module", params=params_for_cls(NucPairDist)) +def client_NucPairDist(request): + return request.param From 39612c842993f74b77f51a365d6459e3c97a3586 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Mon, 7 Oct 2024 22:37:30 +0200 Subject: [PATCH 03/11] Update test_nucleicacids.py added client_NucPairDist to the tests --- .../MDAnalysisTests/analysis/test_nucleicacids.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_nucleicacids.py b/testsuite/MDAnalysisTests/analysis/test_nucleicacids.py index fb7d39374cd..80e943f4379 100644 --- a/testsuite/MDAnalysisTests/analysis/test_nucleicacids.py +++ b/testsuite/MDAnalysisTests/analysis/test_nucleicacids.py @@ -55,12 +55,12 @@ def test_empty_ag_error(strand): @pytest.fixture(scope='module') -def wc_rna(strand): +def wc_rna(strand, client_NucPairDist): strand1 = ResidueGroup([strand.residues[0], strand.residues[21]]) strand2 = ResidueGroup([strand.residues[1], strand.residues[22]]) WC = WatsonCrickDist(strand1, strand2) - WC.run() + WC.run(**client_NucPairDist) return WC @@ -114,23 +114,23 @@ def test_wc_dis_results_keyerrs(wc_rna, key): wc_rna.results[key] -def test_minor_dist(strand): +def test_minor_dist(strand, client_NucPairDist): strand1 = ResidueGroup([strand.residues[2], strand.residues[19]]) strand2 = ResidueGroup([strand.residues[16], strand.residues[4]]) MI = MinorPairDist(strand1, strand2) - MI.run() + MI.run(**client_NucPairDist) assert MI.results.distances[0, 0] == approx(15.06506, rel=1e-3) assert MI.results.distances[0, 1] == approx(3.219116, 
rel=1e-3) -def test_major_dist(strand): +def test_major_dist(strand, client_NucPairDist): strand1 = ResidueGroup([strand.residues[1], strand.residues[4]]) strand2 = ResidueGroup([strand.residues[11], strand.residues[8]]) MA = MajorPairDist(strand1, strand2) - MA.run() + MA.run(**client_NucPairDist) assert MA.results.distances[0, 0] == approx(26.884272, rel=1e-3) assert MA.results.distances[0, 1] == approx(13.578535, rel=1e-3) From 8feb071042676c1855b0b480d4d078eee3b0f85a Mon Sep 17 00:00:00 2001 From: Yuxuan Zhuang Date: Wed, 9 Oct 2024 17:58:49 -0700 Subject: [PATCH 04/11] fix nucl parallel --- package/MDAnalysis/analysis/nucleicacids.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/package/MDAnalysis/analysis/nucleicacids.py b/package/MDAnalysis/analysis/nucleicacids.py index 69e79b383e9..867fd16642a 100644 --- a/package/MDAnalysis/analysis/nucleicacids.py +++ b/package/MDAnalysis/analysis/nucleicacids.py @@ -70,7 +70,7 @@ import MDAnalysis as mda from .distances import calc_bonds -from .base import AnalysisBase, Results +from .base import AnalysisBase, ResultsGroup from MDAnalysis.core.groups import Residue, ResidueGroup @@ -282,7 +282,7 @@ def select_strand_atoms( return (sel1, sel2) def _prepare(self) -> None: - self._res_array: np.ndarray = np.zeros( + self.results.distances: np.ndarray = np.zeros( [self.n_frames, self._n_sel] ) @@ -291,17 +291,16 @@ def _single_frame(self) -> None: self._s1.positions, self._s2.positions ) - self._res_array[self._frame_index, :] = dist + self.results.distances[self._frame_index, :] = dist def _conclude(self) -> None: - self.results['distances'] = self._res_array self.results['pair_distances'] = self.results['distances'] # TODO: remove pair_distances in 3.0.0 def _get_aggregator(self): return ResultsGroup(lookup={ 'distances': ResultsGroup.ndarray_vstack, - 'pair_distances': ResultsGroup.ndarray_vstack,} + } ) class WatsonCrickDist(NucPairDist): From b2d81b006c19c207f6284b9e4a6864936017f588 Mon Sep 
17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Mon, 14 Oct 2024 20:10:50 +0200 Subject: [PATCH 05/11] Update nucleicacids.py fixing PEP --- package/MDAnalysis/analysis/nucleicacids.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/analysis/nucleicacids.py b/package/MDAnalysis/analysis/nucleicacids.py index 867fd16642a..a8806febd46 100644 --- a/package/MDAnalysis/analysis/nucleicacids.py +++ b/package/MDAnalysis/analysis/nucleicacids.py @@ -166,7 +166,7 @@ class NucPairDist(AnalysisBase): @classmethod def get_supported_backends(cls): return ('serial', 'multiprocessing', 'dask',) - + _s1: mda.AtomGroup _s2: mda.AtomGroup _n_sel: int @@ -299,7 +299,7 @@ def _conclude(self) -> None: def _get_aggregator(self): return ResultsGroup(lookup={ - 'distances': ResultsGroup.ndarray_vstack, + 'distances': ResultsGroup.ndarray_vstack, } ) From e30716b819621cfa209891203197bafff84100d0 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Mon, 14 Oct 2024 20:12:28 +0200 Subject: [PATCH 06/11] Update nucleicacids.py --- package/MDAnalysis/analysis/nucleicacids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/analysis/nucleicacids.py b/package/MDAnalysis/analysis/nucleicacids.py index a8806febd46..da08604a575 100644 --- a/package/MDAnalysis/analysis/nucleicacids.py +++ b/package/MDAnalysis/analysis/nucleicacids.py @@ -166,7 +166,7 @@ class NucPairDist(AnalysisBase): @classmethod def get_supported_backends(cls): return ('serial', 'multiprocessing', 'dask',) - + _s1: mda.AtomGroup _s2: mda.AtomGroup _n_sel: int From 883eb1ed2794fad51fa6e15da102c51723a0af31 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Mon, 14 Oct 2024 20:14:22 +0200 Subject: [PATCH 07/11] Update nucleicacids.py added versionchanged for addition of parallelization --- 
package/MDAnalysis/analysis/nucleicacids.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/package/MDAnalysis/analysis/nucleicacids.py b/package/MDAnalysis/analysis/nucleicacids.py index da08604a575..f0649be6d50 100644 --- a/package/MDAnalysis/analysis/nucleicacids.py +++ b/package/MDAnalysis/analysis/nucleicacids.py @@ -159,6 +159,11 @@ class NucPairDist(AnalysisBase): .. versionchanged:: 2.7.0 Added static method :attr:`select_strand_atoms` as a helper for selecting atom pairs for distance analysis. + + .. versionchanged:: 2.8.0 + Enabled **parallel execution** with the ``multiprocessing`` and ``dask`` + backends; use the new method :meth:`get_supported_backends` to see all + supported backends. """ _analysis_algorithm_is_parallelizable = True From 701ed01621624d31a98b462937e7544f8b988431 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Mon, 14 Oct 2024 20:16:10 +0200 Subject: [PATCH 08/11] Update nucleicacids.py --- package/MDAnalysis/analysis/nucleicacids.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/analysis/nucleicacids.py b/package/MDAnalysis/analysis/nucleicacids.py index f0649be6d50..5f5bdd9d46d 100644 --- a/package/MDAnalysis/analysis/nucleicacids.py +++ b/package/MDAnalysis/analysis/nucleicacids.py @@ -161,8 +161,8 @@ class NucPairDist(AnalysisBase): helper for selecting atom pairs for distance analysis. .. versionchanged:: 2.8.0 - Enabled **parallel execution** with the ``multiprocessing`` and ``dask`` - backends; use the new method :meth:`get_supported_backends` to see all + Enabled **parallel execution** with the ``multiprocessing`` and ``dask`` + backends; use the new method :meth:`get_supported_backends` to see all supported backends. 
""" From 82a89f9593bf2fafd51294c8f61e0101f56c28f8 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Mon, 14 Oct 2024 20:20:42 +0200 Subject: [PATCH 09/11] Update conftest.py --- testsuite/MDAnalysisTests/analysis/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/analysis/conftest.py b/testsuite/MDAnalysisTests/analysis/conftest.py index 6cada0d6d6d..a60b565f1c6 100644 --- a/testsuite/MDAnalysisTests/analysis/conftest.py +++ b/testsuite/MDAnalysisTests/analysis/conftest.py @@ -145,7 +145,7 @@ def client_HydrogenBondAnalysis(request): # MDAnalysis.analysis.nucleicacids - + @pytest.fixture(scope="module", params=params_for_cls(NucPairDist)) def client_NucPairDist(request): return request.param From 886832ef43aac2bf4615c657463c95046ba3c14f Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Mon, 14 Oct 2024 20:23:42 +0200 Subject: [PATCH 10/11] Update CHANGELOG Added Parallelization of nucleicacids.py and fixed lettering --- package/CHANGELOG | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index b284ffddeec..6c266e1cfc4 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -59,12 +59,13 @@ Enhancements * Introduce parallelization API to `AnalysisBase` and to `analysis.rms.RMSD` class (Issue #4158, PR #4304) * Enables parallelization for analysis.gnm.GNMAnalysis (Issue #4672) - * explicitly mark `analysis.pca.PCA` as not parallelizable (Issue #4680) - * enables parallelization for analysis.bat.BAT (Issue #4663) - * enable parallelization for analysis.dihedrals.{Dihedral,Ramachandran,Janin} + * Explicitly mark `analysis.pca.PCA` as not parallelizable (Issue #4680) + * Enables parallelization for analysis.bat.BAT (Issue #4663) + * Enable parallelization for analysis.dihedrals.{Dihedral,Ramachandran,Janin} (Issue #4673) - * enables parallelization for 
analysis.dssp.dssp.DSSP (Issue #4674) + * Enables parallelization for analysis.dssp.dssp.DSSP (Issue #4674) * Enables parallelization for analysis.hydrogenbonds.hbond_analysis.HydrogenBondAnalysis (Issue #4664) + * Enables parallelization for analysis.nucleicacids.NucPairDist (Issue #4670) * Improve error message for `AtomGroup.unwrap()` when bonds are not present.(Issue #4436, PR #4642) * Add `analysis.DSSP` module for protein secondary structure assignment, based on [pydssp](https://github.com/ShintaroMinami/PyDSSP) * Added a tqdm progress bar for `MDAnalysis.analysis.pca.PCA.transform()` From c722ac4f7f67b0fd0c15d3d61d057e8492cb0541 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Mon, 14 Oct 2024 20:27:57 +0200 Subject: [PATCH 11/11] Update nucleicacids.py Addition of mention of modification to self.results.distances --- package/MDAnalysis/analysis/nucleicacids.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/package/MDAnalysis/analysis/nucleicacids.py b/package/MDAnalysis/analysis/nucleicacids.py index 5f5bdd9d46d..a7a9a6c3db5 100644 --- a/package/MDAnalysis/analysis/nucleicacids.py +++ b/package/MDAnalysis/analysis/nucleicacids.py @@ -164,6 +164,8 @@ class NucPairDist(AnalysisBase): Enabled **parallel execution** with the ``multiprocessing`` and ``dask`` backends; use the new method :meth:`get_supported_backends` to see all supported backends. + The private ``self._res_array`` was replaced by + ``self.results.distances`` so that parallel results can be merged. """ _analysis_algorithm_is_parallelizable = True