From 02a5a5dac502883a393e616452e507e68bda0dad Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 10 Aug 2021 16:55:54 -0500 Subject: [PATCH] Remove groupbydoc method and consolidate tests. --- doc/api.rst | 2 - signac/contrib/project.py | 148 ++++---------------------------------- tests/test_project.py | 55 +++++--------- 3 files changed, 30 insertions(+), 175 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index a15543dc6..ee3966e35 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -29,7 +29,6 @@ The Project Project.find_jobs Project.fn Project.groupby - Project.groupbydoc Project.import_from Project.id Project.isfile @@ -62,7 +61,6 @@ The JobsCursor class .. autosummary:: JobsCursor.export_to JobsCursor.groupby - JobsCursor.groupbydoc JobsCursor.to_dataframe diff --git a/signac/contrib/project.py b/signac/contrib/project.py index 6dd3e97fd..49357e4c8 100644 --- a/signac/contrib/project.py +++ b/signac/contrib/project.py @@ -21,7 +21,6 @@ from tempfile import TemporaryDirectory from threading import RLock -from deprecation import deprecated from packaging import version from ..common.config import Config, get_config, load_config @@ -846,65 +845,16 @@ def groupby(self, key=None, default=None): present. The value must be sortable and is only used if not None (Default value = None). - Returns - ------- - key : str - Grouped key. + Yields + ------ + key : + Key identifying this group. group : iterable of Jobs - Iterable of `Job` instances matching this group key. + Iterable of `Job` instances matching this group. """ return self.find_jobs().groupby(key, default=default) - @deprecated( - deprecated_in="1.7", - removed_in="2.0", - current_version=__version__, - details=( - "Use groupby with a 'doc.' filter instead, see " - "https://docs.signac.io/en/latest/query.html#query-namespaces." - ), - ) - def groupbydoc(self, key=None, default=None): - """Group jobs according to one or more document values. - - This method can be called on any :class:`~signac.contrib.project.JobsCursor` such as - the one returned by :meth:`~signac.Project.find_jobs` or by iterating over a - project. - - Examples - -------- - .. code-block:: python - - # Group jobs by document value 'a'. - for key, group in project.groupbydoc('a'): - print(key, list(group)) - - # Find jobs where job.sp['a'] is 1 and group them - # by job.document['b'] and job.document['c']. - for key, group in project.find_jobs({'a': 1}).groupbydoc(('b', 'c')): - print(key, list(group)) - - # Group by whether 'd' is a field in the job.document using a lambda. - for key, group in project.groupbydoc(lambda doc: 'd' in doc): - print(key, list(group)) - - If `key` is None, jobs are grouped by id, placing one job into each group. - - Parameters - ---------- - key : str, iterable, or callable - The document grouping parameter(s) passed as a string, iterable - of strings, or a callable that will be passed one argument, - :attr:`~signac.contrib.job.Job.document` (Default value = None). - default : - A default value to be used when a given document key is not - present. The value must be sortable and is only used if not None - (Default value = None). - - """ - return self.find_jobs().groupbydoc(key, default=default) - def to_dataframe(self, *args, **kwargs): r"""Export the project metadata to a pandas :class:`~pandas.DataFrame`. @@ -2063,6 +2013,13 @@ def groupby(self, key=None, default=None): present. The value must be sortable and is only used if not None (Default value = None). + Yields + ------ + key : + Key identifying this group. + group : iterable of Jobs + Iterable of `Job` instances matching this group. + """ _filter = self._filter @@ -2160,87 +2117,6 @@ def keyfunction(job): key=keyfunction, ) - @deprecated( - deprecated_in="1.7", - removed_in="2.0", - current_version=__version__, - details=( - "Use groupby with a 'doc.' filter instead, see " - "https://docs.signac.io/en/latest/query.html#query-namespaces." - ), - ) - def groupbydoc(self, key=None, default=None): - """Group jobs according to one or more document values. - - This method can be called on any :class:`~signac.contrib.project.JobsCursor` such as - the one returned by :meth:`~signac.Project.find_jobs` or by iterating over a - project. - - Examples - -------- - .. code-block:: python - - # Group jobs by document value 'a'. - for key, group in project.groupbydoc('a'): - print(key, list(group)) - - # Find jobs where job.sp['a'] is 1 and group them - # by job.document['b'] and job.document['c']. - for key, group in project.find_jobs({'a': 1}).groupbydoc(('b', 'c')): - print(key, list(group)) - - # Group by whether 'd' is a field in the job.document using a lambda. - for key, group in project.groupbydoc(lambda doc: 'd' in doc): - print(key, list(group)) - - If `key` is None, jobs are grouped by id, placing one job into each group. - - Parameters - ---------- - key : str, iterable, or callable - The document grouping parameter(s) passed as a string, iterable - of strings, or a callable that will be passed one argument, - :attr:`~signac.contrib.job.Job.document` (Default value = None). - default : - A default value to be used when a given document key is not - present. The value must be sortable and is only used if not None - (Default value = None). - - """ - if isinstance(key, str): - if default is None: - - def keyfunction(job): - return job.document[key] - - else: - - def keyfunction(job): - return job.document.get(key, default) - - elif isinstance(key, Iterable): - if default is None: - - def keyfunction(job): - return tuple(job.document[k] for k in key) - - else: - - def keyfunction(job): - return tuple(job.document.get(k, default) for k in key) - - elif key is None: - # Must return a type that can be ordered with <, > - def keyfunction(job): - return str(job) - - else: - # Pass the job document to a callable - def keyfunction(job): - return key(job.document) - - return groupby(sorted(iter(self), key=keyfunction), key=keyfunction) - def export_to(self, target, path=None, copytree=None): """Export all jobs to a target location, such as a directory or a (zipped) archive file. diff --git a/tests/test_project.py b/tests/test_project.py index ba22e356e..102238453 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -927,6 +927,11 @@ def get_doc(i): for job in list(g): assert job.document["a"] == k + for k, g in self.project.groupby("doc.b"): + assert len(list(g)) == 6 + for job in list(g): + assert job.document["b"] == k + assert len(list(self.project.groupby("doc.d"))) == 0 for k, g in self.project.groupby("doc.d", default=-1): assert k == -1 @@ -950,6 +955,17 @@ def get_doc(i): assert job.sp["b"] == k[0] assert job.document["c"] == k[1] + for k, g in self.project.groupby(lambda job: job.doc["a"] % 4): + assert len(list(g)) == 3 + for job in list(g): + assert job.document["a"] % 4 == k + + for k, g in self.project.groupby(lambda job: str(job.doc)): + assert len(list(g)) == 1 + for job in list(g): + assert str(job.document) == k + + # Make the schema heterogeneous self.project.open_job({"a": 20}).init() for k, g in self.project.groupby("b"): assert len(list(g)) == 6 @@ -961,45 +977,10 @@ def get_doc(i): assert job.sp["b"] == k[0] assert job.sp["c"] == k[1] - def test_jobs_groupbydoc(self): - def get_doc(i): - return {"a": i, "b": i % 2, "c": i % 3} - - for i in range(12): - job = self.project.open_job({"i": i}).init() - job.document = get_doc(i) - - for k, g in self.project.groupbydoc("a"): - assert len(list(g)) == 1 - for job in list(g): - assert job.document["a"] == k - for k, g in self.project.groupbydoc("b"): - assert len(list(g)) == 6 - for job in list(g): - assert job.document["b"] == k - with pytest.raises(KeyError): - for k, g in self.project.groupbydoc("d"): - pass - for k, g in self.project.groupbydoc("d", default=-1): - assert k == -1 - assert len(list(g)) == len(self.project) - for k, g in self.project.groupbydoc(("b", "c")): - assert len(list(g)) == 2 - for job in list(g): - assert job.document["b"] == k[0] - assert job.document["c"] == k[1] - for k, g in self.project.groupbydoc(lambda doc: doc["a"] % 4): - assert len(list(g)) == 3 - for job in list(g): - assert job.document["a"] % 4 == k - for k, g in self.project.groupbydoc(lambda doc: str(doc)): - assert len(list(g)) == 1 - for job in list(g): - assert str(job.document) == k group_count = 0 - for k, g in self.project.groupbydoc(): + for k, g in self.project.groupby(lambda job: job.id): assert len(list(g)) == 1 - group_count = group_count + 1 + group_count += 1 for job in list(g): assert str(job) == k assert group_count == len(list(self.project.find_jobs()))