Remove groupbydoc method and consolidate tests. (#601)
bdice committed Oct 27, 2022
1 parent f23e6d3 commit b773921
Showing 4 changed files with 23 additions and 174 deletions.
changelog.txt (2 changes: 1 addition & 1 deletion)
@@ -16,7 +16,7 @@ Removed
- The pre-SyncedCollection synchronized dictionary classes, including SyncedDict, SyncedAttrDict, and JSONDict (#577).
- The old custom JSON encoder and dumps wrapper (#577).
- The MPIPool and the filesystems.py module (#575).
- The following Project methods: ``get_id``, ``build_job_search_index``, ``build_job_statepoint_index``, ``find_job_ids``, ``reset_statepoint``, ``update_statepoint``, ``create_access_module``, ``index``, ``dump_statepoints``, ``get_statepoint``, ``read_statepoints``, ``write_statepoints`` (#574, #593, #599).
- The following Project methods: ``get_id``, ``build_job_search_index``, ``build_job_statepoint_index``, ``find_job_ids``, ``reset_statepoint``, ``update_statepoint``, ``create_access_module``, ``index``, ``dump_statepoints``, ``get_statepoint``, ``read_statepoints``, ``write_statepoints``, ``groupbydoc`` (#574, #593, #599, #601).
- The following Job methods: ``get_id`` (#578).
- The ``syncutil.copytree`` method (#581).
- All Crawlers, including ``RegexFileCrawler``, ``MainCrawler``, ``MasterCrawler``, ``SignacProjectCrawler``, and ``BaseCrawler``, in addition to all associated functionality in indexing.py (#580).
doc/api.rst (2 changes: 0 additions & 2 deletions)
@@ -29,7 +29,6 @@ The Project
Project.find_jobs
Project.fn
Project.groupby
Project.groupbydoc
Project.import_from
Project.id
Project.isfile
@@ -62,7 +61,6 @@ The JobsCursor class
.. autosummary::
JobsCursor.export_to
JobsCursor.groupby
JobsCursor.groupbydoc
JobsCursor.to_dataframe


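With the groupbydoc entries dropped from the API reference, document-based grouping is exposed only through Project.groupby and JobsCursor.groupby using the 'doc.' query namespace, as the deprecation notice in the project.py diff below spells out. A minimal usage sketch, not part of this commit, assuming an existing signac project whose state points define 'a' and whose job documents define 'a' and 'b':

    import signac

    project = signac.get_project()  # assumes the working directory is inside a signac project

    # Group all jobs by a document value (previously: project.groupbydoc("a")).
    for key, group in project.groupby("doc.a"):
        print(key, [job.id for job in group])

    # The same pattern works on the JobsCursor returned by find_jobs
    # (previously: project.find_jobs({"a": 1}).groupbydoc("b")).
    for key, group in project.find_jobs({"a": 1}).groupby("doc.b"):
        print(key, [job.id for job in group])
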
signac/contrib/project.py (138 changes: 4 additions & 134 deletions)
Expand Up @@ -853,62 +853,13 @@ def groupby(self, key=None, default=None):
Yields
------
key :
Grouped key.
Key identifying this group.
group : iterable of Jobs
Iterable of `Job` instances matching this group key.
Iterable of `Job` instances matching this group.
"""
yield from self.find_jobs().groupby(key, default=default)

@deprecated(
deprecated_in="1.7",
removed_in="2.0",
current_version=__version__,
details=(
"Use groupby with a 'doc.' filter instead, see "
"https://docs.signac.io/en/latest/query.html#query-namespaces."
),
)
def groupbydoc(self, key=None, default=None):
"""Group jobs according to one or more document values.
This method can be called on any :class:`~signac.contrib.project.JobsCursor` such as
the one returned by :meth:`~signac.Project.find_jobs` or by iterating over a
project.
Examples
--------
.. code-block:: python
# Group jobs by document value 'a'.
for key, group in project.groupbydoc('a'):
print(key, list(group))
# Find jobs where job.sp['a'] is 1 and group them
# by job.document['b'] and job.document['c'].
for key, group in project.find_jobs({'a': 1}).groupbydoc(('b', 'c')):
print(key, list(group))
# Group by whether 'd' is a field in the job.document using a lambda.
for key, group in project.groupbydoc(lambda doc: 'd' in doc):
print(key, list(group))
If `key` is None, jobs are grouped by id, placing one job into each group.
Parameters
----------
key : str, iterable, or callable
The document grouping parameter(s) passed as a string, iterable
of strings, or a callable that will be passed one argument,
:attr:`~signac.contrib.job.Job.document` (Default value = None).
default :
A default value to be used when a given document key is not
present. The value must be sortable and is only used if not None
(Default value = None).
"""
return self.find_jobs().groupbydoc(key, default=default)

def to_dataframe(self, *args, **kwargs):
r"""Export the project metadata to a pandas :class:`~pandas.DataFrame`.
@@ -2099,9 +2050,9 @@ def groupby(self, key=None, default=None):
Yields
------
key :
Grouped key.
Key identifying this group.
group : iterable of Jobs
Iterable of `Job` instances matching this group key.
Iterable of `Job` instances matching this group.
"""
_filter = self._filter
@@ -2200,87 +2151,6 @@ def keyfunction(job):
key=keyfunction,
)

@deprecated(
deprecated_in="1.7",
removed_in="2.0",
current_version=__version__,
details=(
"Use groupby with a 'doc.' filter instead, see "
"https://docs.signac.io/en/latest/query.html#query-namespaces."
),
)
def groupbydoc(self, key=None, default=None):
"""Group jobs according to one or more document values.
This method can be called on any :class:`~signac.contrib.project.JobsCursor` such as
the one returned by :meth:`~signac.Project.find_jobs` or by iterating over a
project.
Examples
--------
.. code-block:: python
# Group jobs by document value 'a'.
for key, group in project.groupbydoc('a'):
print(key, list(group))
# Find jobs where job.sp['a'] is 1 and group them
# by job.document['b'] and job.document['c'].
for key, group in project.find_jobs({'a': 1}).groupbydoc(('b', 'c')):
print(key, list(group))
# Group by whether 'd' is a field in the job.document using a lambda.
for key, group in project.groupbydoc(lambda doc: 'd' in doc):
print(key, list(group))
If `key` is None, jobs are grouped by id, placing one job into each group.
Parameters
----------
key : str, iterable, or callable
The document grouping parameter(s) passed as a string, iterable
of strings, or a callable that will be passed one argument,
:attr:`~signac.contrib.job.Job.document` (Default value = None).
default :
A default value to be used when a given document key is not
present. The value must be sortable and is only used if not None
(Default value = None).
"""
if isinstance(key, str):
if default is None:

def keyfunction(job):
return job.document[key]

else:

def keyfunction(job):
return job.document.get(key, default)

elif isinstance(key, Iterable):
if default is None:

def keyfunction(job):
return tuple(job.document[k] for k in key)

else:

def keyfunction(job):
return tuple(job.document.get(k, default) for k in key)

elif key is None:
# Must return a type that can be ordered with <, >
def keyfunction(job):
return str(job)

else:
# Pass the job document to a callable
def keyfunction(job):
return key(job.document)

return groupby(sorted(iter(self), key=keyfunction), key=keyfunction)

def export_to(self, target, path=None, copytree=None):
"""Export all jobs to a target location, such as a directory or a (zipped) archive file.
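The removed JobsCursor.groupbydoc above built its grouping key from job.document and dispatched on the key type (string, iterable of strings, None, or callable). The surviving groupby handles the callable and no-key cases as well, the difference being that a callable now receives the Job rather than its document. A hedged sketch of those two migrations, mirroring the consolidated tests in the next file; `project` is assumed to be an existing signac Project whose job documents define an integer 'a':

    # Callable key: groupbydoc passed job.document to the callable, while
    # groupby passes the Job itself
    # (previously: project.groupbydoc(lambda doc: doc["a"] % 4)).
    for key, group in project.groupby(lambda job: job.document["a"] % 4):
        print(key, len(list(group)))

    # No key: groupbydoc() grouped one job per id; the consolidated test
    # expresses the same grouping with an explicit callable.
    for key, group in project.groupby(lambda job: job.id):
        assert len(list(group)) == 1
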
tests/test_project.py (55 changes: 18 additions & 37 deletions)
@@ -938,6 +938,11 @@ def get_doc(i):
for job in list(g):
assert job.document["a"] == k

for k, g in self.project.groupby("doc.b"):
assert len(list(g)) == 6
for job in list(g):
assert job.document["b"] == k

assert len(list(self.project.groupby("doc.d"))) == 0
for k, g in self.project.groupby("doc.d", default=-1):
assert k == -1
@@ -961,6 +966,17 @@ def get_doc(i):
assert job.sp["b"] == k[0]
assert job.document["c"] == k[1]

for k, g in self.project.groupby(lambda job: job.doc["a"] % 4):
assert len(list(g)) == 3
for job in list(g):
assert job.document["a"] % 4 == k

for k, g in self.project.groupby(lambda job: str(job.doc)):
assert len(list(g)) == 1
for job in list(g):
assert str(job.document) == k

# Make the schema heterogeneous
self.project.open_job({"a": 20}).init()
for k, g in self.project.groupby("b"):
assert len(list(g)) == 6
@@ -972,45 +988,10 @@ def get_doc(i):
assert job.sp["b"] == k[0]
assert job.sp["c"] == k[1]

def test_jobs_groupbydoc(self):
def get_doc(i):
return {"a": i, "b": i % 2, "c": i % 3}

for i in range(12):
job = self.project.open_job({"i": i}).init()
job.document = get_doc(i)

for k, g in self.project.groupbydoc("a"):
assert len(list(g)) == 1
for job in list(g):
assert job.document["a"] == k
for k, g in self.project.groupbydoc("b"):
assert len(list(g)) == 6
for job in list(g):
assert job.document["b"] == k
with pytest.raises(KeyError):
for k, g in self.project.groupbydoc("d"):
pass
for k, g in self.project.groupbydoc("d", default=-1):
assert k == -1
assert len(list(g)) == len(self.project)
for k, g in self.project.groupbydoc(("b", "c")):
assert len(list(g)) == 2
for job in list(g):
assert job.document["b"] == k[0]
assert job.document["c"] == k[1]
for k, g in self.project.groupbydoc(lambda doc: doc["a"] % 4):
assert len(list(g)) == 3
for job in list(g):
assert job.document["a"] % 4 == k
for k, g in self.project.groupbydoc(lambda doc: str(doc)):
assert len(list(g)) == 1
for job in list(g):
assert str(job.document) == k
group_count = 0
for k, g in self.project.groupbydoc():
for k, g in self.project.groupby(lambda job: job.id):
assert len(list(g)) == 1
group_count = group_count + 1
group_count += 1
for job in list(g):
assert str(job) == k
assert group_count == len(list(self.project.find_jobs()))
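One behavior the consolidated tests pin down: grouping on a document key that no job defines yields no groups, while supplying a default collects every job under the default key. A small illustrative sketch, assuming (like the test fixture above) a signac Project `project` in which no job document contains 'd':

    # No job document defines 'd', so there is nothing to group.
    assert len(list(project.groupby("doc.d"))) == 0

    # With a default, every job falls into a single group keyed by the default.
    for key, group in project.groupby("doc.d", default=-1):
        assert key == -1
        assert len(list(group)) == len(project)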
