From 02a5a5dac502883a393e616452e507e68bda0dad Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 10 Aug 2021 16:55:54 -0500
Subject: [PATCH] Remove groupbydoc method and consolidate tests.

---
 doc/api.rst               |   2 -
 signac/contrib/project.py | 148 ++++----------------------------------
 tests/test_project.py     |  55 +++++---------
 3 files changed, 30 insertions(+), 175 deletions(-)

diff --git a/doc/api.rst b/doc/api.rst
index a15543dc6..ee3966e35 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -29,7 +29,6 @@ The Project
     Project.find_jobs
     Project.fn
     Project.groupby
-    Project.groupbydoc
     Project.import_from
     Project.id
     Project.isfile
@@ -62,7 +61,6 @@ The JobsCursor class
 .. autosummary::
     JobsCursor.export_to
     JobsCursor.groupby
-    JobsCursor.groupbydoc
     JobsCursor.to_dataframe
 
 
diff --git a/signac/contrib/project.py b/signac/contrib/project.py
index 6dd3e97fd..49357e4c8 100644
--- a/signac/contrib/project.py
+++ b/signac/contrib/project.py
@@ -21,7 +21,6 @@
 from tempfile import TemporaryDirectory
 from threading import RLock
 
-from deprecation import deprecated
 from packaging import version
 
 from ..common.config import Config, get_config, load_config
@@ -846,65 +845,16 @@ def groupby(self, key=None, default=None):
             present. The value must be sortable and is only used if not None
             (Default value = None).
 
-        Returns
-        -------
-        key : str
-            Grouped key.
+        Yields
+        ------
+        key :
+            Key identifying this group.
         group : iterable of Jobs
-            Iterable of `Job` instances matching this group key.
+            Iterable of `Job` instances matching this group.
 
         """
         return self.find_jobs().groupby(key, default=default)
 
-    @deprecated(
-        deprecated_in="1.7",
-        removed_in="2.0",
-        current_version=__version__,
-        details=(
-            "Use groupby with a 'doc.' filter instead, see "
-            "https://docs.signac.io/en/latest/query.html#query-namespaces."
-        ),
-    )
-    def groupbydoc(self, key=None, default=None):
-        """Group jobs according to one or more document values.
-
-        This method can be called on any :class:`~signac.contrib.project.JobsCursor` such as
-        the one returned by :meth:`~signac.Project.find_jobs` or by iterating over a
-        project.
-
-        Examples
-        --------
-        .. code-block:: python
-
-            # Group jobs by document value 'a'.
-            for key, group in project.groupbydoc('a'):
-                print(key, list(group))
-
-            # Find jobs where job.sp['a'] is 1 and group them
-            # by job.document['b'] and job.document['c'].
-            for key, group in project.find_jobs({'a': 1}).groupbydoc(('b', 'c')):
-                print(key, list(group))
-
-            # Group by whether 'd' is a field in the job.document using a lambda.
-            for key, group in project.groupbydoc(lambda doc: 'd' in doc):
-                print(key, list(group))
-
-        If `key` is None, jobs are grouped by id, placing one job into each group.
-
-        Parameters
-        ----------
-        key : str, iterable, or callable
-            The document grouping parameter(s) passed as a string, iterable
-            of strings, or a callable that will be passed one argument,
-            :attr:`~signac.contrib.job.Job.document` (Default value = None).
-        default :
-            A default value to be used when a given document key is not
-            present. The value must be sortable and is only used if not None
-            (Default value = None).
-
-        """
-        return self.find_jobs().groupbydoc(key, default=default)
-
     def to_dataframe(self, *args, **kwargs):
         r"""Export the project metadata to a pandas :class:`~pandas.DataFrame`.
 
@@ -2063,6 +2013,13 @@ def groupby(self, key=None, default=None):
             present. The value must be sortable and is only used if not None
             (Default value = None).
 
+        Yields
+        ------
+        key :
+            Key identifying this group.
+        group : iterable of Jobs
+            Iterable of `Job` instances matching this group.
+
         """
         _filter = self._filter
 
@@ -2160,87 +2117,6 @@ def keyfunction(job):
             key=keyfunction,
         )
 
-    @deprecated(
-        deprecated_in="1.7",
-        removed_in="2.0",
-        current_version=__version__,
-        details=(
-            "Use groupby with a 'doc.' filter instead, see "
-            "https://docs.signac.io/en/latest/query.html#query-namespaces."
-        ),
-    )
-    def groupbydoc(self, key=None, default=None):
-        """Group jobs according to one or more document values.
-
-        This method can be called on any :class:`~signac.contrib.project.JobsCursor` such as
-        the one returned by :meth:`~signac.Project.find_jobs` or by iterating over a
-        project.
-
-        Examples
-        --------
-        .. code-block:: python
-
-            # Group jobs by document value 'a'.
-            for key, group in project.groupbydoc('a'):
-                print(key, list(group))
-
-            # Find jobs where job.sp['a'] is 1 and group them
-            # by job.document['b'] and job.document['c'].
-            for key, group in project.find_jobs({'a': 1}).groupbydoc(('b', 'c')):
-                print(key, list(group))
-
-            # Group by whether 'd' is a field in the job.document using a lambda.
-            for key, group in project.groupbydoc(lambda doc: 'd' in doc):
-                print(key, list(group))
-
-        If `key` is None, jobs are grouped by id, placing one job into each group.
-
-        Parameters
-        ----------
-        key : str, iterable, or callable
-            The document grouping parameter(s) passed as a string, iterable
-            of strings, or a callable that will be passed one argument,
-            :attr:`~signac.contrib.job.Job.document` (Default value = None).
-        default :
-            A default value to be used when a given document key is not
-            present. The value must be sortable and is only used if not None
-            (Default value = None).
-
-        """
-        if isinstance(key, str):
-            if default is None:
-
-                def keyfunction(job):
-                    return job.document[key]
-
-            else:
-
-                def keyfunction(job):
-                    return job.document.get(key, default)
-
-        elif isinstance(key, Iterable):
-            if default is None:
-
-                def keyfunction(job):
-                    return tuple(job.document[k] for k in key)
-
-            else:
-
-                def keyfunction(job):
-                    return tuple(job.document.get(k, default) for k in key)
-
-        elif key is None:
-            # Must return a type that can be ordered with <, >
-            def keyfunction(job):
-                return str(job)
-
-        else:
-            # Pass the job document to a callable
-            def keyfunction(job):
-                return key(job.document)
-
-        return groupby(sorted(iter(self), key=keyfunction), key=keyfunction)
-
     def export_to(self, target, path=None, copytree=None):
         """Export all jobs to a target location, such as a directory or a (zipped) archive file.
 
diff --git a/tests/test_project.py b/tests/test_project.py
index ba22e356e..102238453 100644
--- a/tests/test_project.py
+++ b/tests/test_project.py
@@ -927,6 +927,11 @@ def get_doc(i):
             for job in list(g):
                 assert job.document["a"] == k
 
+        for k, g in self.project.groupby("doc.b"):
+            jobs = list(g)  # materialize: the group may be a one-shot iterator
+            assert len(jobs) == 6
+            assert all(job.document["b"] == k for job in jobs)
+
         assert len(list(self.project.groupby("doc.d"))) == 0
         for k, g in self.project.groupby("doc.d", default=-1):
             assert k == -1
@@ -950,6 +955,17 @@ def get_doc(i):
                 assert job.sp["b"] == k[0]
                 assert job.document["c"] == k[1]
 
+        for k, g in self.project.groupby(lambda job: job.doc["a"] % 4):
+            jobs = list(g)  # materialize: the group may be a one-shot iterator
+            assert len(jobs) == 3
+            assert all(job.document["a"] % 4 == k for job in jobs)
+
+        for k, g in self.project.groupby(lambda job: str(job.doc)):
+            jobs = list(g)  # materialize: the group may be a one-shot iterator
+            assert len(jobs) == 1
+            assert all(str(job.document) == k for job in jobs)
+
+        # Make the schema heterogeneous
         self.project.open_job({"a": 20}).init()
         for k, g in self.project.groupby("b"):
             assert len(list(g)) == 6
@@ -961,45 +977,10 @@ def get_doc(i):
                 assert job.sp["b"] == k[0]
                 assert job.sp["c"] == k[1]
 
-    def test_jobs_groupbydoc(self):
-        def get_doc(i):
-            return {"a": i, "b": i % 2, "c": i % 3}
-
-        for i in range(12):
-            job = self.project.open_job({"i": i}).init()
-            job.document = get_doc(i)
-
-        for k, g in self.project.groupbydoc("a"):
-            assert len(list(g)) == 1
-            for job in list(g):
-                assert job.document["a"] == k
-        for k, g in self.project.groupbydoc("b"):
-            assert len(list(g)) == 6
-            for job in list(g):
-                assert job.document["b"] == k
-        with pytest.raises(KeyError):
-            for k, g in self.project.groupbydoc("d"):
-                pass
-        for k, g in self.project.groupbydoc("d", default=-1):
-            assert k == -1
-            assert len(list(g)) == len(self.project)
-        for k, g in self.project.groupbydoc(("b", "c")):
-            assert len(list(g)) == 2
-            for job in list(g):
-                assert job.document["b"] == k[0]
-                assert job.document["c"] == k[1]
-        for k, g in self.project.groupbydoc(lambda doc: doc["a"] % 4):
-            assert len(list(g)) == 3
-            for job in list(g):
-                assert job.document["a"] % 4 == k
-        for k, g in self.project.groupbydoc(lambda doc: str(doc)):
-            assert len(list(g)) == 1
-            for job in list(g):
-                assert str(job.document) == k
         group_count = 0
-        for k, g in self.project.groupbydoc():
+        for k, g in self.project.groupby(lambda job: job.id):
             assert len(list(g)) == 1
-            group_count = group_count + 1
+            group_count += 1
             for job in list(g):
                 assert str(job) == k
         assert group_count == len(list(self.project.find_jobs()))