Remove groupbydoc method and consolidate tests. (#601)
bdice committed Oct 27, 2022
1 parent f23e6d3 commit b773921
Showing 4 changed files with 23 additions and 174 deletions.
changelog.txt (2 changes: 1 addition & 1 deletion)
@@ -16,7 +16,7 @@ Removed
- The pre-SyncedCollection synchronized dictionary classes, including SyncedDict, SyncedAttrDict, and JSONDict (#577).
- The old custom JSON encoder and dumps wrapper (#577).
- The MPIPool and the filesystems.py module (#575).
- The following Project methods: ``get_id``, ``build_job_search_index``, ``build_job_statepoint_index``, ``find_job_ids``, ``reset_statepoint``, ``update_statepoint``, ``create_access_module``, ``index``, ``dump_statepoints``, ``get_statepoint``, ``read_statepoints``, ``write_statepoints`` (#574, #593, #599).
- The following Project methods: ``get_id``, ``build_job_search_index``, ``build_job_statepoint_index``, ``find_job_ids``, ``reset_statepoint``, ``update_statepoint``, ``create_access_module``, ``index``, ``dump_statepoints``, ``get_statepoint``, ``read_statepoints``, ``write_statepoints``, ``groupbydoc`` (#574, #593, #599, #601).
- The following Job methods: ``get_id`` (#578).
- The ``syncutil.copytree`` method (#581).
- All Crawlers, including ``RegexFileCrawler``, ``MainCrawler``, ``MasterCrawler``, ``SignacProjectCrawler``, and ``BaseCrawler``, in addition to all associated functionality in indexing.py (#580).
doc/api.rst (2 changes: 0 additions & 2 deletions)
@@ -29,7 +29,6 @@ The Project
Project.find_jobs
Project.fn
Project.groupby
Project.groupbydoc
Project.import_from
Project.id
Project.isfile
@@ -62,7 +61,6 @@ The JobsCursor class
.. autosummary::
JobsCursor.export_to
JobsCursor.groupby
JobsCursor.groupbydoc
JobsCursor.to_dataframe


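With the groupbydoc entries dropped from the API reference, document-based grouping is exposed only through Project.groupby and JobsCursor.groupby using the 'doc.' query namespace, as the deprecation notice in the project.py diff below spells out. A minimal usage sketch, not part of this commit, assuming an existing signac project whose state points define 'a' and whose job documents define 'a' and 'b':

    import signac

    project = signac.get_project()  # assumes the working directory is inside a signac project

    # Group all jobs by a document value (previously: project.groupbydoc("a")).
    for key, group in project.groupby("doc.a"):
        print(key, [job.id for job in group])

    # The same pattern works on the JobsCursor returned by find_jobs
    # (previously: project.find_jobs({"a": 1}).groupbydoc("b")).
    for key, group in project.find_jobs({"a": 1}).groupby("doc.b"):
        print(key, [job.id for job in group])
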
signac/contrib/project.py (138 changes: 4 additions & 134 deletions)
Expand Up @@ -853,62 +853,13 @@ def groupby(self, key=None, default=None):
Yields
------
key :
Grouped key.
Key identifying this group.
group : iterable of Jobs
Iterable of `Job` instances matching this group key.
Iterable of `Job` instances matching this group.
"""
yield from self.find_jobs().groupby(key, default=default)

@deprecated(
deprecated_in="1.7",
removed_in="2.0",
current_version=__version__,
details=(
"Use groupby with a 'doc.' filter instead, see "
"https://docs.signac.io/en/latest/query.html#query-namespaces."
),
)
def groupbydoc(self, key=None, default=None):
"""Group jobs according to one or more document values.
This method can be called on any :class:`~signac.contrib.project.JobsCursor` such as
the one returned by :meth:`~signac.Project.find_jobs` or by iterating over a
project.
Examples
--------
.. code-block:: python
# Group jobs by document value 'a'.
for key, group in project.groupbydoc('a'):
print(key, list(group))
# Find jobs where job.sp['a'] is 1 and group them
# by job.document['b'] and job.document['c'].
for key, group in project.find_jobs({'a': 1}).groupbydoc(('b', 'c')):
print(key, list(group))
# Group by whether 'd' is a field in the job.document using a lambda.
for key, group in project.groupbydoc(lambda doc: 'd' in doc):
print(key, list(group))
If `key` is None, jobs are grouped by id, placing one job into each group.
Parameters
----------
key : str, iterable, or callable
The document grouping parameter(s) passed as a string, iterable
of strings, or a callable that will be passed one argument,
:attr:`~signac.contrib.job.Job.document` (Default value = None).
default :
A default value to be used when a given document key is not
present. The value must be sortable and is only used if not None
(Default value = None).
"""
return self.find_jobs().groupbydoc(key, default=default)

def to_dataframe(self, *args, **kwargs):
r"""Export the project metadata to a pandas :class:`~pandas.DataFrame`.
@@ -2099,9 +2050,9 @@ def groupby(self, key=None, default=None):
Yields
------
key :
Grouped key.
Key identifying this group.
group : iterable of Jobs
Iterable of `Job` instances matching this group key.
Iterable of `Job` instances matching this group.
"""
_filter = self._filter
@@ -2200,87 +2151,6 @@ def keyfunction(job):
key=keyfunction,
)

@deprecated(
deprecated_in="1.7",
removed_in="2.0",
current_version=__version__,
details=(
"Use groupby with a 'doc.' filter instead, see "
"https://docs.signac.io/en/latest/query.html#query-namespaces."
),
)
def groupbydoc(self, key=None, default=None):
"""Group jobs according to one or more document values.
This method can be called on any :class:`~signac.contrib.project.JobsCursor` such as
the one returned by :meth:`~signac.Project.find_jobs` or by iterating over a
project.
Examples
--------
.. code-block:: python
# Group jobs by document value 'a'.
for key, group in project.groupbydoc('a'):
print(key, list(group))
# Find jobs where job.sp['a'] is 1 and group them
# by job.document['b'] and job.document['c'].
for key, group in project.find_jobs({'a': 1}).groupbydoc(('b', 'c')):
print(key, list(group))
# Group by whether 'd' is a field in the job.document using a lambda.
for key, group in project.groupbydoc(lambda doc: 'd' in doc):
print(key, list(group))
If `key` is None, jobs are grouped by id, placing one job into each group.
Parameters
----------
key : str, iterable, or callable
The document grouping parameter(s) passed as a string, iterable
of strings, or a callable that will be passed one argument,
:attr:`~signac.contrib.job.Job.document` (Default value = None).
default :
A default value to be used when a given document key is not
present. The value must be sortable and is only used if not None
(Default value = None).
"""
if isinstance(key, str):
if default is None:

def keyfunction(job):
return job.document[key]

else:

def keyfunction(job):
return job.document.get(key, default)

elif isinstance(key, Iterable):
if default is None:

def keyfunction(job):
return tuple(job.document[k] for k in key)

else:

def keyfunction(job):
return tuple(job.document.get(k, default) for k in key)

elif key is None:
# Must return a type that can be ordered with <, >
def keyfunction(job):
return str(job)

else:
# Pass the job document to a callable
def keyfunction(job):
return key(job.document)

return groupby(sorted(iter(self), key=keyfunction), key=keyfunction)

def export_to(self, target, path=None, copytree=None):
"""Export all jobs to a target location, such as a directory or a (zipped) archive file.
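The removed JobsCursor.groupbydoc above built its grouping key from job.document and dispatched on the key type (string, iterable of strings, None, or callable). The surviving groupby handles the callable and no-key cases as well, the difference being that a callable now receives the Job rather than its document. A hedged sketch of those two migrations, mirroring the consolidated tests in the next file; `project` is assumed to be an existing signac Project whose job documents define an integer 'a':

    # Callable key: groupbydoc passed job.document to the callable, while
    # groupby passes the Job itself
    # (previously: project.groupbydoc(lambda doc: doc["a"] % 4)).
    for key, group in project.groupby(lambda job: job.document["a"] % 4):
        print(key, len(list(group)))

    # No key: groupbydoc() grouped one job per id; the consolidated test
    # expresses the same grouping with an explicit callable.
    for key, group in project.groupby(lambda job: job.id):
        assert len(list(group)) == 1
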
tests/test_project.py (55 changes: 18 additions & 37 deletions)
@@ -938,6 +938,11 @@ def get_doc(i):
for job in list(g):
assert job.document["a"] == k

for k, g in self.project.groupby("doc.b"):
assert len(list(g)) == 6
for job in list(g):
assert job.document["b"] == k

assert len(list(self.project.groupby("doc.d"))) == 0
for k, g in self.project.groupby("doc.d", default=-1):
assert k == -1
@@ -961,6 +966,17 @@ def get_doc(i):
assert job.sp["b"] == k[0]
assert job.document["c"] == k[1]

for k, g in self.project.groupby(lambda job: job.doc["a"] % 4):
assert len(list(g)) == 3
for job in list(g):
assert job.document["a"] % 4 == k

for k, g in self.project.groupby(lambda job: str(job.doc)):
assert len(list(g)) == 1
for job in list(g):
assert str(job.document) == k

# Make the schema heterogeneous
self.project.open_job({"a": 20}).init()
for k, g in self.project.groupby("b"):
assert len(list(g)) == 6
@@ -972,45 +988,10 @@ def get_doc(i):
assert job.sp["b"] == k[0]
assert job.sp["c"] == k[1]

def test_jobs_groupbydoc(self):
def get_doc(i):
return {"a": i, "b": i % 2, "c": i % 3}

for i in range(12):
job = self.project.open_job({"i": i}).init()
job.document = get_doc(i)

for k, g in self.project.groupbydoc("a"):
assert len(list(g)) == 1
for job in list(g):
assert job.document["a"] == k
for k, g in self.project.groupbydoc("b"):
assert len(list(g)) == 6
for job in list(g):
assert job.document["b"] == k
with pytest.raises(KeyError):
for k, g in self.project.groupbydoc("d"):
pass
for k, g in self.project.groupbydoc("d", default=-1):
assert k == -1
assert len(list(g)) == len(self.project)
for k, g in self.project.groupbydoc(("b", "c")):
assert len(list(g)) == 2
for job in list(g):
assert job.document["b"] == k[0]
assert job.document["c"] == k[1]
for k, g in self.project.groupbydoc(lambda doc: doc["a"] % 4):
assert len(list(g)) == 3
for job in list(g):
assert job.document["a"] % 4 == k
for k, g in self.project.groupbydoc(lambda doc: str(doc)):
assert len(list(g)) == 1
for job in list(g):
assert str(job.document) == k
group_count = 0
for k, g in self.project.groupbydoc():
for k, g in self.project.groupby(lambda job: job.id):
assert len(list(g)) == 1
group_count = group_count + 1
group_count += 1
for job in list(g):
assert str(job) == k
assert group_count == len(list(self.project.find_jobs()))
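One behavior the consolidated tests pin down: grouping on a document key that no job defines yields no groups, while supplying a default collects every job under the default key. A small illustrative sketch, assuming (like the test fixture above) a signac Project `project` in which no job document contains 'd':

    # No job document defines 'd', so there is nothing to group.
    assert len(list(project.groupby("doc.d"))) == 0

    # With a default, every job falls into a single group keyed by the default.
    for key, group in project.groupby("doc.d", default=-1):
        assert key == -1
        assert len(list(group)) == len(project)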
