From 6c2d2ab92778dab32ed94fa2554e32d535041244 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 6 Aug 2021 11:58:56 -0500 Subject: [PATCH 01/18] Remove index argument from _find_job_ids and CLI. --- signac/__main__.py | 34 +----------------------------- signac/contrib/project.py | 44 ++++++++++++++++----------------------- 2 files changed, 19 insertions(+), 59 deletions(-) diff --git a/signac/__main__.py b/signac/__main__.py index 73b600cb7..a55035b21 100644 --- a/signac/__main__.py +++ b/signac/__main__.py @@ -113,13 +113,6 @@ def _fmt_bytes(nbytes, suffix="B"): return "{:.1f} {}{}".format(nbytes, "Yi", suffix) -def _read_index(project, fn_index=None): - if fn_index is not None: - _print_err(f"Reading index from file '{fn_index}'...") - with open(fn_index) as file_descriptor: - return [json.loads(line) for line in file_descriptor] - - def _open_job_by_id(project, job_id): """Attempt to open a job by id and provide user feedback on error.""" try: @@ -160,23 +153,14 @@ def find_with_filter(args): return args.job_id project = get_project() - if hasattr(args, "index"): - index = _read_index(project, args.index) - else: - index = None - f = parse_filter_arg(args.filter) df = parse_filter_arg(args.doc_filter) - return get_project()._find_job_ids(index=index, filter=f, doc_filter=df) + return project._find_job_ids(filter=f, doc_filter=df) def main_project(args): """Handle project subcommand.""" project = get_project() - if args.index: - for doc in project._index(): - print(json.dumps(doc)) - return if args.workspace: print(project.workspace()) else: @@ -350,7 +334,6 @@ def main_view(args): prefix=args.prefix, path=args.path, job_ids=find_with_filter(args), - index=_read_index(args.index), ) @@ -973,12 +956,6 @@ def main(): action="store_true", help="Print the project's workspace path instead of the project id.", ) - parser_project.add_argument( - "-i", - "--index", - action="store_true", - help="Generate and print an index for the project.", - ) parser_project.set_defaults(func=main_project) parser_job = subparsers.add_parser("job") @@ -1130,9 +1107,6 @@ def main(): nargs="+", help="Show documents of job matching this document filter.", ) - parser_document.add_argument( - "--index", type=str, help="The filename of an index file." - ) parser_document.set_defaults(func=main_document) parser_remove = subparsers.add_parser("rm") @@ -1202,9 +1176,6 @@ def main(): parser_find.add_argument( "-d", "--doc-filter", type=str, nargs="+", help="A document filter." ) - parser_find.add_argument( - "-i", "--index", type=str, help="The filename of an index file." - ) parser_find.add_argument( "-s", "--show", @@ -1298,9 +1269,6 @@ def main(): nargs="+", help="Limit the view to jobs with these job ids.", ) - selection_group.add_argument( - "-i", "--index", type=str, help="The filename of an index file." - ) parser_view.set_defaults(func=main_view) parser_schema = subparsers.add_parser("schema") diff --git a/signac/contrib/project.py b/signac/contrib/project.py index 4b27e452c..19b65a9f8 100644 --- a/signac/contrib/project.py +++ b/signac/contrib/project.py @@ -814,34 +814,27 @@ def detect_schema(self, exclude_const=False, subset=None, index=None): ) return ProjectSchema.detect(statepoint_index) - def _find_job_ids(self, filter=None, doc_filter=None, index=None): + def _find_job_ids(self, filter=None, doc_filter=None): """Find the job_ids of all jobs matching the filters. The optional filter arguments must be a JSON serializable mapping of key-value pairs. - .. note:: - Providing a pre-calculated index may vastly increase the - performance of this function. - Parameters ---------- filter : Mapping - A mapping of key-value pairs that all indexed job state points - are compared against (Default value = None). - doc_filter : Mapping - A mapping of key-value pairs that all indexed job documents are + A mapping of key-value pairs that all job state points are compared against (Default value = None). - index : - A document index. If not provided, an index will be computed - (Default value = None). + doc_filter : Mapping + A mapping of key-value pairs that all job documents are compared + against (Default value = None). Returns ------- Collection or list - The ids of all indexed jobs matching both filters. If no arguments - are provided to this method, the ids are returned as a list. If - any of the arguments are provided, a :class:`Collection` containing + The ids of all jobs matching both filters. If no arguments are + provided to this method, the ids are returned as a list. If any + of the arguments are provided, a :class:`Collection` containing all the ids is returned. Raises @@ -864,18 +857,17 @@ def _find_job_ids(self, filter=None, doc_filter=None, index=None): dict filters) is recommended. """ - if not filter and not doc_filter and index is None: + if not filter and not doc_filter: return list(self._job_dirs()) - if index is None: - filter = dict(parse_filter(_add_prefix("sp.", filter))) - if doc_filter: - warnings.warn(DOC_FILTER_WARNING, DeprecationWarning) - filter.update(parse_filter(_add_prefix("doc.", doc_filter))) - index = self._index(include_job_document=True) - elif "doc" in _root_keys(filter): - index = self._index(include_job_document=True) - else: - index = self._sp_index() + filter = dict(parse_filter(_add_prefix("sp.", filter))) + if doc_filter: + warnings.warn(DOC_FILTER_WARNING, DeprecationWarning) + filter.update(parse_filter(_add_prefix("doc.", doc_filter))) + index = self._index(include_job_document=True) + elif "doc" in _root_keys(filter): + index = self._index(include_job_document=True) + else: + index = self._sp_index() return Collection(index, _trust=True)._find(filter) def find_jobs(self, filter=None, doc_filter=None): From f69356488f76c0c463d35f5bf8f705972b97c98e Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 6 Aug 2021 13:55:42 -0500 Subject: [PATCH 02/18] Remove index argument from linked views. --- signac/contrib/linked_view.py | 27 +++++---------------------- signac/contrib/project.py | 15 ++------------- tests/test_project.py | 6 ------ 3 files changed, 7 insertions(+), 41 deletions(-) diff --git a/signac/contrib/linked_view.py b/signac/contrib/linked_view.py index 1cb569881..366f05f53 100644 --- a/signac/contrib/linked_view.py +++ b/signac/contrib/linked_view.py @@ -14,7 +14,7 @@ logger = logging.getLogger(__name__) -def create_linked_view(project, prefix=None, job_ids=None, index=None, path=None): +def create_linked_view(project, prefix=None, job_ids=None, path=None): """Create or update a persistent linked view of the selected data space. Parameters @@ -26,8 +26,6 @@ def create_linked_view(project, prefix=None, job_ids=None, index=None, path=None job_ids : iterable If None (the default), create the view for the complete data space, otherwise only for this iterable of job ids. - index : - A document index (Default value = None). path : The path (function) used to structure the linked data space (Default value = None). @@ -42,8 +40,6 @@ def create_linked_view(project, prefix=None, job_ids=None, index=None, path=None OSError Linked views cannot be created on Windows because symbolic links are not supported by the platform. - ValueError - When the selected data space is provided with an insufficient index. RuntimeError When state points contain one of ``[os.sep, " ", "*"]``. @@ -60,23 +56,10 @@ def create_linked_view(project, prefix=None, job_ids=None, index=None, path=None if prefix is None: prefix = "view" - if index is None: - if job_ids is None: - index = [{"_id": job.id, "sp": job.sp()} for job in project] - jobs = list(project) - else: - index = [ - {"_id": job_id, "sp": project.open_job(id=job_id).sp()} - for job_id in job_ids - ] - jobs = list(project.open_job(id=job_id) for job_id in job_ids) - elif job_ids is not None: - if not isinstance(job_ids, set): - job_ids = set(job_ids) - index = [doc for doc in index if doc["_id"] in job_ids] + if job_ids is None: + jobs = list(project) + else: jobs = list(project.open_job(id=job_id) for job_id in job_ids) - if not job_ids.issubset({doc["_id"] for doc in index}): - raise ValueError("Insufficient index for selected data space.") key_list = [k for job in jobs for k in job.statepoint().keys()] value_list = [v for job in jobs for v in job.statepoint().values()] @@ -92,7 +75,7 @@ def create_linked_view(project, prefix=None, job_ids=None, index=None, path=None if any(bad_items): err_msg = " ".join( [ - f"In order to use view, statepoints should not contain {bad_chars}:", + f"In order to use view, state points should not contain {bad_chars}:", *bad_items, ] ) diff --git a/signac/contrib/project.py b/signac/contrib/project.py index 19b65a9f8..562c3e686 100644 --- a/signac/contrib/project.py +++ b/signac/contrib/project.py @@ -1290,7 +1290,7 @@ def get_statepoint(self, jobid, fn=None): """ return self._get_statepoint(job_id=jobid, fn=fn) - def create_linked_view(self, prefix=None, job_ids=None, index=None, path=None): + def create_linked_view(self, prefix=None, job_ids=None, path=None): """Create or update a persistent linked view of the selected data space. Similar to :meth:`~signac.Project.export_to`, this function expands the data space @@ -1335,8 +1335,6 @@ def create_linked_view(self, prefix=None, job_ids=None, index=None, path=None): job_ids : iterable If None (the default), create the view for the complete data space, otherwise only for this iterable of job ids. - index : - A document index (Default value = None). path : The path (function) used to structure the linked data space (Default value = None). @@ -1347,18 +1345,9 @@ def create_linked_view(self, prefix=None, job_ids=None, index=None, path=None): directory paths. """ - if index is not None: - warnings.warn( - ( - "The `index` argument is deprecated as of version 1.3 and will be " - "removed in version 2.0." - ), - DeprecationWarning, - ) - from .linked_view import create_linked_view - return create_linked_view(self, prefix, job_ids, index, path) + return create_linked_view(self, prefix, job_ids, path) def clone(self, job, copytree=shutil.copytree): """Clone job into this project. diff --git a/tests/test_project.py b/tests/test_project.py index 7c4fe36fd..5ebdbc34b 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -1692,12 +1692,6 @@ def clean(filter=None): job_subset = self.project.find_jobs({"b": 0}) id_subset = [job.id for job in job_subset] - bad_index = [dict(_id=i) for i in range(3)] - with pytest.raises(ValueError): - self.project.create_linked_view( - prefix=view_prefix, job_ids=id_subset, index=bad_index - ) - self.project.create_linked_view(prefix=view_prefix, job_ids=id_subset) all_links = list(_find_all_links(view_prefix)) assert len(all_links) == len(id_subset) From 2e8d8a8b53dd1cae37a652a165cbbb85b2aade70 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 6 Aug 2021 13:56:27 -0500 Subject: [PATCH 03/18] Remove access modules. --- signac/contrib/project.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/signac/contrib/project.py b/signac/contrib/project.py index 562c3e686..97191b083 100644 --- a/signac/contrib/project.py +++ b/signac/contrib/project.py @@ -46,24 +46,6 @@ JOB_ID_REGEX = re.compile("[a-f0-9]{32}") -ACCESS_MODULE_MINIMAL = """import signac - -def get_indexes(root): - yield signac.get_project(root).index() -""" - -ACCESS_MODULE_MAIN = """#!/usr/bin/env python -# -*- coding: utf-8 -*- -import signac - -def get_indexes(root): - yield signac.get_project(root).index() - -if __name__ == '__main__': - with signac.Collection.open('index.txt') as index: - signac.export(signac.index(), index, update=True) -""" - # The warning used for doc filter deprecation everywhere. Don't use # triple-quoted multi-line string to avoid inserting newlines. # TODO: In signac 2.0, remove all docstrings for doc_filter parameters. The From e9cc078932fd4e878e3c267a94d93a40022bbf1a Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 6 Aug 2021 14:00:00 -0500 Subject: [PATCH 04/18] Remove index argument from detect_schema. --- signac/contrib/project.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/signac/contrib/project.py b/signac/contrib/project.py index 97191b083..fcf982b86 100644 --- a/signac/contrib/project.py +++ b/signac/contrib/project.py @@ -762,7 +762,7 @@ def __contains__(self, job): """ return self._contains_job_id(job.id) - def detect_schema(self, exclude_const=False, subset=None, index=None): + def detect_schema(self, exclude_const=False, subset=None): """Detect the project's state point schema. See :ref:`signac schema ` for the command line equivalent. @@ -775,8 +775,6 @@ def detect_schema(self, exclude_const=False, subset=None, index=None): subset : A sequence of jobs or job ids specifying a subset over which the state point schema should be detected (Default value = None). - index : - A document index (Default value = None). Returns ------- @@ -786,8 +784,7 @@ def detect_schema(self, exclude_const=False, subset=None, index=None): """ from .schema import _build_job_statepoint_index - if index is None: - index = self._index(include_job_document=False) + index = self._index(include_job_document=False) if subset is not None: subset = {str(s) for s in subset} index = [doc for doc in index if doc["_id"] in subset] From 0b3237956f098807aa2058a8cdeba3c2825f7391 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 6 Aug 2021 14:00:59 -0500 Subject: [PATCH 05/18] Improve docstrings. --- signac/contrib/project.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/signac/contrib/project.py b/signac/contrib/project.py index fcf982b86..49093a277 100644 --- a/signac/contrib/project.py +++ b/signac/contrib/project.py @@ -853,8 +853,8 @@ def find_jobs(self, filter=None, doc_filter=None): """Find all jobs in the project's workspace. The optional filter arguments must be a Mapping of key-value pairs and - JSON serializable. The `filter` argument is used to search against job - state points, whereas the `doc_filter` argument compares against job + JSON serializable. The ``filter`` argument is used to search against job + state points, whereas the ``doc_filter`` argument compares against job document keys. See :ref:`signac find ` for the command line equivalent. @@ -862,10 +862,10 @@ def find_jobs(self, filter=None, doc_filter=None): Parameters ---------- filter : Mapping - A mapping of key-value pairs that all indexed job state points are + A mapping of key-value pairs that job state points are compared against (Default value = None). doc_filter : Mapping - A mapping of key-value pairs that all indexed job documents are + A mapping of key-value pairs that job documents are compared against (Default value = None). Returns @@ -1720,7 +1720,7 @@ def _build_index(self, include_job_document=False): Parameters ---------- - include_job_document : + include_job_document : bool Whether to include the job document in the index (Default value = False). From 35d2638b874d3416cbdef095cf1e16a31fc2ae24 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 6 Aug 2021 14:29:21 -0500 Subject: [PATCH 06/18] Remove unused _read_index function. --- signac/contrib/filterparse.py | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/signac/contrib/filterparse.py b/signac/contrib/filterparse.py index 0fe52b75c..2db9f9c61 100644 --- a/signac/contrib/filterparse.py +++ b/signac/contrib/filterparse.py @@ -40,28 +40,6 @@ def _with_message(query, file): return query -def _read_index(project, fn_index=None): - """Read index from the file passed. - - Parameters - ---------- - project : :class:`~signac.Project` - Project handle. - fn_index : str - File name of the index (Default value = None). - - Returns - ------- - generator - Returns the file contents, parsed as JSON-encoded lines. - - """ - if fn_index is not None: - _print_err(f"Reading index from file '{fn_index}'...") - fd = open(fn_index) - return (json.loads(line) for line in fd) - - def _is_json(q): """Check if q is JSON. From 5641b0f4c2b3014b6076931cccdb489ec5134967 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Sat, 7 Aug 2021 14:44:14 -0500 Subject: [PATCH 07/18] Use generator in the same line it is constructed; remove unclear variable name jsi. --- signac/contrib/import_export.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/signac/contrib/import_export.py b/signac/contrib/import_export.py index ea1cfdbfe..fe7bb3246 100644 --- a/signac/contrib/import_export.py +++ b/signac/contrib/import_export.py @@ -63,8 +63,7 @@ def _make_schema_based_path_function(jobs, exclude_keys=None, delimiter_nested=" return lambda job, sep=None: "" index = [{"_id": job.id, "sp": job.sp()} for job in jobs] - jsi = _build_job_statepoint_index(exclude_const=True, index=index) - sp_index = OrderedDict(jsi) + sp_index = OrderedDict(_build_job_statepoint_index(exclude_const=True, index=index)) paths = {} for key_tokens, values in sp_index.items(): From ea0ccd208187fa604a0938d80b1eaa3fca81d8b6 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 9 Aug 2021 10:14:26 -0500 Subject: [PATCH 08/18] Rename sp_index to statepoint_index. --- signac/contrib/import_export.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/signac/contrib/import_export.py b/signac/contrib/import_export.py index fe7bb3246..2b32af10a 100644 --- a/signac/contrib/import_export.py +++ b/signac/contrib/import_export.py @@ -63,10 +63,12 @@ def _make_schema_based_path_function(jobs, exclude_keys=None, delimiter_nested=" return lambda job, sep=None: "" index = [{"_id": job.id, "sp": job.sp()} for job in jobs] - sp_index = OrderedDict(_build_job_statepoint_index(exclude_const=True, index=index)) + statepoint_index = OrderedDict( + _build_job_statepoint_index(exclude_const=True, index=index) + ) paths = {} - for key_tokens, values in sp_index.items(): + for key_tokens, values in statepoint_index.items(): key = key_tokens.replace(".", delimiter_nested) if exclude_keys and key in exclude_keys: continue From e97c9c0b16e6608539230066ac8d3def68d09901 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 9 Aug 2021 10:14:39 -0500 Subject: [PATCH 09/18] Remove index argument from repair method. --- signac/contrib/project.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/signac/contrib/project.py b/signac/contrib/project.py index 49093a277..2993ec5d3 100644 --- a/signac/contrib/project.py +++ b/signac/contrib/project.py @@ -1605,7 +1605,7 @@ def check(self): # State point backup files are being removed in favor of Project.update_cache(). # Change this method in signac 2.0 to use the state point cache by default # instead of FN_STATEPOINTS. - def repair(self, fn_statepoints=None, index=None, job_ids=None): + def repair(self, fn_statepoints=None, job_ids=None): """Attempt to repair the workspace after it got corrupted. This method will attempt to repair lost or corrupted job state point @@ -1616,8 +1616,6 @@ def repair(self, fn_statepoints=None, index=None, job_ids=None): fn_statepoints : str The filename of the file containing the state points, defaults to :attr:`~signac.Project.FN_STATEPOINTS`. - index : - A document index (Default value = None). job_ids : An iterable of job ids that should get repaired. Defaults to all jobs. @@ -1642,9 +1640,6 @@ def repair(self, fn_statepoints=None, index=None, job_ids=None): except OSError as error: if error.errno != errno.ENOENT or fn_statepoints is not None: raise - if index is not None: - for doc in index: - self._sp_cache[doc["signac_id"]] = doc["sp"] corrupted = [] for job_id in job_ids: From 1c10d2a446a877126edd71dc5a1e5a41159de38d Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 9 Aug 2021 10:16:13 -0500 Subject: [PATCH 10/18] Clarify docstrings. --- signac/contrib/project.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/signac/contrib/project.py b/signac/contrib/project.py index 2993ec5d3..6bb3ec84a 100644 --- a/signac/contrib/project.py +++ b/signac/contrib/project.py @@ -823,17 +823,16 @@ def _find_job_ids(self, filter=None, doc_filter=None): ValueError If the filters are invalid. RuntimeError - If the filters are not supported by the index. + If the filters are not supported. Notes ----- - If all arguments are ``None``, this method skips indexing the data - space and instead simply iterates over all job directories. This - code path can be much faster for certain use cases since it defers - all work that would be required to construct an index, so in - performance-critical applications where no filtering of the data space - is required, passing no arguments to this method (as opposed to empty - dict filters) is recommended. + If all arguments are ``None``, this method simply returns a list of all + job directories. This code path can be much faster for certain use cases + since it defers all work that would be required to construct an index, + so in performance-critical applications where no filtering of the data + space is required, passing no arguments to this method (as opposed to + empty dict filters) is recommended. """ if not filter and not doc_filter: From d3ee9e46144f05cce7ce9cf3abe1b8d58ebadfd1 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 9 Aug 2021 20:21:04 -0500 Subject: [PATCH 11/18] Remove index from docstring. --- signac/contrib/project.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/signac/contrib/project.py b/signac/contrib/project.py index 6bb3ec84a..21550b494 100644 --- a/signac/contrib/project.py +++ b/signac/contrib/project.py @@ -1608,7 +1608,7 @@ def repair(self, fn_statepoints=None, job_ids=None): """Attempt to repair the workspace after it got corrupted. This method will attempt to repair lost or corrupted job state point - manifest files using a state points file or a document index or both. + manifest files using a state points file. Parameters ---------- From 2ece213e6cf91effe2ca6b390717ce3110ef11ee Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 9 Aug 2021 20:33:00 -0500 Subject: [PATCH 12/18] Remove deprecated state point backup functions. --- signac/contrib/project.py | 157 ++------------------------------------ 1 file changed, 5 insertions(+), 152 deletions(-) diff --git a/signac/contrib/project.py b/signac/contrib/project.py index 21550b494..9f78a140d 100644 --- a/signac/contrib/project.py +++ b/signac/contrib/project.py @@ -213,11 +213,6 @@ class Project: KEY_DATA = "signac_data" "The project's datastore key." - # Remove in signac 2.0. - # State point backup files are being removed in favor of Project.update_cache(). - FN_STATEPOINTS = "signac_statepoints.json" - "The default filename to read from and write state points to." - FN_CACHE = ".signac_sp_cache.json.gz" "The default filename for the state point cache file." @@ -1017,114 +1012,6 @@ def to_dataframe(self, *args, **kwargs): """ return self.find_jobs().to_dataframe(*args, **kwargs) - @deprecated( - deprecated_in="1.8", - removed_in="2.0", - current_version=__version__, - details="State point backup files are being removed in favor of Project.update_cache().", - ) - def read_statepoints(self, fn=None): - """Read all state points from a file. - - See Also - -------- - dump_statepoints : Dump the state points and associated job ids. - write_statepoints : Dump state points to a file. - - Parameters - ---------- - fn : str - The filename of the file containing the state points, - defaults to :attr:`~signac.Project.FN_STATEPOINTS`. - - Returns - ------- - dict - State points. - - """ - if fn is None: - fn = self.fn(self.FN_STATEPOINTS) - # See comment in write state points. - with open(fn) as file: - return json.loads(file.read()) - - @deprecated( - deprecated_in="1.8", - removed_in="2.0", - current_version=__version__, - details="State point backup files are being removed in favor of Project.update_cache().", - ) - def dump_statepoints(self, statepoints): - """Dump the state points and associated job ids. - - Equivalent to: - - .. code-block:: python - - {project.open_job(sp).id: sp for sp in statepoints} - - Parameters - ---------- - statepoints : iterable - A list of state points. - - Returns - ------- - dict - A mapping, where the key is the job id and the value is the - state point. - - """ - return {calc_id(sp): sp for sp in statepoints} - - @deprecated( - deprecated_in="1.8", - removed_in="2.0", - current_version=__version__, - details="State point backup files are being removed in favor of Project.update_cache().", - ) - def write_statepoints(self, statepoints=None, fn=None, indent=2): - """Dump state points to a file. - - If the file already contains state points, all new state points - will be appended, while the old ones are preserved. - - See Also - -------- - dump_statepoints : Dump the state points and associated job ids. - - Parameters - ---------- - statepoints : iterable - A list of state points, defaults to all state points which are - defined in the workspace. - fn : str - The filename of the file containing the state points, defaults to - :attr:`~signac.Project.FN_STATEPOINTS`. - indent : int - Specify the indentation of the JSON file (Default value = 2). - - """ - if fn is None: - fn = self.fn(self.FN_STATEPOINTS) - try: - tmp = self.read_statepoints(fn=fn) - except OSError as error: - if error.errno != errno.ENOENT: - raise - tmp = {} - if statepoints is None: - job_ids = self._job_dirs() - _cache = {_id: self._get_statepoint(_id) for _id in job_ids} - else: - _cache = {calc_id(sp): sp for sp in statepoints} - - tmp.update(_cache) - logger.debug(f"Writing state points file with {len(tmp)} entries.") - with open(fn, "w") as file: - file.write(json.dumps(tmp, indent=indent)) - def _register(self, _id, statepoint): """Register the job state point in the project state point cache. @@ -1209,27 +1096,9 @@ def _get_statepoint(self, job_id, fn=None): "to update cache with the Project.update_cache() method." ) self._sp_cache_warned = True - try: - statepoint = self._get_statepoint_from_workspace(job_id) - # Update the project's state point cache from this cache miss - self._sp_cache[job_id] = statepoint - except KeyError as error: - # Fall back to a file containing all state points because the state - # point could not be read from the job workspace. - # - # In signac 2.0, Project.read_statepoints will be removed. - # Update this code path to "raise error" and update the method - # documentation accordingly. - try: - statepoints = self.read_statepoints(fn=fn) - # Update the project's state point cache - self._sp_cache.update(statepoints) - statepoint = statepoints[job_id] - except OSError as io_error: - if io_error.errno != errno.ENOENT: - raise io_error - else: - raise error + statepoint = self._get_statepoint_from_workspace(job_id) + # Update the project's state point cache from this cache miss + self._sp_cache[job_id] = statepoint return statepoint @deprecated( @@ -1601,20 +1470,14 @@ def check(self): ) raise JobsCorruptedError(corrupted) - # State point backup files are being removed in favor of Project.update_cache(). - # Change this method in signac 2.0 to use the state point cache by default - # instead of FN_STATEPOINTS. - def repair(self, fn_statepoints=None, job_ids=None): + def repair(self, job_ids=None): """Attempt to repair the workspace after it got corrupted. This method will attempt to repair lost or corrupted job state point - manifest files using a state points file. + manifest files using a state point cache. Parameters ---------- - fn_statepoints : str - The filename of the file containing the state points, defaults - to :attr:`~signac.Project.FN_STATEPOINTS`. job_ids : An iterable of job ids that should get repaired. Defaults to all jobs. @@ -1629,16 +1492,6 @@ def repair(self, fn_statepoints=None, job_ids=None): # Load internal cache from all available external sources. self._read_cache() - try: - # Updates the state point cache from the provided file - # - # In signac 2.0, Project.read_statepoints will be removed. - # Remove this code path (only use "self._read_cache()" above) and - # update the method signature and docs to remove "fn_statepoints." - self._sp_cache.update(self.read_statepoints(fn=fn_statepoints)) - except OSError as error: - if error.errno != errno.ENOENT or fn_statepoints is not None: - raise corrupted = [] for job_id in job_ids: From cd9f4b2f686425bcd900be6d548bdb3e11cfd91c Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 9 Aug 2021 20:50:19 -0500 Subject: [PATCH 13/18] Remove docs for removed methods. --- doc/api.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index a8e0fbb83..a71eb1c6e 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -25,7 +25,6 @@ The Project Project.data Project.doc Project.document - Project.dump_statepoints Project.export_to Project.find_jobs Project.fn @@ -38,7 +37,6 @@ The Project Project.min_len_unique_id Project.num_jobs Project.open_job - Project.read_statepoints Project.repair Project.root_directory Project.stores @@ -46,7 +44,6 @@ The Project Project.update_cache Project.update_statepoint Project.workspace - Project.write_statepoints .. autoclass:: Project :members: From 7ca12b2b20f9d486c65b32633758c3fa29f2b897 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 9 Aug 2021 20:51:11 -0500 Subject: [PATCH 14/18] Fix tests for repair function to use only the persistent cache. --- tests/test_project.py | 34 ++++++++-------------------------- 1 file changed, 8 insertions(+), 26 deletions(-) diff --git a/tests/test_project.py b/tests/test_project.py index 5ebdbc34b..950f36894 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -206,20 +206,6 @@ def test_data(self): self.project.data = {"a": {"b": 45}} assert self.project.data == {"a": {"b": 45}} - def test_write_read_statepoint(self): - statepoints = [{"a": i} for i in range(5)] - self.project.dump_statepoints(statepoints) - self.project.write_statepoints(statepoints) - read = list(self.project.read_statepoints().values()) - assert len(read) == len(statepoints) - more_statepoints = [{"b": i} for i in range(5, 10)] - self.project.write_statepoints(more_statepoints) - read2 = list(self.project.read_statepoints()) - assert len(read2) == len(statepoints) + len(more_statepoints) - for id_ in self.project.read_statepoints().keys(): - with pytest.deprecated_call(): - self.project.get_statepoint(id_) - def test_workspace_path_normalization(self): def norm_path(p): return os.path.abspath(os.path.expandvars(p)) @@ -590,6 +576,8 @@ def test_repair_corrupted_workspace(self): pass assert i == 4 + self.project.update_cache() + # no manifest file with self.project.open_job(statepoints[0]) as job: os.remove(job.FN_MANIFEST) @@ -598,12 +586,8 @@ def test_repair_corrupted_workspace(self): with open(job.FN_MANIFEST, "w"): pass - # Need to clear internal and persistent cache to encounter error. + # Need to clear internal cache to encounter error. self.project._sp_cache.clear() - self.project._remove_persistent_cache_file() - - # Ensure that state point hash table does not exist. - assert not os.path.isfile(self.project.fn(self.project.FN_STATEPOINTS)) # disable logging temporarily try: @@ -614,18 +598,16 @@ def test_repair_corrupted_workspace(self): for job in self.project: # Accessing the job state point triggers validation of the # state point manifest file - job.statepoint + job.statepoint() - with pytest.raises(JobsCorruptedError): - self.project.repair() - - self.project.write_statepoints(statepoints) self.project.repair() - os.remove(self.project.fn(self.project.FN_STATEPOINTS)) self.project._sp_cache.clear() + self.project._remove_persistent_cache_file() for job in self.project: - pass + # Accessing the job state point triggers validation of the + # state point manifest file + job.statepoint() finally: logging.disable(logging.NOTSET) From f51ff143598ab52f7ab3cdb8e7977f1b340a60ee Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 9 Aug 2021 20:51:30 -0500 Subject: [PATCH 15/18] Remove public get_statepoint method. --- doc/api.rst | 1 - setup.cfg | 2 +- signac/contrib/project.py | 41 +-------------------------------------- 3 files changed, 2 insertions(+), 42 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index a71eb1c6e..a15543dc6 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -28,7 +28,6 @@ The Project Project.export_to Project.find_jobs Project.fn - Project.get_statepoint Project.groupby Project.groupbydoc Project.import_from diff --git a/setup.cfg b/setup.cfg index 4d9b59907..295feabc9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -35,7 +35,7 @@ omit = [tool:pytest] filterwarnings = - ignore: .*[get_statepoint | Use of.+as key] is deprecated.*: DeprecationWarning + ignore: .*[Use of.+as key] is deprecated.*: DeprecationWarning [bumpversion:file:setup.py] diff --git a/signac/contrib/project.py b/signac/contrib/project.py index 9f78a140d..6754af795 100644 --- a/signac/contrib/project.py +++ b/signac/contrib/project.py @@ -1049,7 +1049,7 @@ def _get_statepoint_from_workspace(self, job_id): raise JobsCorruptedError([job_id]) raise KeyError(job_id) - def _get_statepoint(self, job_id, fn=None): + def _get_statepoint(self, job_id): """Get the state point associated with a job id. The state point is retrieved from the internal cache, from @@ -1059,9 +1059,6 @@ def _get_statepoint(self, job_id, fn=None): ---------- job_id : str A job id to get the state point for. - fn : str - The filename of the file containing the state points, defaults - to :attr:`~signac.Project.FN_STATEPOINTS`. Returns ------- @@ -1101,42 +1098,6 @@ def _get_statepoint(self, job_id, fn=None): self._sp_cache[job_id] = statepoint return statepoint - @deprecated( - deprecated_in="1.3", - removed_in="2.0", - current_version=__version__, - details="Use open_job(id=jobid).statepoint() function instead.", - ) - def get_statepoint(self, jobid, fn=None): - """Get the state point associated with a job id. - - The state point is retrieved from the internal cache, from - the workspace or from a state points file. - - Parameters - ---------- - jobid : str - A job id to get the state point for. - fn : str - The filename of the file containing the state points, defaults - to :attr:`~signac.Project.FN_STATEPOINTS`. - - Returns - ------- - dict - The state point corresponding to jobid. - - Raises - ------ - KeyError - If the state point associated with jobid could not be found. - :class:`signac.errors.JobsCorruptedError` - If the state point manifest file corresponding to jobid is - inaccessible or corrupted. - - """ - return self._get_statepoint(job_id=jobid, fn=fn) - def create_linked_view(self, prefix=None, job_ids=None, path=None): """Create or update a persistent linked view of the selected data space. From 62034d8564e17349a5e646e067282fb0349e426e Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 9 Aug 2021 21:24:58 -0500 Subject: [PATCH 16/18] Remove error. --- signac/contrib/project.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/signac/contrib/project.py b/signac/contrib/project.py index 6754af795..80239e2df 100644 --- a/signac/contrib/project.py +++ b/signac/contrib/project.py @@ -817,8 +817,6 @@ def _find_job_ids(self, filter=None, doc_filter=None): If the filters are not JSON serializable. ValueError If the filters are invalid. - RuntimeError - If the filters are not supported. Notes ----- From d194c3bcf82585583bf1a1c644781c9934a187cb Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 10 Aug 2021 08:39:20 -0500 Subject: [PATCH 17/18] Improve rigor of state point validation after repair. --- tests/test_project.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/test_project.py b/tests/test_project.py index 950f36894..ba22e356e 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -596,18 +596,20 @@ def test_repair_corrupted_workspace(self): # Iterating through the jobs should now result in an error. with pytest.raises(JobsCorruptedError): for job in self.project: - # Accessing the job state point triggers validation of the - # state point manifest file - job.statepoint() + # Validate the state point. + sp = job.statepoint() + assert len(sp) == 1 + assert sp["a"] in range(5) self.project.repair() self.project._sp_cache.clear() self.project._remove_persistent_cache_file() for job in self.project: - # Accessing the job state point triggers validation of the - # state point manifest file - job.statepoint() + # Validate the state point. + sp = job.statepoint() + assert len(sp) == 1 + assert sp["a"] in range(5) finally: logging.disable(logging.NOTSET) From 0a490d0a13b7b6a0d3afdc51b7007039a454481d Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 10 Aug 2021 08:42:35 -0500 Subject: [PATCH 18/18] Clarify docstring notes. --- signac/contrib/project.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/signac/contrib/project.py b/signac/contrib/project.py index 80239e2df..5ec36a4eb 100644 --- a/signac/contrib/project.py +++ b/signac/contrib/project.py @@ -820,12 +820,12 @@ def _find_job_ids(self, filter=None, doc_filter=None): Notes ----- - If all arguments are ``None``, this method simply returns a list of all - job directories. This code path can be much faster for certain use cases - since it defers all work that would be required to construct an index, - so in performance-critical applications where no filtering of the data - space is required, passing no arguments to this method (as opposed to - empty dict filters) is recommended. + If all filter arguments are empty or ``None``, this method simply + returns a list of all job directories. This code path can be much faster + for certain use cases since it defers all work that would be required to + construct an index. In performance-critical applications where no + filtering of the data space is required, passing empty filters (or + ``None``) to this method is recommended. """ if not filter and not doc_filter: