From 353174665ee816b01c79f75ed950f96c1f8b14df Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 6 Aug 2021 11:23:00 -0500 Subject: [PATCH] Remove Project.index. (#593) * Make Project._index method private, refactor to remove unused arguments. * Remove xfailed crawler test (functionality no longer needed). * Delete indexing module. * Clean up references to index in docs. --- doc/api.rst | 1 - signac/__main__.py | 2 +- signac/contrib/indexing.py | 451 ------------------------------------- signac/contrib/project.py | 72 ++---- signac/contrib/utility.py | 41 ---- tests/test_indexing.py | 135 ----------- tests/test_project.py | 65 +----- 7 files changed, 25 insertions(+), 742 deletions(-) delete mode 100644 signac/contrib/indexing.py delete mode 100644 tests/test_indexing.py diff --git a/doc/api.rst b/doc/api.rst index 99a0f1292..a8e0fbb83 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -34,7 +34,6 @@ The Project Project.groupbydoc Project.import_from Project.id - Project.index Project.isfile Project.min_len_unique_id Project.num_jobs diff --git a/signac/__main__.py b/signac/__main__.py index 2c451e02e..6e746a840 100644 --- a/signac/__main__.py +++ b/signac/__main__.py @@ -174,7 +174,7 @@ def main_project(args): """Handle project subcommand.""" project = get_project() if args.index: - for doc in project.index(): + for doc in project._index(): print(json.dumps(doc)) return if args.workspace: diff --git a/signac/contrib/indexing.py b/signac/contrib/indexing.py deleted file mode 100644 index ee93aa9b7..000000000 --- a/signac/contrib/indexing.py +++ /dev/null @@ -1,451 +0,0 @@ -# Copyright (c) 2017 The Regents of the University of Michigan -# All rights reserved. -# This software is licensed under the BSD 3-Clause License. -"""Indexing features.""" - -import errno -import hashlib -import json -import logging -import math -import os -import re -import warnings - -from ..common import errors -from .hashing import calc_id -from .utility import walkdepth - -logger = logging.getLogger(__name__) - -KEY_PROJECT = "project" -KEY_FILENAME = "filename" -KEY_PATH = "root" -KEY_PAYLOAD = "format" - - -def _compute_file_md5(file): - """Calculate and return the md5 hash value for the file data.""" - m = hashlib.md5() - for chunk in iter(lambda: file.read(4096), b""): - m.update(chunk) - return m.hexdigest() - - -class _BaseCrawler: - """Crawl through ``root`` and index all files. - - The crawler creates an index on data, which can be exported to a database - for easier access. - """ - - tags = None - - def __init__(self, root): - """Initialize a _BaseCrawler instance. - - Parameters - ---------- - root : str - The path to the root directory to crawl through. - - """ - self.root = os.path.expanduser(root) - self.tags = set() if self.tags is None else set(self.tags) - - def docs_from_file(self, dirpath, fn): - """Implement this method to generate documents from files. - - Parameters - ---------- - dirpath : str - The path of the file, relative to ``root``. - fn : str - The filename. - - Yields - ------ - dict - Index documents. 
- - """ - raise NotImplementedError() - - def fetch(self, doc, mode="r"): - """Implement this generator method to associate data with a document.""" - raise errors.FetchError(f"Unable to fetch object for '{doc}'.") - - @classmethod - def _calculate_hash(cls, doc, dirpath, fn): - blob = json.dumps(doc, sort_keys=True) - m = hashlib.md5() - m.update(dirpath.encode("utf-8")) - m.update(fn.encode("utf-8")) - m.update(blob.encode("utf-8")) - return m.hexdigest() - - def crawl(self, depth=0): - """Crawl through the ``root`` directory. - - The crawler will inspect every file and directory up - until the specified ``depth`` and call the - :meth:`docs_from_file` method. - - Parameters - ---------- - depth : int - Maximum directory depth to crawl. A value of 0 specifies no limit. - - Yields - ------ - dict - Document. - - """ - logger.info(f"Crawling '{self.root}' (depth={depth})...") - for dirpath, dirnames, filenames in walkdepth(self.root, depth): - for fn in filenames: - for doc in self.docs_from_file(dirpath, fn): - logger.debug(f"doc from file: '{os.path.join(dirpath, fn)}'.") - doc.setdefault(KEY_PAYLOAD, None) - doc.setdefault("_id", self._calculate_hash(doc, dirpath, fn)) - yield doc - logger.info(f"Crawl of '{self.root}' done.") - - def process(self, doc, dirpath, fn): - """Implement this method for processing generated documents. - - The default implementation will return the unmodified ``doc``. - - Parameters - ---------- - dirpath : str - The path of the file, relative to `root`. - fn : str - The filename. - - Returns - ------- - dict - A document. - - """ - return doc - - -class _RegexFileCrawler(_BaseCrawler): - r"""Generate documents from filenames and associate each file with a data type. - - The :py:class:`_RegexFileCrawler` uses regular expressions to generate - data from files. This is a particular easy method to retrieve metadata - associated with files. Inherit from this class to configure a crawler - for your data structure. - - Let's assume we want to index text files, with a naming pattern, that - specifies a parameter ``a`` through the filename, e.g.: - - .. code-block:: python - - ~/my_project/a_0.txt - ~/my_project/a_1.txt - ... - - A valid regular expression to match this pattern would - be: ``.*\/a_(?P\d+)\.txt`` which may be defined for a crawler as such: - - .. code-block:: python - - MyCrawler(_RegexFileCrawler): - pass - - MyCrawler.define('.*\/a_(?P\d+)\.txt', 'TextFile') - - """ - - "Mapping of compiled regex objects and associated formats." - definitions = {} # type: ignore - - @classmethod - def define(cls, regex, format_=None): - """Define a format for a particular regular expression. - - Parameters - ---------- - regex : str - A regular expression used to match files of the specified format. - format_ : object - The format associated with all matching files. - - """ - if isinstance(regex, str): - regex = re.compile(regex) - definitions = dict(cls.definitions) - definitions[regex] = format_ - cls.definitions = definitions - - @classmethod - def compute_file_id(cls, doc, file): - """Compute the file id for a given doc and the associated file. - - The resulting id is assigned to ``doc["md5"]``. - - Parameters - ---------- - doc : dict - The index document. - file : file-like object - The associated file - - Returns - ------- - str - The file id. - - """ - file_id = doc["md5"] = _compute_file_md5(file) - return file_id - - def docs_from_file(self, dirpath, fn): - """Generate documents from filenames. 
- - This method implements the abstract - :py:meth:`~._BaseCrawler.docs_from_file` and yields index - documents associated with files. - - Notes - ----- - It is not recommended to reimplement this method to modify - documents generated from filenames. - See :py:meth:`~_RegexFileCrawler.process` instead. - - Parameters - ---------- - dirpath : str - The path of the file relative to root. - fn : str - The filename of the file. - - Yields - ------ - dict - Index document. - - """ - for regex, format_ in self.definitions.items(): - m = regex.match(os.path.join(dirpath, fn)) - if m: - doc = self.process(m.groupdict(), dirpath, fn) - doc[KEY_FILENAME] = os.path.relpath( - os.path.join(dirpath, fn), self.root - ) - doc[KEY_PATH] = os.path.abspath(self.root) - doc[KEY_PAYLOAD] = str(format_) - with open(os.path.join(dirpath, fn), "rb") as file: - doc["file_id"] = self.compute_file_id(doc, file) - yield doc - - def fetch(self, doc, mode="r"): - """Fetch the data associated with ``doc``. - - Parameters - ---------- - doc : dict - An index document. - mode : str - Mode used to open file object. - - Returns - ------- - file-like object - The file associated with the index document. - - """ - fn = doc.get(KEY_FILENAME) - if fn: - for regex, format_ in self.definitions.items(): - ffn = os.path.join(self.root, fn) - m = regex.match(ffn) - if m: - if isinstance(format_, str): - return open(ffn, mode=mode) - else: - for meth in ("read", "close"): - if not callable(getattr(format_, meth, None)): - msg = f"Format {format_} has no {meth}() method." - warnings.warn(msg) - return format_(open(ffn, mode=mode)) - else: - raise errors.FetchError( - f"Unable to match file path of doc '{doc}' to format definition." - ) - else: - raise errors.FetchError(f"Insufficient metadata in doc '{doc}'.") - - def process(self, doc, dirpath, fn): - """Post-process documents generated from filenames. - - Examples - -------- - .. code-block:: python - - MyCrawler(signac.indexing._RegexFileCrawler): - def process(self, doc, dirpath, fn): - doc['long_name_for_a'] = doc['a'] - return super(MyCrawler, self).process(doc, dirpath, fn) - - Parameters - ---------- - dirpath : str - The path of the file, relative to ``root``. - fn : str - The filename. - - Returns - ------- - dict - An index document. 
- - """ - result = {} - for key, value in doc.items(): - if value is None or isinstance(value, bool): - result[key] = value - continue - try: - value = float(value) - except Exception: - result[key] = value - else: - if not math.isnan(value) or math.isinf(value): - if float(value) == int(value): - result[key] = int(value) - else: - result[key] = float(value) - return super().process(result, dirpath, fn) - - def crawl(self, depth=0): - if self.definitions: - yield from super().crawl(depth=depth) - else: - return - - -def _index_signac_project_workspace( - root, - include_job_document=True, - fn_statepoint="signac_statepoint.json", - fn_job_document="signac_job_document.json", - statepoint_index="statepoint", - signac_id_alias="_id", - encoding="utf-8", - statepoint_dict=None, -): - """Yield standard index documents for a signac project workspace.""" - logger.debug(f"Indexing workspace '{root}'...") - m = re.compile(r"[a-f0-9]{32}") - try: - job_ids = [jid for jid in os.listdir(root) if m.match(jid)] - except OSError as error: - if error.errno == errno.ENOENT: - return - else: - raise - for i, job_id in enumerate(job_ids): - if not m.match(job_id): - continue - doc = {"signac_id": job_id, KEY_PATH: root} - if signac_id_alias: - doc[signac_id_alias] = job_id - fn_sp = os.path.join(root, job_id, fn_statepoint) - with open(fn_sp, "rb") as file: - sp = json.loads(file.read().decode(encoding)) - if statepoint_dict is not None: - statepoint_dict[job_id] = sp - if statepoint_index: - doc[statepoint_index] = sp - else: - doc.update(sp) - if include_job_document: - fn_doc = os.path.join(root, job_id, fn_job_document) - try: - with open(fn_doc, "rb") as file: - doc.update(json.loads(file.read().decode(encoding))) - except OSError as error: - if error.errno != errno.ENOENT: - raise - yield doc - if job_ids: - logger.debug(f"Indexed workspace '{root}', {i + 1} entries.") - - -class _SignacProjectCrawler(_RegexFileCrawler): - """Index a signac project workspace. - - Without any file format definitions, this crawler yields index documents - for each job, including the state point and the job document. - - See Also - -------- - :py:class:`~._RegexFileCrawler` - - Parameters - ---------- - root : str - The path to the project's root directory. 
- - """ - - encoding = "utf-8" - statepoint_index = "statepoint" - fn_statepoint = "signac_statepoint.json" - fn_job_document = "signac_job_document.json" - signac_id_alias = "_id" - - def __init__(self, root): - from .project import get_project - - root = get_project(root=root).workspace() - self._statepoints = {} - return super().__init__(root=root) - - def _get_job_id(self, dirpath): - return os.path.relpath(dirpath, self.root).split("/")[0] - - def _read_statepoint(self, job_id): - fn_sp = os.path.join(self.root, job_id, self.fn_statepoint) - with open(fn_sp, "rb") as file: - return json.loads(file.read().decode(self.encoding)) - - def _get_statepoint(self, job_id): - sp = self._statepoints.setdefault(job_id, self._read_statepoint(job_id)) - assert calc_id(sp) == job_id - return sp - - def get_statepoint(self, dirpath): - job_id = self._get_job_id(dirpath) - return job_id, self._get_statepoint(self, job_id) - - def process(self, doc, dirpath, fn): - if dirpath is not None: - job_id = self._get_job_id(dirpath) - statepoint = self._get_statepoint(job_id) - doc["signac_id"] = job_id - if self.statepoint_index: - doc[self.statepoint_index] = statepoint - else: - doc.update(statepoint) - return super().process(doc, dirpath, fn) - - def crawl(self, depth=0): - for doc in _index_signac_project_workspace( - root=self.root, - fn_statepoint=self.fn_statepoint, - fn_job_document=self.fn_job_document, - statepoint_index=self.statepoint_index, - signac_id_alias=self.signac_id_alias, - encoding=self.encoding, - statepoint_dict=self._statepoints, - ): - yield self.process(doc, None, None) - for doc in super().crawl(depth=depth): - yield doc diff --git a/signac/contrib/project.py b/signac/contrib/project.py index e926d25d1..9a92430bd 100644 --- a/signac/contrib/project.py +++ b/signac/contrib/project.py @@ -38,7 +38,6 @@ ) from .filterparse import _add_prefix, _root_keys, parse_filter from .hashing import calc_id -from .indexing import _SignacProjectCrawler from .job import Job from .schema import ProjectSchema from .utility import _mkdir_p, _nested_dicts_to_dotted_keys, _split_and_print_progress @@ -816,9 +815,7 @@ def detect_schema(self, exclude_const=False, subset=None, index=None): from .schema import _build_job_statepoint_index if index is None: - index = self.index(include_job_document=False) - else: - warnings.warn(INDEX_DEPRECATION_WARNING, FutureWarning) + index = self._index(include_job_document=False) if subset is not None: subset = {str(s) for s in subset} index = [doc for doc in index if doc["_id"] in subset] @@ -884,9 +881,9 @@ def _find_job_ids(self, filter=None, doc_filter=None, index=None): if doc_filter: warnings.warn(DOC_FILTER_WARNING, FutureWarning) filter.update(parse_filter(_add_prefix("doc.", doc_filter))) - index = self.index(include_job_document=True) + index = self._index(include_job_document=True) elif "doc" in _root_keys(filter): - index = self.index(include_job_document=True) + index = self._index(include_job_document=True) else: index = self._sp_index() else: @@ -1883,15 +1880,7 @@ def _read_cache(self): logger.debug(f"Read cache in {delta:.3f} seconds.") return cache - @deprecated( - deprecated_in="1.8", - removed_in="2.0", - current_version=__version__, - details="Indexing is deprecated.", - ) - def index( - self, formats=None, depth=0, skip_errors=False, include_job_document=True - ): + def _index(self, *, include_job_document=True): r"""Generate an index of the project's workspace. 
This generator function indexes every file in the project's @@ -1901,11 +1890,6 @@ def index( See :ref:`signac project -i ` for the command line equivalent. - .. code-block:: python - - for doc in project.index({r'.*\.txt', 'TextFile'}): - print(doc) - Parameters ---------- formats : str, dict @@ -1928,42 +1912,28 @@ def index( Index document. """ - if formats is None: - root = self.workspace() + root = self.workspace() - def _full_doc(doc): - """Add `signac_id` and `root` to the index document. + def _full_doc(doc): + """Add `signac_id` and `root` to the index document. - Parameters - ---------- - doc : dict - Index document. + Parameters + ---------- + doc : dict + Index document. - Returns - ------- - dict - Modified index document. + Returns + ------- + dict + Modified index document. - """ - doc["signac_id"] = doc["_id"] - doc["root"] = root - return doc + """ + doc["signac_id"] = doc["_id"] + doc["root"] = root + return doc - docs = self._build_index(include_job_document=include_job_document) - docs = map(_full_doc, docs) - else: - if isinstance(formats, str): - formats = {formats: "File"} - - class Crawler(_SignacProjectCrawler): - pass - - for pattern, fmt in formats.items(): - Crawler.define(pattern, fmt) - crawler = Crawler(self.root_directory()) - docs = crawler.crawl(depth=depth) - if skip_errors: - docs = _skip_errors(docs, logger.critical) + docs = self._build_index(include_job_document=include_job_document) + docs = map(_full_doc, docs) for doc in docs: yield doc diff --git a/signac/contrib/utility.py b/signac/contrib/utility.py index 5753e9681..216377a3a 100644 --- a/signac/contrib/utility.py +++ b/signac/contrib/utility.py @@ -88,47 +88,6 @@ def add_verbosity_argument(parser, default=0): ) -def walkdepth(path, depth=0): - """Transverse the directory starting from path. - - Parameters - ---------- - path :str - Directory passed to walk (transverse from). - depth : int - (Default value = 0) - - Yields - ------ - str - When depth==0. - tuple - When depth>0. - - Raises - ------ - ValueError - When the value of depth is negative. - OSError - When path is not name of a directory. - - """ - if depth == 0: - yield from os.walk(path) - elif depth > 0: - path = path.rstrip(os.path.sep) - if not os.path.isdir(path): - raise OSError(f"Not a directory: '{path}'.") - num_sep = path.count(os.path.sep) - for root, dirs, files in os.walk(path): - yield root, dirs, files - num_sep_this = root.count(os.path.sep) - if num_sep + depth <= num_sep_this: - del dirs[:] - else: - raise ValueError("The value of depth must be non-negative.") - - def _mkdir_p(path): """Make a new directory, or do nothing if the directory already exists. diff --git a/tests/test_indexing.py b/tests/test_indexing.py deleted file mode 100644 index c8019c5f0..000000000 --- a/tests/test_indexing.py +++ /dev/null @@ -1,135 +0,0 @@ -# Copyright (c) 2017 The Regents of the University of Michigan -# All rights reserved. -# This software is licensed under the BSD 3-Clause License. 
-import json -import os -import re -from tempfile import TemporaryDirectory -from unittest.mock import Mock - -import pytest - -from signac import Collection -from signac.contrib import indexing -from signac.errors import FetchError - - -class TestFormat: - def read(self): - assert 0 - - def close(self): - assert 0 - - -class TestIndexingBase: - @pytest.fixture(autouse=True) - def setUp(self, request): - self._tmp_dir = TemporaryDirectory(prefix="signac_") - request.addfinalizer(self._tmp_dir.cleanup) - - def setup_project(self): - def fn(name): - return os.path.join(self._tmp_dir.name, name) - - with open(fn("a_0.txt"), "w") as file: - file.write('{"a": 0}') - with open(fn("a_1.txt"), "w") as file: - file.write('{"a": 1}') - with open(fn("a_0.json"), "w") as file: - json.dump(dict(a=0), file) - with open(fn("a_1.json"), "w") as file: - json.dump(dict(a=1), file) - - def get_index_collection(self): - c = Collection() - return Mock(spec=c, wraps=c) - - def test_base_crawler(self): - crawler = indexing._BaseCrawler(root=self._tmp_dir.name) - assert len(list(crawler.crawl())) == 0 - doc = dict(a=0) - with pytest.raises(FetchError): - assert crawler.fetch(doc) is None - assert doc == crawler.process(doc, None, None) - with pytest.raises(NotImplementedError): - for doc in crawler.docs_from_file(None, None): - pass - - def test_regex_file_crawler_pre_compiled(self): - self.setup_project() - - class Crawler(indexing._RegexFileCrawler): - pass - - regex = re.compile(r".*a_(?P\d)\.txt") - Crawler.define(regex, TestFormat) - crawler = Crawler(root=self._tmp_dir.name) - no_find = True - for doc in crawler.crawl(): - no_find = False - ffn = os.path.join(doc["root"], doc["filename"]) - m = regex.match(ffn) - assert m is not None - assert os.path.isfile(ffn) - with open(ffn) as file: - doc2 = json.load(file) - assert doc2["a"] == doc["a"] - assert not no_find - - def test_regex_file_crawler(self): - self.setup_project() - - class Crawler(indexing._RegexFileCrawler): - pass - - # First test without pattern - crawler = Crawler(root=self._tmp_dir.name) - assert len(list(crawler.crawl())) == 0 - - # Now with pattern(s) - pattern = r".*a_(?P\d)\.txt" - regex = re.compile(pattern) - Crawler.define(pattern, TestFormat) - Crawler.define("negativematch", "negativeformat") - crawler = Crawler(root=self._tmp_dir.name) - no_find = True - for doc in crawler.crawl(): - no_find = False - ffn = os.path.join(doc["root"], doc["filename"]) - m = regex.match(ffn) - assert m is not None - assert os.path.isfile(ffn) - with open(ffn) as file: - doc2 = json.load(file) - assert doc2["a"] == doc["a"] - assert not no_find - with pytest.raises(FetchError): - crawler.fetch(dict()) - with pytest.raises(FetchError): - crawler.fetch({"filename": "shouldnotmatch"}) - - def test_regex_file_crawler_inheritance(self): - self.setup_project() - - class CrawlerA(indexing._RegexFileCrawler): - pass - - class CrawlerB(indexing._RegexFileCrawler): - pass - - CrawlerA.define("a", TestFormat) - CrawlerB.define("b", TestFormat) - assert len(CrawlerA.definitions) == 1 - assert len(CrawlerB.definitions) == 1 - - class CrawlerC(CrawlerA): - pass - - assert len(CrawlerA.definitions) == 1 - assert len(CrawlerC.definitions) == 1 - assert len(CrawlerB.definitions) == 1 - CrawlerC.define("c", TestFormat) - assert len(CrawlerA.definitions) == 1 - assert len(CrawlerB.definitions) == 1 - assert len(CrawlerC.definitions) == 2 diff --git a/tests/test_project.py b/tests/test_project.py index c5cb70d12..5c92543e9 100644 --- a/tests/test_project.py +++ 
b/tests/test_project.py @@ -630,77 +630,18 @@ def test_repair_corrupted_workspace(self): logging.disable(logging.NOTSET) def test_index(self): - docs = list(self.project.index(include_job_document=True)) + docs = list(self.project._index(include_job_document=True)) assert len(docs) == 0 - docs = list(self.project.index(include_job_document=False)) + docs = list(self.project._index(include_job_document=False)) assert len(docs) == 0 statepoints = [{"a": i} for i in range(5)] for sp in statepoints: self.project.open_job(sp).document["test"] = True job_ids = {job.id for job in self.project.find_jobs()} - docs = list(self.project.index()) + docs = list(self.project._index()) job_ids_cmp = {doc["_id"] for doc in docs} assert job_ids == job_ids_cmp assert len(docs) == len(statepoints) - for sp in statepoints: - with self.project.open_job(sp): - with open("test.txt", "w"): - pass - docs = list( - self.project.index( - {".*" + re.escape(os.path.sep) + r"test\.txt": "TextFile"} - ) - ) - assert len(docs) == 2 * len(statepoints) - assert len({doc["_id"] for doc in docs}) == len(docs) - - # Index schema is changed - @pytest.mark.xfail() - def test_signac_project_crawler(self): - statepoints = [{"a": i} for i in range(5)] - for sp in statepoints: - self.project.open_job(sp).document["test"] = True - job_ids = {job.id for job in self.project.find_jobs()} - index = {} - for doc in self.project.index(): - index[doc["_id"]] = doc - assert len(index) == len(job_ids) - assert set(index.keys()) == set(job_ids) - crawler = signac.contrib._SignacProjectCrawler(self.project.root_directory()) - index2 = {} - for doc in crawler.crawl(): - index2[doc["_id"]] = doc - for _id, _id2 in zip(index, index2): - assert _id == _id2 - assert index[_id] == index2[_id] - assert index == index2 - for job in self.project.find_jobs(): - with open(job.fn("test.txt"), "w") as file: - file.write("test\n") - formats = {r".*" + re.escape(os.path.sep) + r"test\.txt": "TextFile"} - index = {} - for doc in self.project.index(formats): - index[doc["_id"]] = doc - assert len(index) == 2 * len(job_ids) - - class Crawler(signac.contrib._SignacProjectCrawler): - called = False - - def process(self_, doc, dirpath, fn): - Crawler.called = True - doc = super().process(doc=doc, dirpath=dirpath, fn=fn) - if "format" in doc and doc["format"] is None: - assert doc["_id"] == doc["signac_id"] - return doc - - for p, fmt in formats.items(): - with pytest.deprecated_call(): - Crawler.define(p, fmt) - index2 = {} - for doc in Crawler(root=self.project.root_directory()).crawl(): - index2[doc["_id"]] = doc - assert index == index2 - assert Crawler.called def test_custom_project(self): class CustomProject(signac.Project):
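For reference, a minimal sketch of how the now-private generator is consumed after this change, mirroring the `signac project -i` code path in signac/__main__.py above. It assumes an existing signac project in the current working directory; apart from the names taken from the patch (`_index`, `include_job_document`), nothing here is prescriptive.

    import json

    import signac

    project = signac.get_project()
    # Each yielded document carries "_id", "signac_id", and "root"
    # (added by the _full_doc helper in project.py above), plus the job
    # document contents when include_job_document=True.
    for doc in project._index(include_job_document=True):
        print(json.dumps(doc))

The same documents are what `signac project -i` now prints, since the CLI handler calls `project._index()` directly rather than the removed public `Project.index()` method.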