From fec6a82e53d01338c85e77039a4dc05288f6ab99 Mon Sep 17 00:00:00 2001
From: Daniel Himmelstein <daniel.himmelstein@gmail.com>
Date: Tue, 28 Feb 2023 12:17:57 -0500
Subject: [PATCH] typing: strict mypy & py.typed

---
 .pre-commit-config.yaml   |  9 ++++-----
 obonet/__init__.py        |  6 ++++--
 obonet/io.py              | 10 +++++++---
 obonet/py.typed           |  0
 obonet/read.py            | 24 +++++++++++-------------
 pyproject.toml            | 19 ++++++++++++++++++-
 tests/__init__.py         |  0
 tests/test_obo_reading.py | 28 ++++++++++++++--------------
 8 files changed, 58 insertions(+), 38 deletions(-)
 create mode 100644 obonet/py.typed
 create mode 100644 tests/__init__.py

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 2fa85fa..d379787 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -18,8 +18,7 @@ repos:
     hooks:
       - id: black
         language_version: python3
-  # - repo: https://github.com/pre-commit/mirrors-mypy
-  #   rev: v0.812
-  #   hooks:
-  #     - id: mypy
-  #       args: ["--strict", "--show-error-codes"]
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.0.1
+    hooks:
+      - id: mypy
diff --git a/obonet/__init__.py b/obonet/__init__.py
index 3e25a64..77cdfc2 100644
--- a/obonet/__init__.py
+++ b/obonet/__init__.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from .read import read_obo
 
 __all__ = [
@@ -5,12 +7,12 @@
 ]
 
 
-def _get_version():
+def _get_version() -> str | None:
     # https://github.com/pypa/setuptools_scm#retrieving-package-version-at-runtime
     from pkg_resources import DistributionNotFound, get_distribution
 
     try:
-        return get_distribution("obonet").version
+        return str(get_distribution("obonet").version)
     except DistributionNotFound:
         return None
 
diff --git a/obonet/io.py b/obonet/io.py
index 68ed665..e43afb7 100644
--- a/obonet/io.py
+++ b/obonet/io.py
@@ -4,11 +4,15 @@
 import io
 import logging
 import mimetypes
+import os
 import re
+from typing import Callable, TextIO, Union
 from urllib.request import urlopen
 
+PathType = Union[str, os.PathLike, TextIO]
 
-def open_read_file(path, encoding: str | None = None):
+
+def open_read_file(path: PathType, encoding: str | None = None) -> TextIO:
     """
     Return a file object from the path. Automatically detects and supports
     URLs and compression. If path is pathlike, it's converted to a string.
@@ -18,7 +22,7 @@ def open_read_file(path, encoding: str | None = None):
     """
     # Convert pathlike objects to string paths
     if hasattr(path, "__fspath__"):
-        path = path.__fspath__()
+        path = os.fspath(path)
 
     if not isinstance(path, str):
         # Passthrough open file buffers without modification
@@ -52,7 +56,7 @@ def open_read_file(path, encoding: str | None = None):
 }
 
 
-def get_opener(filename):
+def get_opener(filename: str) -> Callable[..., TextIO]:
     """
     Automatically detect compression and return the file opening function.
     """
diff --git a/obonet/py.typed b/obonet/py.typed
new file mode 100644
index 0000000..e69de29
diff --git a/obonet/read.py b/obonet/read.py
index 7aa0d23..fddbb79 100755
--- a/obonet/read.py
+++ b/obonet/read.py
@@ -3,17 +3,17 @@
 import itertools
 import logging
 import re
-from typing import Any
+from typing import Any, Iterator
 
 import networkx
 
-from .io import open_read_file
+from .io import PathType, open_read_file
 
 logger = logging.getLogger(__name__)
 
 
 def read_obo(
-    path_or_file, ignore_obsolete: bool = True, encoding: str | None = "utf-8"
+    path_or_file: PathType, ignore_obsolete: bool = True, encoding: str | None = "utf-8"
 ) -> networkx.MultiDiGraph[str]:
     """
     Return a networkx.MultiDiGraph of the ontology serialized by the
@@ -34,9 +34,8 @@ def read_obo(
         The character set encoding to use for path_or_file when path_or_file
         is a path/URL. Set to None for platform-dependent locale default.
     """
-    obo_file = open_read_file(path_or_file, encoding=encoding)
-    typedefs, terms, instances, header = get_sections(obo_file)
-    obo_file.close()
+    with open_read_file(path_or_file, encoding=encoding) as obo_file:
+        typedefs, terms, instances, header = get_sections(obo_file)
 
     if "ontology" in header:
         header["name"] = header.get("ontology")
@@ -69,7 +68,7 @@ def read_obo(
 
 
 def get_sections(
-    lines,
+    lines: Iterator[str],
 ) -> tuple[
     list[dict[str, Any]], list[dict[str, Any]], list[dict[str, Any]], dict[str, Any]
 ]:
@@ -82,11 +81,10 @@ def get_sections(
     typedefs, terms, instances = [], [], []
     header = None
     groups = itertools.groupby(lines, lambda line: line.strip() == "")
-    for is_blank, stanza_lines in groups:
+    for is_blank, stanza_lines_iter in groups:
         if is_blank:
             continue
-        stanza_type_line = next(stanza_lines)
-        stanza_lines = list(stanza_lines)
+        stanza_type_line, *stanza_lines = stanza_lines_iter
         if stanza_type_line.startswith("[Typedef]"):
             typedef = parse_stanza(stanza_lines, typedef_tag_singularity)
             typedefs.append(typedef)
@@ -108,7 +106,7 @@ def get_sections(
 # regular expression to parse key-value pair lines.
 tag_line_pattern = re.compile(
     r"^(?P<tag>.+?): *(?P<value>.+?) ?(?P<trailing_modifier>(?<!\\)\{.*?(?<!\\)\})? ?(?P<comment>(?<!\\)!.*?)?$"
-)  # noqa: E501
+)
 
 
 def parse_tag_line(line: str) -> tuple[str, str | None, str | None, str | None]:
@@ -131,11 +129,11 @@ def parse_tag_line(line: str) -> tuple[str, str | None, str | None, str | None]:
     return tag, value, trailing_modifier, comment
 
 
-def parse_stanza(lines, tag_singularity) -> dict[str, Any]:
+def parse_stanza(lines: list[str], tag_singularity: dict[str, bool]) -> dict[str, Any]:
     """
     Returns a dictionary representation of a stanza.
     """
-    stanza = {}
+    stanza: dict[str, Any] = {}
     for line in lines:
         if line.startswith("!"):
             continue
diff --git a/pyproject.toml b/pyproject.toml
index 5608b5d..9d2fc6f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -70,6 +70,23 @@ select = [
     "W",  # pycode warnings
 ]
 
+[tool.mypy]
+python_version = "3.7"
+strict = true
+
+
 [[tool.mypy.overrides]]
-module = ["networkx.*", "setuptools.*", "pytest.*", "_pytest.*"]
+module = [
+    "networkx.*",
+    "setuptools.*",
+    "pkg_resources.*",
+    "pytest.*",
+    "_pytest.*",
+]
 ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = [
+    "tests.*",
+]
+disallow_untyped_decorators = false
\ No newline at end of file
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_obo_reading.py b/tests/test_obo_reading.py
index fb59721..7f939a4 100755
--- a/tests/test_obo_reading.py
+++ b/tests/test_obo_reading.py
@@ -9,7 +9,7 @@
 directory = os.path.dirname(os.path.abspath(__file__))
 
 
-def test_read_taxrank_file():
+def test_read_taxrank_file() -> None:
     """
     Test reading the taxrank ontology OBO file.
     """
@@ -24,20 +24,20 @@ def test_read_taxrank_file():
 
 @pytest.mark.parametrize("extension", ["", ".gz", ".bz2", ".xz"])
 @pytest.mark.parametrize("pathlike", [False, True])
-def test_read_taxrank_path(extension, pathlike):
+def test_read_taxrank_path(extension: str, pathlike: bool) -> None:
     """
     Test reading the taxrank ontology OBO file from paths. Includes reading
     compressed paths.
     """
     path = os.path.join(directory, "data", "taxrank.obo" + extension)
     if pathlike:
-        path = pathlib.Path(path)
+        path = pathlib.Path(path)  # type: ignore [assignment]
     taxrank = obonet.read_obo(path)
     assert len(taxrank) == 61
 
 
 @pytest.mark.parametrize("extension", ["", ".gz", ".bz2", ".xz"])
-def test_read_taxrank_url(extension):
+def test_read_taxrank_url(extension: str) -> None:
     """
     Test reading the taxrank ontology OBO file from paths. Includes reading
     compressed paths.
@@ -48,7 +48,7 @@ def test_read_taxrank_url(extension):
     assert len(taxrank) == 61
 
 
-def test_read_brenda_subset():
+def test_read_brenda_subset() -> None:
     """
     Test reading a subset of the BrendaTissue.obo file. This file does not set
     the ontology tag. See <https://github.com/dhimmel/obonet/issues/10>.
@@ -64,7 +64,7 @@ def test_read_brenda_subset():
 
 
 @pytest.mark.parametrize("ontology", ["doid", "go", "pato"])
-def test_read_obo(ontology):
+def test_read_obo(ontology: str) -> None:
     """
     Test that reading ontology does not error.
     """
@@ -73,7 +73,7 @@ def test_read_obo(ontology):
     assert graph
 
 
-def test_parse_tag_line_newline_agnostic():
+def test_parse_tag_line_newline_agnostic() -> None:
     for line in ["saved-by: vw", "saved-by: vw\n"]:
         tag, value, trailing_modifier, comment = parse_tag_line(line)
         assert tag == "saved-by"
@@ -82,7 +82,7 @@ def test_parse_tag_line_newline_agnostic():
         assert comment is None
 
 
-def test_parse_tag_line_with_tag_and_value():
+def test_parse_tag_line_with_tag_and_value() -> None:
     line = 'synonym: "ovarian ring canal" NARROW []\n'
     tag, value, trailing_modifier, comment = parse_tag_line(line)
     assert tag == "synonym"
@@ -91,7 +91,7 @@ def test_parse_tag_line_with_tag_and_value():
     assert comment is None
 
 
-def test_parse_tag_line_with_tag_value_and_comment():
+def test_parse_tag_line_with_tag_value_and_comment() -> None:
     line = "is_a: GO:0005102 ! receptor binding\n"
     tag, value, trailing_modifier, comment = parse_tag_line(line)
     assert tag == "is_a"
@@ -100,7 +100,7 @@ def test_parse_tag_line_with_tag_value_and_comment():
     assert comment == "receptor binding"
 
 
-def test_parse_tag_line_with_tag_value_and_trailing_modifier():
+def test_parse_tag_line_with_tag_value_and_trailing_modifier() -> None:
     line = 'xref: UMLS:C0226369 {source="ncithesaurus:Obturator_Artery"}\n'
     tag, value, trailing_modifier, comment = parse_tag_line(line)
     assert tag == "xref"
@@ -109,7 +109,7 @@ def test_parse_tag_line_with_tag_value_and_trailing_modifier():
     assert comment is None
 
 
-def test_parse_tag_line_with_tag_value_trailing_modifier_and_comment():
+def test_parse_tag_line_with_tag_value_trailing_modifier_and_comment() -> None:
     line = 'xref: UMLS:C0022131 {source="ncithesaurus:Islet_of_Langerhans"} ! Islets of Langerhans\n'  # noqa: E501
     tag, value, trailing_modifier, comment = parse_tag_line(line)
     assert tag == "xref"
@@ -118,14 +118,14 @@ def test_parse_tag_line_with_tag_value_trailing_modifier_and_comment():
     assert comment == "Islets of Langerhans"
 
 
-def test_parse_tag_line_backslashed_exclamation():
+def test_parse_tag_line_backslashed_exclamation() -> None:
     line = "synonym: not a real example \\!\n"
     tag, value, trailing_modifier, comment = parse_tag_line(line)
     assert tag == "synonym"
     assert value == r"not a real example \!"
 
 
-def test_ignore_obsolete_nodes():
+def test_ignore_obsolete_nodes() -> None:
     """Quick verification that the change doesn't break anything"""
     path = os.path.join(directory, "data", "brenda-subset.obo")
     brenda = obonet.read_obo(path)
@@ -133,7 +133,7 @@ def test_ignore_obsolete_nodes():
     assert "BTO:0000311" not in nodes
 
 
-def test_presence_of_obsolete_nodes():
+def test_presence_of_obsolete_nodes() -> None:
     """Test that we did, indeed, capture those obsolete entries"""
     pytest.importorskip("networkx", minversion="2.0")
     path = os.path.join(directory, "data", "brenda-subset.obo")