From fec6a82e53d01338c85e77039a4dc05288f6ab99 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Tue, 28 Feb 2023 12:17:57 -0500 Subject: [PATCH] typing: strict mypy & py.typed --- .pre-commit-config.yaml | 9 ++++----- obonet/__init__.py | 6 ++++-- obonet/io.py | 10 +++++++--- obonet/py.typed | 0 obonet/read.py | 24 +++++++++++------------- pyproject.toml | 19 ++++++++++++++++++- tests/__init__.py | 0 tests/test_obo_reading.py | 28 ++++++++++++++-------------- 8 files changed, 58 insertions(+), 38 deletions(-) create mode 100644 obonet/py.typed create mode 100644 tests/__init__.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2fa85fa..d379787 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,8 +18,7 @@ repos: hooks: - id: black language_version: python3 - # - repo: https://github.com/pre-commit/mirrors-mypy - # rev: v0.812 - # hooks: - # - id: mypy - # args: ["--strict", "--show-error-codes"] + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.0.1 + hooks: + - id: mypy diff --git a/obonet/__init__.py b/obonet/__init__.py index 3e25a64..77cdfc2 100644 --- a/obonet/__init__.py +++ b/obonet/__init__.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from .read import read_obo __all__ = [ @@ -5,12 +7,12 @@ ] -def _get_version(): +def _get_version() -> str | None: # https://github.com/pypa/setuptools_scm#retrieving-package-version-at-runtime from pkg_resources import DistributionNotFound, get_distribution try: - return get_distribution("obonet").version + return str(get_distribution("obonet").version) except DistributionNotFound: return None diff --git a/obonet/io.py b/obonet/io.py index 68ed665..e43afb7 100644 --- a/obonet/io.py +++ b/obonet/io.py @@ -4,11 +4,15 @@ import io import logging import mimetypes +import os import re +from typing import Callable, TextIO, Union from urllib.request import urlopen +PathType = Union[str, os.PathLike, TextIO] -def open_read_file(path, encoding: str | None = None): + +def open_read_file(path: PathType, encoding: str | None = None) -> TextIO: """ Return a file object from the path. Automatically detects and supports URLs and compression. If path is pathlike, it's converted to a string. @@ -18,7 +22,7 @@ def open_read_file(path, encoding: str | None = None): """ # Convert pathlike objects to string paths if hasattr(path, "__fspath__"): - path = path.__fspath__() + path = os.fspath(path) if not isinstance(path, str): # Passthrough open file buffers without modification @@ -52,7 +56,7 @@ def open_read_file(path, encoding: str | None = None): } -def get_opener(filename): +def get_opener(filename: str) -> Callable[..., TextIO]: """ Automatically detect compression and return the file opening function. """ diff --git a/obonet/py.typed b/obonet/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/obonet/read.py b/obonet/read.py index 7aa0d23..fddbb79 100755 --- a/obonet/read.py +++ b/obonet/read.py @@ -3,17 +3,17 @@ import itertools import logging import re -from typing import Any +from typing import Any, Iterator import networkx -from .io import open_read_file +from .io import PathType, open_read_file logger = logging.getLogger(__name__) def read_obo( - path_or_file, ignore_obsolete: bool = True, encoding: str | None = "utf-8" + path_or_file: PathType, ignore_obsolete: bool = True, encoding: str | None = "utf-8" ) -> networkx.MultiDiGraph[str]: """ Return a networkx.MultiDiGraph of the ontology serialized by the @@ -34,9 +34,8 @@ def read_obo( The character set encoding to use for path_or_file when path_or_file is a path/URL. Set to None for platform-dependent locale default. """ - obo_file = open_read_file(path_or_file, encoding=encoding) - typedefs, terms, instances, header = get_sections(obo_file) - obo_file.close() + with open_read_file(path_or_file, encoding=encoding) as obo_file: + typedefs, terms, instances, header = get_sections(obo_file) if "ontology" in header: header["name"] = header.get("ontology") @@ -69,7 +68,7 @@ def read_obo( def get_sections( - lines, + lines: Iterator[str], ) -> tuple[ list[dict[str, Any]], list[dict[str, Any]], list[dict[str, Any]], dict[str, Any] ]: @@ -82,11 +81,10 @@ def get_sections( typedefs, terms, instances = [], [], [] header = None groups = itertools.groupby(lines, lambda line: line.strip() == "") - for is_blank, stanza_lines in groups: + for is_blank, stanza_lines_iter in groups: if is_blank: continue - stanza_type_line = next(stanza_lines) - stanza_lines = list(stanza_lines) + stanza_type_line, *stanza_lines = stanza_lines_iter if stanza_type_line.startswith("[Typedef]"): typedef = parse_stanza(stanza_lines, typedef_tag_singularity) typedefs.append(typedef) @@ -108,7 +106,7 @@ def get_sections( # regular expression to parse key-value pair lines. tag_line_pattern = re.compile( r"^(?P.+?): *(?P.+?) ?(?P(?(? tuple[str, str | None, str | None, str | None]: @@ -131,11 +129,11 @@ def parse_tag_line(line: str) -> tuple[str, str | None, str | None, str | None]: return tag, value, trailing_modifier, comment -def parse_stanza(lines, tag_singularity) -> dict[str, Any]: +def parse_stanza(lines: list[str], tag_singularity: dict[str, bool]) -> dict[str, Any]: """ Returns a dictionary representation of a stanza. """ - stanza = {} + stanza: dict[str, Any] = {} for line in lines: if line.startswith("!"): continue diff --git a/pyproject.toml b/pyproject.toml index 5608b5d..9d2fc6f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,6 +70,23 @@ select = [ "W", # pycode warnings ] +[tool.mypy] +python_version = "3.7" +strict = true + + [[tool.mypy.overrides]] -module = ["networkx.*", "setuptools.*", "pytest.*", "_pytest.*"] +module = [ + "networkx.*", + "setuptools.*", + "pkg_resources.*", + "pytest.*", + "_pytest.*", +] ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = [ + "tests.*", +] +disallow_untyped_decorators = false \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_obo_reading.py b/tests/test_obo_reading.py index fb59721..7f939a4 100755 --- a/tests/test_obo_reading.py +++ b/tests/test_obo_reading.py @@ -9,7 +9,7 @@ directory = os.path.dirname(os.path.abspath(__file__)) -def test_read_taxrank_file(): +def test_read_taxrank_file() -> None: """ Test reading the taxrank ontology OBO file. """ @@ -24,20 +24,20 @@ def test_read_taxrank_file(): @pytest.mark.parametrize("extension", ["", ".gz", ".bz2", ".xz"]) @pytest.mark.parametrize("pathlike", [False, True]) -def test_read_taxrank_path(extension, pathlike): +def test_read_taxrank_path(extension: str, pathlike: bool) -> None: """ Test reading the taxrank ontology OBO file from paths. Includes reading compressed paths. """ path = os.path.join(directory, "data", "taxrank.obo" + extension) if pathlike: - path = pathlib.Path(path) + path = pathlib.Path(path) # type: ignore [assignment] taxrank = obonet.read_obo(path) assert len(taxrank) == 61 @pytest.mark.parametrize("extension", ["", ".gz", ".bz2", ".xz"]) -def test_read_taxrank_url(extension): +def test_read_taxrank_url(extension: str) -> None: """ Test reading the taxrank ontology OBO file from paths. Includes reading compressed paths. @@ -48,7 +48,7 @@ def test_read_taxrank_url(extension): assert len(taxrank) == 61 -def test_read_brenda_subset(): +def test_read_brenda_subset() -> None: """ Test reading a subset of the BrendaTissue.obo file. This file does not set the ontology tag. See . @@ -64,7 +64,7 @@ def test_read_brenda_subset(): @pytest.mark.parametrize("ontology", ["doid", "go", "pato"]) -def test_read_obo(ontology): +def test_read_obo(ontology: str) -> None: """ Test that reading ontology does not error. """ @@ -73,7 +73,7 @@ def test_read_obo(ontology): assert graph -def test_parse_tag_line_newline_agnostic(): +def test_parse_tag_line_newline_agnostic() -> None: for line in ["saved-by: vw", "saved-by: vw\n"]: tag, value, trailing_modifier, comment = parse_tag_line(line) assert tag == "saved-by" @@ -82,7 +82,7 @@ def test_parse_tag_line_newline_agnostic(): assert comment is None -def test_parse_tag_line_with_tag_and_value(): +def test_parse_tag_line_with_tag_and_value() -> None: line = 'synonym: "ovarian ring canal" NARROW []\n' tag, value, trailing_modifier, comment = parse_tag_line(line) assert tag == "synonym" @@ -91,7 +91,7 @@ def test_parse_tag_line_with_tag_and_value(): assert comment is None -def test_parse_tag_line_with_tag_value_and_comment(): +def test_parse_tag_line_with_tag_value_and_comment() -> None: line = "is_a: GO:0005102 ! receptor binding\n" tag, value, trailing_modifier, comment = parse_tag_line(line) assert tag == "is_a" @@ -100,7 +100,7 @@ def test_parse_tag_line_with_tag_value_and_comment(): assert comment == "receptor binding" -def test_parse_tag_line_with_tag_value_and_trailing_modifier(): +def test_parse_tag_line_with_tag_value_and_trailing_modifier() -> None: line = 'xref: UMLS:C0226369 {source="ncithesaurus:Obturator_Artery"}\n' tag, value, trailing_modifier, comment = parse_tag_line(line) assert tag == "xref" @@ -109,7 +109,7 @@ def test_parse_tag_line_with_tag_value_and_trailing_modifier(): assert comment is None -def test_parse_tag_line_with_tag_value_trailing_modifier_and_comment(): +def test_parse_tag_line_with_tag_value_trailing_modifier_and_comment() -> None: line = 'xref: UMLS:C0022131 {source="ncithesaurus:Islet_of_Langerhans"} ! Islets of Langerhans\n' # noqa: E501 tag, value, trailing_modifier, comment = parse_tag_line(line) assert tag == "xref" @@ -118,14 +118,14 @@ def test_parse_tag_line_with_tag_value_trailing_modifier_and_comment(): assert comment == "Islets of Langerhans" -def test_parse_tag_line_backslashed_exclamation(): +def test_parse_tag_line_backslashed_exclamation() -> None: line = "synonym: not a real example \\!\n" tag, value, trailing_modifier, comment = parse_tag_line(line) assert tag == "synonym" assert value == r"not a real example \!" -def test_ignore_obsolete_nodes(): +def test_ignore_obsolete_nodes() -> None: """Quick verification that the change doesn't break anything""" path = os.path.join(directory, "data", "brenda-subset.obo") brenda = obonet.read_obo(path) @@ -133,7 +133,7 @@ def test_ignore_obsolete_nodes(): assert "BTO:0000311" not in nodes -def test_presence_of_obsolete_nodes(): +def test_presence_of_obsolete_nodes() -> None: """Test that we did, indeed, capture those obsolete entries""" pytest.importorskip("networkx", minversion="2.0") path = os.path.join(directory, "data", "brenda-subset.obo")