refactor: Factor out download_expressions_md
Also moves away from using the legacy `python2` interface
https://docs.python.org/3/library/urllib.request.html#legacy-interface
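
For context, the legacy interface referenced above is `urllib.request.urlretrieve`, which downloads to a temporary file on disk and pairs with a separate `request.urlcleanup()` call; `urllib.request.urlopen` reads the response body directly. A minimal sketch of the two styles (the URL is a placeholder):

from urllib import request

URL = "https://example.com/expressions.md"  # placeholder resource

# Legacy (inherited from Python 2's urllib): writes the body to a
# temp file, which later needs request.urlcleanup().
tmp_path, _headers = request.urlretrieve(URL)

# Modern equivalent: read the body in-process, no temp file.
with request.urlopen(URL) as response:
    text = response.read().decode("utf-8")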
dangotbanned committed Oct 7, 2024
1 parent 5a08189 commit 0969b30
Showing 2 changed files with 19 additions and 23 deletions.
16 changes: 13 additions & 3 deletions tools/markup.py
@@ -4,7 +4,9 @@
 
 import re
 from html import unescape
+from pathlib import Path
 from typing import TYPE_CHECKING, Any, Iterable, Literal
+from urllib import request
 
 import mistune.util
 from mistune import InlineParser as _InlineParser
@@ -13,7 +15,6 @@
 
 if TYPE_CHECKING:
     import sys
-    from pathlib import Path
 
     if sys.version_info >= (3, 11):
         from typing import TypeAlias
@@ -23,6 +24,8 @@
 
     from mistune import BaseRenderer, BlockParser, BlockState, InlineState
 
+Url: TypeAlias = str
+
 Token: TypeAlias = "dict[str, Any]"
 
 _RE_LINK: Pattern[str] = re.compile(r"(?<=\[)([^\]]+)(?=\]\([^\)]+\))", re.MULTILINE)
@@ -127,13 +130,20 @@ def process_text(self, text: str, state: InlineState) -> None:
         state.append_token({"type": "text", "raw": _RE_LIQUID_INCLUDE.sub(r"", text)})
 
 
-def read_ast_tokens(source: Path, /) -> list[Token]:
+def read_ast_tokens(source: Url | Path, /) -> list[Token]:
     """
     Read from ``source``, drop ``BlockState``.
 
     Factored out to provide accurate typing.
     """
-    return _Markdown(renderer=None, inline=InlineParser()).read(source)[0]
+    markdown = _Markdown(renderer=None, inline=InlineParser())
+    if isinstance(source, Path):
+        tokens = markdown.read(source)
+    else:
+        with request.urlopen(source) as response:
+            s = response.read().decode("utf-8")
+        tokens = markdown.parse(s, markdown.block.state_cls())
+    return tokens[0]
 
 
 def rst_syntax_for_class(class_name: str) -> str:
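
With the widened signature, `read_ast_tokens` accepts either a local `Path` or a URL string. A minimal usage sketch (both resource locations are hypothetical):

from pathlib import Path

from tools.markup import read_ast_tokens

# Both source kinds now share one entry point; the isinstance check
# above decides between Markdown.read and a urlopen round trip.
local_tokens = read_ast_tokens(Path("doc/expressions.md"))
remote_tokens = read_ast_tokens("https://example.com/expressions.md")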
26 changes: 6 additions & 20 deletions tools/schemapi/vega_expr.py
@@ -7,7 +7,6 @@
 from collections import deque
 from inspect import getmembers
 from itertools import chain
-from pathlib import Path
 from textwrap import TextWrapper as _TextWrapper
 from textwrap import indent
 from typing import (
@@ -22,7 +21,6 @@
     Sequence,
     overload,
 )
-from urllib import request
 
 from tools.markup import RSTParse, Token, read_ast_tokens
 from tools.markup import RSTRenderer as _RSTRenderer
@@ -33,6 +31,7 @@
 
 if TYPE_CHECKING:
     import sys
+    from pathlib import Path
     from re import Match, Pattern
 
     from mistune import BlockState
@@ -43,6 +42,8 @@
         from typing_extensions import LiteralString, Self
     from _typeshed import SupportsKeysAndGetItem
 
+    from tools.markup import Url
+
 __all__ = ["parse_expressions", "write_expr_module"]
 
 
@@ -845,20 +846,6 @@ def from_texts(cls, raw_texts: Iterable[str], /) -> Iterator[Self]:
                continue
 
 
-def download_expressions_md(url: str, /) -> Path:
-    """Download to a temporary file, return that as a ``pathlib.Path``."""
-    tmp, _ = request.urlretrieve(url)
-    fp = Path(tmp)
-    if not fp.exists():
-        msg = (
-            f"Expressions download failed: {fp!s}.\n\n"
-            f"Try manually accessing resource: {url!r}"
-        )
-        raise FileNotFoundError(msg)
-    else:
-        return fp
-
-
 def expand_urls(url: str, /) -> str:
     if url.startswith("#"):
         url = f"{EXPRESSIONS_DOCS_URL}{url}"
@@ -935,15 +922,14 @@ def italics_to_backticks(s: str, names: Iterable[str], /) -> str:
     return re.sub(pattern, r"\g<not_link_start>``\g<name>``\g<not_link_end>", s)
 
 
-def parse_expressions(url: str, /) -> Iterator[VegaExprDef]:
+def parse_expressions(source: Url | Path, /) -> Iterator[VegaExprDef]:
     """
-    Download, read markdown and eagerly parse signatures of relevant definitions.
+    Download remote or read local `.md` resource and eagerly parse signatures of relevant definitions.
 
     Yields with docs to ensure each can use all remapped names, regardless of the order they appear.
     """
-    tokens = read_ast_tokens(download_expressions_md(url))
+    tokens = read_ast_tokens(source)
     expr_defs = tuple(VegaExprDef.from_tokens(tokens))
-    request.urlcleanup()
     VegaExprDef.remap_title.refresh()
     for expr_def in expr_defs:
         yield expr_def.with_doc()
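
`parse_expressions` picks up the same flexibility, and dropping `urlretrieve` removes the temp-file round trip and the `request.urlcleanup()` bookkeeping. A hedged usage sketch, with hypothetical sources:

from pathlib import Path

from tools.schemapi.vega_expr import parse_expressions

# Iterate definitions parsed from a local copy (path is hypothetical).
for expr_def in parse_expressions(Path("expressions.md")):
    print(expr_def)

# Or parse straight from a remote document (URL is a placeholder).
exprs = list(parse_expressions("https://example.com/expressions.md"))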
