diff --git a/tools/markup.py b/tools/markup.py index d9e8230ec..b17e7ad24 100644 --- a/tools/markup.py +++ b/tools/markup.py @@ -4,7 +4,9 @@ import re from html import unescape +from pathlib import Path from typing import TYPE_CHECKING, Any, Iterable, Literal +from urllib import request import mistune.util from mistune import InlineParser as _InlineParser @@ -13,7 +15,6 @@ if TYPE_CHECKING: import sys - from pathlib import Path if sys.version_info >= (3, 11): from typing import TypeAlias @@ -23,6 +24,8 @@ from mistune import BaseRenderer, BlockParser, BlockState, InlineState + Url: TypeAlias = str + Token: TypeAlias = "dict[str, Any]" _RE_LINK: Pattern[str] = re.compile(r"(?<=\[)([^\]]+)(?=\]\([^\)]+\))", re.MULTILINE) @@ -127,13 +130,20 @@ def process_text(self, text: str, state: InlineState) -> None: state.append_token({"type": "text", "raw": _RE_LIQUID_INCLUDE.sub(r"", text)}) -def read_ast_tokens(source: Path, /) -> list[Token]: +def read_ast_tokens(source: Url | Path, /) -> list[Token]: """ Read from ``source``, drop ``BlockState``. Factored out to provide accurate typing. """ - return _Markdown(renderer=None, inline=InlineParser()).read(source)[0] + markdown = _Markdown(renderer=None, inline=InlineParser()) + if isinstance(source, Path): + tokens = markdown.read(source) + else: + with request.urlopen(source) as response: + s = response.read().decode("utf-8") + tokens = markdown.parse(s, markdown.block.state_cls()) + return tokens[0] def rst_syntax_for_class(class_name: str) -> str: diff --git a/tools/schemapi/vega_expr.py b/tools/schemapi/vega_expr.py index ce12660f1..90ed3ad0c 100644 --- a/tools/schemapi/vega_expr.py +++ b/tools/schemapi/vega_expr.py @@ -7,7 +7,6 @@ from collections import deque from inspect import getmembers from itertools import chain -from pathlib import Path from textwrap import TextWrapper as _TextWrapper from textwrap import indent from typing import ( @@ -22,7 +21,6 @@ Sequence, overload, ) -from urllib import request from tools.markup import RSTParse, Token, read_ast_tokens from tools.markup import RSTRenderer as _RSTRenderer @@ -33,6 +31,7 @@ if TYPE_CHECKING: import sys + from pathlib import Path from re import Match, Pattern from mistune import BlockState @@ -43,6 +42,8 @@ from typing_extensions import LiteralString, Self from _typeshed import SupportsKeysAndGetItem + from tools.markup import Url + __all__ = ["parse_expressions", "write_expr_module"] @@ -845,20 +846,6 @@ def from_texts(cls, raw_texts: Iterable[str], /) -> Iterator[Self]: continue -def download_expressions_md(url: str, /) -> Path: - """Download to a temporary file, return that as a ``pathlib.Path``.""" - tmp, _ = request.urlretrieve(url) - fp = Path(tmp) - if not fp.exists(): - msg = ( - f"Expressions download failed: {fp!s}.\n\n" - f"Try manually accessing resource: {url!r}" - ) - raise FileNotFoundError(msg) - else: - return fp - - def expand_urls(url: str, /) -> str: if url.startswith("#"): url = f"{EXPRESSIONS_DOCS_URL}{url}" @@ -935,15 +922,14 @@ def italics_to_backticks(s: str, names: Iterable[str], /) -> str: return re.sub(pattern, r"\g``\g``\g", s) -def parse_expressions(url: str, /) -> Iterator[VegaExprDef]: +def parse_expressions(source: Url | Path, /) -> Iterator[VegaExprDef]: """ - Download, read markdown and eagerly parse signatures of relevant definitions. + Download remote or read local `.md` resource and eagerly parse signatures of relevant definitions. Yields with docs to ensure each can use all remapped names, regardless of the order they appear. """ - tokens = read_ast_tokens(download_expressions_md(url)) + tokens = read_ast_tokens(source) expr_defs = tuple(VegaExprDef.from_tokens(tokens)) - request.urlcleanup() VegaExprDef.remap_title.refresh() for expr_def in expr_defs: yield expr_def.with_doc()