refactor: Factor out download_expressions_md
Also moves away from using the legacy `python2` interface
https://docs.python.org/3/library/urllib.request.html#legacy-interface
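
For context, the legacy interface referenced above is `urllib.request.urlretrieve`, which downloads to a temporary file on disk and pairs with a separate `request.urlcleanup()` call; `urllib.request.urlopen` reads the response body directly. A minimal sketch of the two styles (the URL is a placeholder):

from urllib import request

URL = "https://example.com/expressions.md"  # placeholder resource

# Legacy (inherited from Python 2's urllib): writes the body to a
# temp file, which later needs request.urlcleanup().
tmp_path, _headers = request.urlretrieve(URL)

# Modern equivalent: read the body in-process, no temp file.
with request.urlopen(URL) as response:
    text = response.read().decode("utf-8")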
dangotbanned committed Oct 7, 2024
1 parent 5a08189 commit 0969b30
Showing 2 changed files with 19 additions and 23 deletions.
16 changes: 13 additions & 3 deletions tools/markup.py
@@ -4,7 +4,9 @@
 
 import re
 from html import unescape
+from pathlib import Path
 from typing import TYPE_CHECKING, Any, Iterable, Literal
+from urllib import request
 
 import mistune.util
 from mistune import InlineParser as _InlineParser
@@ -13,7 +15,6 @@
 
 if TYPE_CHECKING:
     import sys
-    from pathlib import Path
 
     if sys.version_info >= (3, 11):
         from typing import TypeAlias
@@ -23,6 +24,8 @@
 
     from mistune import BaseRenderer, BlockParser, BlockState, InlineState
 
+Url: TypeAlias = str
+
 Token: TypeAlias = "dict[str, Any]"
 
 _RE_LINK: Pattern[str] = re.compile(r"(?<=\[)([^\]]+)(?=\]\([^\)]+\))", re.MULTILINE)
@@ -127,13 +130,20 @@ def process_text(self, text: str, state: InlineState) -> None:
         state.append_token({"type": "text", "raw": _RE_LIQUID_INCLUDE.sub(r"", text)})
 
 
-def read_ast_tokens(source: Path, /) -> list[Token]:
+def read_ast_tokens(source: Url | Path, /) -> list[Token]:
     """
     Read from ``source``, drop ``BlockState``.
 
     Factored out to provide accurate typing.
     """
-    return _Markdown(renderer=None, inline=InlineParser()).read(source)[0]
+    markdown = _Markdown(renderer=None, inline=InlineParser())
+    if isinstance(source, Path):
+        tokens = markdown.read(source)
+    else:
+        with request.urlopen(source) as response:
+            s = response.read().decode("utf-8")
+        tokens = markdown.parse(s, markdown.block.state_cls())
+    return tokens[0]
 
 
 def rst_syntax_for_class(class_name: str) -> str:
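
With the widened signature, `read_ast_tokens` accepts either a local `Path` or a URL string. A minimal usage sketch (both resource locations are hypothetical):

from pathlib import Path

from tools.markup import read_ast_tokens

# Both source kinds now share one entry point; the isinstance check
# above decides between Markdown.read and a urlopen round trip.
local_tokens = read_ast_tokens(Path("doc/expressions.md"))
remote_tokens = read_ast_tokens("https://example.com/expressions.md")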
26 changes: 6 additions & 20 deletions tools/schemapi/vega_expr.py
@@ -7,7 +7,6 @@
 from collections import deque
 from inspect import getmembers
 from itertools import chain
-from pathlib import Path
 from textwrap import TextWrapper as _TextWrapper
 from textwrap import indent
 from typing import (
@@ -22,7 +21,6 @@
     Sequence,
     overload,
 )
-from urllib import request
 
 from tools.markup import RSTParse, Token, read_ast_tokens
 from tools.markup import RSTRenderer as _RSTRenderer
@@ -33,6 +31,7 @@
 
 if TYPE_CHECKING:
     import sys
+    from pathlib import Path
     from re import Match, Pattern
 
     from mistune import BlockState
@@ -43,6 +42,8 @@
         from typing_extensions import LiteralString, Self
     from _typeshed import SupportsKeysAndGetItem
 
+    from tools.markup import Url
+
 __all__ = ["parse_expressions", "write_expr_module"]
 
 
@@ -845,20 +846,6 @@ def from_texts(cls, raw_texts: Iterable[str], /) -> Iterator[Self]:
                continue
 
 
-def download_expressions_md(url: str, /) -> Path:
-    """Download to a temporary file, return that as a ``pathlib.Path``."""
-    tmp, _ = request.urlretrieve(url)
-    fp = Path(tmp)
-    if not fp.exists():
-        msg = (
-            f"Expressions download failed: {fp!s}.\n\n"
-            f"Try manually accessing resource: {url!r}"
-        )
-        raise FileNotFoundError(msg)
-    else:
-        return fp
-
-
 def expand_urls(url: str, /) -> str:
     if url.startswith("#"):
         url = f"{EXPRESSIONS_DOCS_URL}{url}"
@@ -935,15 +922,14 @@ def italics_to_backticks(s: str, names: Iterable[str], /) -> str:
     return re.sub(pattern, r"\g<not_link_start>``\g<name>``\g<not_link_end>", s)
 
 
-def parse_expressions(url: str, /) -> Iterator[VegaExprDef]:
+def parse_expressions(source: Url | Path, /) -> Iterator[VegaExprDef]:
     """
-    Download, read markdown and eagerly parse signatures of relevant definitions.
+    Download remote or read local `.md` resource and eagerly parse signatures of relevant definitions.
 
     Yields with docs to ensure each can use all remapped names, regardless of the order they appear.
     """
-    tokens = read_ast_tokens(download_expressions_md(url))
+    tokens = read_ast_tokens(source)
     expr_defs = tuple(VegaExprDef.from_tokens(tokens))
-    request.urlcleanup()
     VegaExprDef.remap_title.refresh()
     for expr_def in expr_defs:
         yield expr_def.with_doc()
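
`parse_expressions` picks up the same flexibility, and dropping `urlretrieve` removes the temp-file round trip and the `request.urlcleanup()` bookkeeping. A hedged usage sketch, with hypothetical sources:

from pathlib import Path

from tools.schemapi.vega_expr import parse_expressions

# Iterate definitions parsed from a local copy (path is hypothetical).
for expr_def in parse_expressions(Path("expressions.md")):
    print(expr_def)

# Or parse straight from a remote document (URL is a placeholder).
exprs = list(parse_expressions("https://example.com/expressions.md"))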
