From 81cb4bf5d6c589c0e40e5c223350e82d76d84301 Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Thu, 26 Oct 2023 00:05:54 +0200 Subject: [PATCH 1/5] Improve type annotations (add more and fix wrong ones) The origins of these are three-fold: * Merging in stubs from https://github.com/python/typeshed/tree/main/stubs/Markdown using "merge-pyi" - Note: we can consider these annotations to be the important ones because it's what people have been adding according to their own need * Double-checking around places where stubs were already added from the above, particularly conflicts with annotations that got added in this repository already + Taking the opportunity to declare a generic "Registry of T" class * Running mypy and eliminating the most glaring errors it reported --- markdown/blockparser.py | 13 ++++---- markdown/blockprocessors.py | 12 ++++---- markdown/core.py | 12 +++++--- markdown/extensions/__init__.py | 10 +++--- markdown/extensions/attr_list.py | 2 +- markdown/extensions/footnotes.py | 14 ++++----- markdown/extensions/meta.py | 2 +- markdown/extensions/smarty.py | 10 +++--- markdown/extensions/toc.py | 12 ++++---- markdown/extensions/wikilinks.py | 15 ++++----- markdown/htmlparser.py | 6 ++-- markdown/inlinepatterns.py | 52 +++++++++++++++++--------------- markdown/postprocessors.py | 2 +- markdown/preprocessors.py | 2 +- markdown/treeprocessors.py | 29 ++++++++++++------ markdown/util.py | 51 +++++++++++++++++++------------ 16 files changed, 137 insertions(+), 107 deletions(-) diff --git a/markdown/blockparser.py b/markdown/blockparser.py index 0e561613..549c9ecd 100644 --- a/markdown/blockparser.py +++ b/markdown/blockparser.py @@ -30,11 +30,12 @@ from __future__ import annotations import xml.etree.ElementTree as etree -from typing import TYPE_CHECKING, Sequence, Any +from typing import TYPE_CHECKING, Iterable, Any from . import util if TYPE_CHECKING: # pragma: no cover from markdown import Markdown + from .blockprocessors import BlockProcessor class State(list): @@ -59,7 +60,7 @@ def set(self, state: Any): """ Set a new state. """ self.append(state) - def reset(self): + def reset(self) -> None: """ Step back one step in nested state. """ self.pop() @@ -92,11 +93,11 @@ def __init__(self, md: Markdown): [`blockprocessors`][markdown.blockprocessors]. """ - self.blockprocessors = util.Registry() + self.blockprocessors: util.Registry[BlockProcessor] = util.Registry() self.state = State() self.md = md - def parseDocument(self, lines: Sequence[str]) -> etree.ElementTree: + def parseDocument(self, lines: Iterable[str]) -> etree.ElementTree: """ Parse a Markdown document into an `ElementTree`. Given a list of lines, an `ElementTree` object (not just a parent @@ -116,7 +117,7 @@ def parseDocument(self, lines: Sequence[str]) -> etree.ElementTree: self.parseChunk(self.root, '\n'.join(lines)) return etree.ElementTree(self.root) - def parseChunk(self, parent: etree.Element, text: str): + def parseChunk(self, parent: etree.Element, text: str) -> None: """ Parse a chunk of Markdown text and attach to given `etree` node. While the `text` argument is generally assumed to contain multiple @@ -134,7 +135,7 @@ def parseChunk(self, parent: etree.Element, text: str): """ self.parseBlocks(parent, text.split('\n\n')) - def parseBlocks(self, parent: etree.Element, blocks: Sequence[str]): + def parseBlocks(self, parent: etree.Element, blocks: list[str]) -> None: """ Process blocks of Markdown text and attach to given `etree` node. Given a list of `blocks`, each `blockprocessor` is stepped through diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py index 7a3688c2..d8084680 100644 --- a/markdown/blockprocessors.py +++ b/markdown/blockprocessors.py @@ -82,7 +82,7 @@ def lastChild(self, parent: etree.Element) -> etree.Element | None: else: return None - def detab(self, text: str, length: int = None) -> str: + def detab(self, text: str, length: int | None = None) -> tuple[str, str]: """ Remove a tab from the front of each line of the given text. """ if length is None: length = self.tab_length @@ -105,7 +105,7 @@ def looseDetab(self, text: str, level: int = 1) -> str: lines[i] = lines[i][self.tab_length*level:] return '\n'.join(lines) - def test(self, parent: etree.Element, block: list[str]) -> bool: + def test(self, parent: etree.Element, block: str) -> bool: """ Test for block type. Must be overridden by subclasses. As the parser loops through processors, it will call the `test` @@ -214,7 +214,7 @@ def run(self, parent, blocks): self.create_item(sibling, block) self.parser.state.reset() - def create_item(self, parent: etree.Element, block: str): + def create_item(self, parent: etree.Element, block: str) -> None: """ Create a new `li` and parse the block with it as the parent. """ li = etree.SubElement(parent, 'li') self.parser.parseBlocks(li, [block]) @@ -329,7 +329,7 @@ class OListProcessor(BlockProcessor): TAG: str = 'ol' """ The tag used for the the wrapping element. """ - STARTSWITH: int = '1' + STARTSWITH: str = '1' """ The integer (as a string ) with which the list starts. For example, if a list is initialized as `3. Item`, then the `ol` tag will be assigned an HTML attribute of `starts="3"`. Default: `"1"`. @@ -342,7 +342,7 @@ class OListProcessor(BlockProcessor): This is the list of types which can be mixed. """ - def __init__(self, parser): + def __init__(self, parser: BlockParser): super().__init__(parser) # Detect an item (`1. item`). `group(1)` contains contents of item. self.RE = re.compile(r'^[ ]{0,%d}\d+\.[ ]+(.*)' % (self.tab_length - 1)) @@ -448,7 +448,7 @@ class UListProcessor(OListProcessor): TAG: str = 'ul' """ The tag used for the the wrapping element. """ - def __init__(self, parser): + def __init__(self, parser: BlockParser): super().__init__(parser) # Detect an item (`1. item`). `group(1)` contains contents of item. self.RE = re.compile(r'^[ ]{0,%d}[*+-][ ]+(.*)' % (self.tab_length - 1)) diff --git a/markdown/core.py b/markdown/core.py index 51fe8ee8..6b556b45 100644 --- a/markdown/core.py +++ b/markdown/core.py @@ -23,7 +23,7 @@ import sys import logging import importlib -from typing import TYPE_CHECKING, Any, TextIO, Callable +from typing import TYPE_CHECKING, Any, Callable, ClassVar, Mapping, Sequence, TextIO from . import util from .preprocessors import build_preprocessors from .blockprocessors import build_block_parser @@ -76,7 +76,7 @@ class Markdown: doc_tag = "div" # Element used to wrap document - later removed - output_formats: dict[str, Callable[Element]] = { + output_formats: ClassVar[dict[str, Callable[[Element], str]]] = { 'html': to_html_string, 'xhtml': to_xhtml_string, } @@ -156,7 +156,11 @@ def build_parser(self) -> Markdown: self.postprocessors = build_postprocessors(self) return self - def registerExtensions(self, extensions: list[Extension | str], configs: dict[str, dict[str, Any]]) -> Markdown: + def registerExtensions( + self, + extensions: Sequence[Extension | str], + configs: Mapping[str, Mapping[str, Any]] + ) -> Markdown: """ Load a list of extensions into an instance of the `Markdown` class. @@ -188,7 +192,7 @@ def registerExtensions(self, extensions: list[Extension | str], configs: dict[st ) return self - def build_extension(self, ext_name: str, configs: dict[str, Any]) -> Extension: + def build_extension(self, ext_name: str, configs: Mapping[str, Any]) -> Extension: """ Build extension from a string name, then return an instance using the given `configs`. diff --git a/markdown/extensions/__init__.py b/markdown/extensions/__init__.py index 36baf1f7..070c4cce 100644 --- a/markdown/extensions/__init__.py +++ b/markdown/extensions/__init__.py @@ -27,7 +27,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Mapping, Sequence from ..util import parseBoolValue if TYPE_CHECKING: # pragma: no cover @@ -37,7 +37,7 @@ class Extension: """ Base class for extensions to subclass. """ - config: dict[str, list[Any, str]] = {} + config: Mapping[str, list] = {} """ Default configuration for an extension. @@ -91,7 +91,7 @@ def getConfigInfo(self) -> list[tuple[str, str]]: """ return [(key, self.config[key][1]) for key in self.config.keys()] - def setConfig(self, key: str, value: Any): + def setConfig(self, key: str, value: Any) -> None: """ Set a configuration option. @@ -112,7 +112,7 @@ def setConfig(self, key: str, value: Any): value = parseBoolValue(value, preserve_none=True) self.config[key][0] = value - def setConfigs(self, items: dict[str, Any] | list[tuple[str, Any]]): + def setConfigs(self, items: Mapping[str, Any] | Sequence[tuple[str, Any]]): """ Loop through a collection of configuration options, passing each to [`setConfig`][markdown.extensions.Extension.setConfig]. @@ -129,7 +129,7 @@ def setConfigs(self, items: dict[str, Any] | list[tuple[str, Any]]): for key, value in items: self.setConfig(key, value) - def extendMarkdown(self, md: Markdown): + def extendMarkdown(self, md: Markdown) -> None: """ Add the various processors and patterns to the Markdown Instance. diff --git a/markdown/extensions/attr_list.py b/markdown/extensions/attr_list.py index f430ec30..0c317d1b 100644 --- a/markdown/extensions/attr_list.py +++ b/markdown/extensions/attr_list.py @@ -146,7 +146,7 @@ def run(self, doc: Element): self.assign_attrs(elem, m.group(1)) elem.tail = elem.tail[m.end():] - def assign_attrs(self, elem: Element, attrs: dict[str, str]): + def assign_attrs(self, elem: Element, attrs: str) -> None: """ Assign `attrs` to element. """ for k, v in get_attrs(attrs): if k == '.': diff --git a/markdown/extensions/footnotes.py b/markdown/extensions/footnotes.py index cf1ab43a..2424dbc8 100644 --- a/markdown/extensions/footnotes.py +++ b/markdown/extensions/footnotes.py @@ -98,14 +98,14 @@ def extendMarkdown(self, md): # Insert a postprocessor after amp_substitute processor md.postprocessors.register(FootnotePostprocessor(self), 'footnote', 25) - def reset(self): + def reset(self) -> None: """ Clear footnotes on reset, and prepare for distinct document. """ - self.footnotes = OrderedDict() + self.footnotes: OrderedDict[str, str] = OrderedDict() self.unique_prefix += 1 self.found_refs = {} self.used_refs = set() - def unique_ref(self, reference, found=False): + def unique_ref(self, reference, found: bool = False): """ Get a unique reference if there are duplicates. """ if not found: return reference @@ -144,7 +144,7 @@ def finder(element): res = finder(root) return res - def setFootnote(self, id, text): + def setFootnote(self, id, text) -> None: """ Store a footnote for later retrieval. """ self.footnotes[id] = text @@ -159,7 +159,7 @@ def makeFootnoteId(self, id): else: return 'fn{}{}'.format(self.get_separator(), id) - def makeFootnoteRefId(self, id, found=False): + def makeFootnoteRefId(self, id, found: bool = False): """ Return footnote back-link id. """ if self.getConfig("UNIQUE_IDS"): return self.unique_ref('fnref%s%d-%s' % (self.get_separator(), self.unique_prefix, id), found) @@ -329,7 +329,7 @@ class FootnotePostTreeprocessor(Treeprocessor): def __init__(self, footnotes): self.footnotes = footnotes - def add_duplicates(self, li, duplicates): + def add_duplicates(self, li, duplicates) -> None: """ Adjust current `li` and add the duplicates: `fnref2`, `fnref3`, etc. """ for link in li.iter('a'): # Find the link that needs to be duplicated. @@ -355,7 +355,7 @@ def get_num_duplicates(self, li): link_id = '{}ref{}{}'.format(fn, self.footnotes.get_separator(), rest) return self.footnotes.found_refs.get(link_id, 0) - def handle_duplicates(self, parent): + def handle_duplicates(self, parent) -> None: """ Find duplicate footnotes and format and add the duplicates. """ for li in list(parent): # Check number of duplicates footnotes and insert diff --git a/markdown/extensions/meta.py b/markdown/extensions/meta.py index 147d3b93..82179273 100644 --- a/markdown/extensions/meta.py +++ b/markdown/extensions/meta.py @@ -44,7 +44,7 @@ def extendMarkdown(self, md): self.md = md md.preprocessors.register(MetaPreprocessor(md), 'meta', 27) - def reset(self): + def reset(self) -> None: self.md.Meta = {} diff --git a/markdown/extensions/smarty.py b/markdown/extensions/smarty.py index ca6a017d..3274bf86 100644 --- a/markdown/extensions/smarty.py +++ b/markdown/extensions/smarty.py @@ -193,7 +193,7 @@ def _addPatterns(self, md, patterns, serie, priority): name = 'smarty-%s-%d' % (serie, ind) self.inlinePatterns.register(pattern, name, priority-ind) - def educateDashes(self, md): + def educateDashes(self, md) -> None: emDashesPattern = SubstituteTextPattern( r'(? None: ellipsesPattern = SubstituteTextPattern( r'(? None: leftAngledQuotePattern = SubstituteTextPattern( r'\<\<', (self.substitutions['left-angle-quote'],), md ) @@ -219,7 +219,7 @@ def educateAngledQuotes(self, md): self.inlinePatterns.register(leftAngledQuotePattern, 'smarty-left-angle-quotes', 40) self.inlinePatterns.register(rightAngledQuotePattern, 'smarty-right-angle-quotes', 35) - def educateQuotes(self, md): + def educateQuotes(self, md) -> None: lsquo = self.substitutions['left-single-quote'] rsquo = self.substitutions['right-single-quote'] ldquo = self.substitutions['left-double-quote'] @@ -243,7 +243,7 @@ def educateQuotes(self, md): def extendMarkdown(self, md): configs = self.getConfigs() - self.inlinePatterns = Registry() + self.inlinePatterns: Registry[HtmlInlineProcessor] = Registry() if configs['smart_ellipses']: self.educateEllipses(md) if configs['smart_quotes']: diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py index 5490d0dd..64c20c80 100644 --- a/markdown/extensions/toc.py +++ b/markdown/extensions/toc.py @@ -71,7 +71,7 @@ def get_name(el): return ''.join(text).strip() -def stashedHTML2text(text, md, strip_entities=True): +def stashedHTML2text(text, md, strip_entities: bool = True): """ Extract raw HTML from stash, reduce to plain text and swap with placeholder. """ def _html_sub(m): """ Substitute raw html with plain text. """ @@ -198,7 +198,7 @@ def iterparent(self, node): yield node, child yield from self.iterparent(child) - def replace_marker(self, root, elem): + def replace_marker(self, root, elem) -> None: """ Replace marker with elem. """ for (p, c) in self.iterparent(root): text = ''.join(c.itertext()).strip() @@ -219,14 +219,14 @@ def replace_marker(self, root, elem): p[i] = elem break - def set_level(self, elem): + def set_level(self, elem) -> None: """ Adjust header level according to base level. """ level = int(elem.tag[-1]) + self.base_level if level > 6: level = 6 elem.tag = 'h%d' % level - def add_anchor(self, c, elem_id): + def add_anchor(self, c, elem_id) -> None: anchor = etree.Element("a") anchor.text = c.text anchor.attrib["href"] = "#" + elem_id @@ -238,7 +238,7 @@ def add_anchor(self, c, elem_id): c.remove(c[0]) c.append(anchor) - def add_permalink(self, c, elem_id): + def add_permalink(self, c, elem_id) -> None: permalink = etree.Element("a") permalink.text = ("%spara;" % AMP_SUBSTITUTE if self.use_permalinks is True @@ -399,7 +399,7 @@ def extendMarkdown(self, md): tocext = self.TreeProcessorClass(md, self.getConfigs()) md.treeprocessors.register(tocext, 'toc', 5) - def reset(self): + def reset(self) -> None: self.md.toc = '' self.md.toc_tokens = [] diff --git a/markdown/extensions/wikilinks.py b/markdown/extensions/wikilinks.py index 9d5acfa3..763da8e9 100644 --- a/markdown/extensions/wikilinks.py +++ b/markdown/extensions/wikilinks.py @@ -82,13 +82,14 @@ def _getMeta(self): base_url = self.config['base_url'] end_url = self.config['end_url'] html_class = self.config['html_class'] - if hasattr(self.md, 'Meta'): - if 'wiki_base_url' in self.md.Meta: - base_url = self.md.Meta['wiki_base_url'][0] - if 'wiki_end_url' in self.md.Meta: - end_url = self.md.Meta['wiki_end_url'][0] - if 'wiki_html_class' in self.md.Meta: - html_class = self.md.Meta['wiki_html_class'][0] + meta = getattr(self.md, 'Meta', None) + if meta is not None: + if 'wiki_base_url' in meta: + base_url = meta['wiki_base_url'][0] + if 'wiki_end_url' in meta: + end_url = meta['wiki_end_url'][0] + if 'wiki_html_class' in meta: + html_class = meta['wiki_html_class'][0] return base_url, end_url, html_class diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py index bf70b73d..f8289b32 100644 --- a/markdown/htmlparser.py +++ b/markdown/htmlparser.py @@ -152,7 +152,7 @@ def get_endtag_text(self, tag: str) -> str: # Failed to extract from raw data. Assume well formed and lowercase. return ''.format(tag) - def handle_starttag(self, tag: str, attrs: dict[str, str]): + def handle_starttag(self, tag: str, attrs: list[tuple[str, str]]): # Handle tags that should always be empty and do not specify a closing tag if tag in self.empty_tags: self.handle_startendtag(tag, attrs) @@ -231,7 +231,7 @@ def handle_empty_tag(self, data: str, is_block: bool): else: self.cleandoc.append(data) - def handle_startendtag(self, tag: str, attrs: dict[str, str]): + def handle_startendtag(self, tag: str, attrs: list[tuple[str, str]]): self.handle_empty_tag(self.get_starttag_text(), is_block=self.md.is_block_level(tag)) def handle_charref(self, name: str): @@ -273,7 +273,7 @@ def parse_html_declaration(self, i: int) -> int: # As `__startag_text` is private, all references to it must be in this subclass. # The last few lines of `parse_starttag` are reversed so that `handle_starttag` # can override `cdata_mode` in certain situations (in a code span). - __starttag_text = None + __starttag_text: str | None = None def get_starttag_text(self) -> str: """Return full source of start tag: `<...>`.""" diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py index 7df7939f..296ab834 100644 --- a/markdown/inlinepatterns.py +++ b/markdown/inlinepatterns.py @@ -41,8 +41,7 @@ from __future__ import annotations from . import util -from collections import namedtuple -from typing import TYPE_CHECKING, Match, Any +from typing import TYPE_CHECKING, Any, Collection, NamedTuple import re import xml.etree.ElementTree as etree try: # pragma: no cover @@ -54,7 +53,7 @@ from markdown import Markdown -def build_inlinepatterns(md: Markdown, **kwargs: Any) -> util.Registry: +def build_inlinepatterns(md: Markdown, **kwargs: Any) -> util.Registry[InlineProcessor]: """ Build the default set of inline patterns for Markdown. @@ -181,8 +180,11 @@ def dequote(string: str) -> str: return string -class EmStrongItem(namedtuple('EmStrongItem', ['pattern', 'builder', 'tags'])): +class EmStrongItem(NamedTuple): """Emphasis/strong pattern item.""" + pattern: re.Pattern[str] + builder: str + tags: str # The pattern classes @@ -209,7 +211,7 @@ class initialization, the `^(.*)` and `(.*)!` are added automatically and the re """ - ANCESTOR_EXCLUDES = tuple() + ANCESTOR_EXCLUDES: Collection[str] = tuple() """ A collection of elements which are undesirable ancestors. The processor will be skipped if it would cause the content to be a descendant of one of the listed tag names. @@ -236,7 +238,7 @@ def getCompiledRegExp(self) -> re.Pattern: """ Return a compiled regular expression. """ return self.compiled_re - def handleMatch(self, m: Match) -> etree.Element: + def handleMatch(self, m: re.Match[str]) -> etree.Element | str: """Return a ElementTree element from the given match. Subclasses should override this method. @@ -298,7 +300,7 @@ def __init__(self, pattern: str, md: Markdown | None = None): self.safe_mode = False self.md = md - def handleMatch(self, m: Match, data: str) -> tuple[etree.Element | str | None, int | None, int | None]: + def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | str | None, int | None, int | None]: """Return a ElementTree element from the given match and the start and end index of the matched text. @@ -322,14 +324,14 @@ def handleMatch(self, m: Match, data: str) -> tuple[etree.Element | str | None, class SimpleTextPattern(Pattern): # pragma: no cover """ Return a simple text of `group(2)` of a Pattern. """ - def handleMatch(self, m: Match) -> str: + def handleMatch(self, m: re.Match[str]) -> str: """ Return string content of `group(2)` of a matching pattern. """ return m.group(2) class SimpleTextInlineProcessor(InlineProcessor): """ Return a simple text of `group(1)` of a Pattern. """ - def handleMatch(self, m: Match, data: str) -> tuple[str, int, int]: + def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]: """ Return string content of `group(1)` of a matching pattern. """ return m.group(1), m.start(0), m.end(0) @@ -337,7 +339,7 @@ def handleMatch(self, m: Match, data: str) -> tuple[str, int, int]: class EscapeInlineProcessor(InlineProcessor): """ Return an escaped character. """ - def handleMatch(self, m: Match, data: str) -> tuple[str | None, int, int]: + def handleMatch(self, m: re.Match[str], data: str) -> tuple[str | None, int, int]: """ If the character matched by `group(1)` of a pattern is in [`ESCAPED_CHARS`][markdown.Markdown.ESCAPED_CHARS] then return the integer representing the character's Unicode code point (as returned by [`ord`][]) wrapped @@ -372,7 +374,7 @@ def __init__(self, pattern: str, tag: str): self.tag = tag """ The tag of the rendered element. """ - def handleMatch(self, m: Match) -> etree.Element: + def handleMatch(self, m: re.Match[str]) -> etree.Element: """ Return [`Element`][xml.etree.ElementTree.Element] of type `tag` with the string in `group(3)` of a matching pattern as the Element's text. @@ -401,7 +403,7 @@ def __init__(self, pattern: str, tag: str): self.tag = tag """ The tag of the rendered element. """ - def handleMatch(self, m: Match, data: str) -> tuple[etree.Element, int, int]: # pragma: no cover + def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]: # pragma: no cover """ Return [`Element`][xml.etree.ElementTree.Element] of type `tag` with the string in `group(2)` of a matching pattern as the Element's text. @@ -413,14 +415,14 @@ def handleMatch(self, m: Match, data: str) -> tuple[etree.Element, int, int]: # class SubstituteTagPattern(SimpleTagPattern): # pragma: no cover """ Return an element of type `tag` with no children. """ - def handleMatch(self, m: Match) -> etree.Element: + def handleMatch(self, m: re.Match[str]) -> etree.Element: """ Return empty [`Element`][xml.etree.ElementTree.Element] of type `tag`. """ return etree.Element(self.tag) class SubstituteTagInlineProcessor(SimpleTagInlineProcessor): """ Return an element of type `tag` with no children. """ - def handleMatch(self, m: Match, data: str) -> tuple[etree.Element, int, int]: + def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]: """ Return empty [`Element`][xml.etree.ElementTree.Element] of type `tag`. """ return etree.Element(self.tag), m.start(0), m.end(0) @@ -433,7 +435,7 @@ def __init__(self, pattern): self.tag = 'code' """ The tag of the rendered element. """ - def handleMatch(self, m: Match, data: str) -> tuple[etree.Element | str, int, int]: + def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | str, int, int]: """ If the match contains `group(3)` of a pattern, then return a `code` [`Element`][xml.etree.ElementTree.Element] which contains HTML escaped text (with @@ -456,7 +458,7 @@ class DoubleTagPattern(SimpleTagPattern): # pragma: no cover Useful for strong emphasis etc. """ - def handleMatch(self, m: Match) -> etree.Element: + def handleMatch(self, m: re.Match[str]) -> etree.Element: """ Return [`Element`][xml.etree.ElementTree.Element] in following format: `group(3)group(4)` where `group(4)` is optional. @@ -477,7 +479,7 @@ class DoubleTagInlineProcessor(SimpleTagInlineProcessor): Useful for strong emphasis etc. """ - def handleMatch(self, m: Match, data: str) -> tuple[etree.Element, int, int]: # pragma: no cover + def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]: # pragma: no cover """ Return [`Element`][xml.etree.ElementTree.Element] in following format: `group(2)group(3)` where `group(3)` is optional. @@ -494,7 +496,7 @@ def handleMatch(self, m: Match, data: str) -> tuple[etree.Element, int, int]: # class HtmlInlineProcessor(InlineProcessor): """ Store raw inline html and return a placeholder. """ - def handleMatch(self, m: Match, data: str) -> tuple[str, int, int]: + def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]: """ Store the text of `group(1)` of a pattern and return a placeholder string. """ rawhtml = self.backslash_unescape(self.unescape(m.group(1))) place_holder = self.md.htmlStash.store(rawhtml) @@ -577,7 +579,7 @@ def build_double2(self, m, tags, idx): self.parse_sub_patterns(text, el2, None, idx) return el1 - def parse_sub_patterns(self, data, parent, last, idx): + def parse_sub_patterns(self, data, parent, last, idx) -> None: """ Parses sub patterns. @@ -651,7 +653,7 @@ def build_element(self, m, builder, tags, index): else: return self.build_single(m, tags, index) - def handleMatch(self, m: Match, data: str) -> tuple[etree.Element, int, int]: + def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None, int | None, int | None]: """Parse patterns.""" el = None @@ -686,7 +688,7 @@ class LinkInlineProcessor(InlineProcessor): RE_LINK = re.compile(r'''\(\s*(?:(<[^<>]*>)\s*(?:('[^']*'|"[^"]*")\s*)?\))?''', re.DOTALL | re.UNICODE) RE_TITLE_CLEAN = re.compile(r'\s') - def handleMatch(self, m: Match, data: str) -> tuple[etree.Element | None, int | None, int | None]: + def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None, int | None, int | None]: """ Return an `a` [`Element`][xml.etree.ElementTree.Element] or `(None, None, None)`. """ text, index, handled = self.getText(data, m.end(0)) @@ -846,7 +848,7 @@ def getText(self, data, index): class ImageInlineProcessor(LinkInlineProcessor): """ Return a `img` element from the given match. """ - def handleMatch(self, m: Match, data: str) -> tuple[etree.Element | None, int | None, int | None]: + def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None, int | None, int | None]: """ Return an `img` [`Element`][xml.etree.ElementTree.Element] or `(None, None, None)`. """ text, index, handled = self.getText(data, m.end(0)) if not handled: @@ -873,7 +875,7 @@ class ReferenceInlineProcessor(LinkInlineProcessor): RE_LINK = re.compile(r'\s?\[([^\]]*)\]', re.DOTALL | re.UNICODE) - def handleMatch(self, m: Match, data: str) -> tuple[etree.Element | None, int | None, int | None]: + def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None, int | None, int | None]: """ Return [`Element`][xml.etree.ElementTree.Element] returned by `makeTag` method or `(None, None, None)`. @@ -953,7 +955,7 @@ def evalId(self, data, index, text): class AutolinkInlineProcessor(InlineProcessor): """ Return a link Element given an auto-link (``). """ - def handleMatch(self, m: Match, data: str) -> tuple[etree.Element, int, int]: + def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]: """ Return an `a` [`Element`][xml.etree.ElementTree.Element] of `group(1)`. """ el = etree.Element("a") el.set('href', self.unescape(m.group(1))) @@ -965,7 +967,7 @@ class AutomailInlineProcessor(InlineProcessor): """ Return a `mailto` link Element given an auto-mail link (``). """ - def handleMatch(self, m: Match, data: str) -> tuple[etree.Element, int, int]: + def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]: """ Return an [`Element`][xml.etree.ElementTree.Element] containing a `mailto` link of `group(1)`. """ el = etree.Element('a') email = self.unescape(m.group(1)) diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py index 4a18e18e..3da5ee1a 100644 --- a/markdown/postprocessors.py +++ b/markdown/postprocessors.py @@ -37,7 +37,7 @@ from markdown import Markdown -def build_postprocessors(md: Markdown, **kwargs: Any) -> util.Registry: +def build_postprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Postprocessor]: """ Build the default postprocessors for Markdown. """ postprocessors = util.Registry() postprocessors.register(RawHtmlPostprocessor(md), 'raw_html', 30) diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py index 8787e56b..0f63cdd3 100644 --- a/markdown/preprocessors.py +++ b/markdown/preprocessors.py @@ -34,7 +34,7 @@ from markdown import Markdown -def build_preprocessors(md: Markdown, **kwargs: Any) -> util.Registry: +def build_preprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Preprocessor]: """ Build and return the default set of preprocessors used by Markdown. """ preprocessors = util.Registry() preprocessors.register(NormalizeWhitespace(md), 'normalize_whitespace', 30) diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py index fceb1881..59a3eb3c 100644 --- a/markdown/treeprocessors.py +++ b/markdown/treeprocessors.py @@ -28,7 +28,7 @@ import re import xml.etree.ElementTree as etree -from typing import TYPE_CHECKING, Sequence, Any +from typing import TYPE_CHECKING, Any from . import util from . import inlinepatterns @@ -36,7 +36,7 @@ from markdown import Markdown -def build_treeprocessors(md: Markdown, **kwargs: Any) -> util.Registry: +def build_treeprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Treeprocessor]: """ Build the default `treeprocessors` for Markdown. """ treeprocessors = util.Registry() treeprocessors.register(InlineProcessor(md), 'inline', 20) @@ -87,7 +87,7 @@ def __init__(self, md): self.inlinePatterns = md.inlinePatterns self.ancestors = [] - def __makePlaceholder(self, type): + def __makePlaceholder(self, type) -> tuple[str, str]: """ Generate a placeholder """ id = "%04d" % len(self.stashed_nodes) hash = util.INLINE_PLACEHOLDER % id @@ -111,7 +111,7 @@ def __findPlaceholder(self, data: str, index: int) -> tuple[str | None, int]: else: return None, index + 1 - def __stashNode(self, node, type): + def __stashNode(self, node, type) -> str: """ Add node to stash. """ placeholder, id = self.__makePlaceholder(type) self.stashed_nodes[id] = node @@ -169,7 +169,12 @@ def __processElementText(self, node: etree.Element, subnode: etree.Element, isTe for newChild in childResult: node.insert(pos, newChild[0]) - def __processPlaceholders(self, data: str, parent: etree.Element, isText: bool = True) -> list[etree.ElementTree]: + def __processPlaceholders( + self, + data: str, + parent: etree.Element, + isText: bool = True + ) -> list[tuple[etree.Element, Any]]: """ Process string with placeholders and generate `ElementTree` tree. @@ -245,7 +250,13 @@ def linkText(text): return result - def __applyPattern(self, pattern: str, data: str, patternIndex: int, startIndex: int = 0) -> tuple[str, bool, int]: + def __applyPattern( + self, + pattern: inlinepatterns.Pattern, + data: str, + patternIndex: int, + startIndex: int = 0 + ) -> tuple[str, bool, int]: """ Check if the line fits the pattern, create the necessary elements, add it to `stashed_nodes`. @@ -329,7 +340,7 @@ def __build_ancestors(self, parent, parents): ancestors.reverse() parents.extend(ancestors) - def run(self, tree: etree.Element, ancestors: Sequence[str] | None = None) -> etree.Element: + def run(self, tree: etree.Element, ancestors: list[str] | None = None) -> etree.Element: """Apply inline patterns to a parsed Markdown tree. Iterate over `Element`, find elements with inline tag, apply inline @@ -347,7 +358,7 @@ def run(self, tree: etree.Element, ancestors: Sequence[str] | None = None) -> et An element tree object with applied inline patterns. """ - self.stashed_nodes = {} + self.stashed_nodes: dict[str, etree.Element] = {} # Ensure a valid parent list, but copy passed in lists # to ensure we don't have the user accidentally change it on us. @@ -448,7 +459,7 @@ class UnescapeTreeprocessor(Treeprocessor): def _unescape(self, m): return chr(int(m.group(1))) - def unescape(self, text): + def unescape(self, text: str) -> str: return self.RE.sub(self._unescape, text) def run(self, root): diff --git a/markdown/util.py b/markdown/util.py index 1ee60297..af686432 100644 --- a/markdown/util.py +++ b/markdown/util.py @@ -27,14 +27,15 @@ import re import sys import warnings -from collections import namedtuple from functools import wraps, lru_cache from itertools import count -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Generic, Iterator, NamedTuple, TypeVar, overload if TYPE_CHECKING: # pragma: no cover from markdown import Markdown +_T = TypeVar('_T') + """ Constants you might want to modify @@ -108,7 +109,7 @@ def get_installed_extensions(): return metadata.entry_points(group='markdown.extensions') -def deprecated(message, stacklevel=2): +def deprecated(message: str, stacklevel: int = 2): """ Raise a [`DeprecationWarning`][] when wrapped function/method is called. @@ -133,7 +134,7 @@ def deprecated_func(*args, **kwargs): return wrapper -def parseBoolValue(value: str, fail_on_errors: bool = True, preserve_none: bool = False) -> bool | None: +def parseBoolValue(value: str | None, fail_on_errors: bool = True, preserve_none: bool = False) -> bool | None: """Parses a string representing a boolean value. If parsing was successful, returns `True` or `False`. If `preserve_none=True`, returns `True`, `False`, or `None`. If parsing was not successful, raises `ValueError`, or, if @@ -174,7 +175,7 @@ def _get_stack_depth(size=2): return size -def nearing_recursion_limit(): +def nearing_recursion_limit() -> bool: """Return true if current stack depth is within 100 of maximum limit.""" return sys.getrecursionlimit() - _get_stack_depth() < 100 @@ -198,7 +199,7 @@ class Processor: md: The `Markdown` instance this processor is a part of. """ - def __init__(self, md: Markdown = None): + def __init__(self, md: Markdown | None = None): self.md = md @@ -233,15 +234,15 @@ def store(self, html: str) -> str: self.html_counter += 1 return placeholder - def reset(self): + def reset(self) -> None: """ Clear the stash. """ self.html_counter = 0 self.rawHtmlBlocks = [] - def get_placeholder(self, key): + def get_placeholder(self, key: int) -> str: return HTML_PLACEHOLDER % key - def store_tag(self, tag, attrs, left_index, right_index) -> str: + def store_tag(self, tag: str, attrs: list, left_index: int, right_index: int) -> str: """Store tag data and return a placeholder.""" self.tag_data.append({'tag': tag, 'attrs': attrs, 'left_index': left_index, @@ -254,10 +255,12 @@ def store_tag(self, tag, attrs, left_index, right_index) -> str: # Used internally by `Registry` for each item in its sorted list. # Provides an easier to read API when editing the code later. # For example, `item.name` is more clear than `item[0]`. -_PriorityItem = namedtuple('PriorityItem', ['name', 'priority']) +class _PriorityItem(NamedTuple): + name: str + priority: float -class Registry: +class Registry(Generic[_T]): """ A priority sorted registry. @@ -298,25 +301,33 @@ class Registry: """ def __init__(self): - self._data = {} + self._data: dict[str, _T] = {} self._priority = [] self._is_sorted = False - def __contains__(self, item): + def __contains__(self, item: str | _T) -> bool: if isinstance(item, str): # Check if an item exists by this name. return item in self._data.keys() # Check if this instance exists. return item in self._data.values() - def __iter__(self): + def __iter__(self) -> Iterator[_T]: self._sort() return iter([self._data[k] for k, p in self._priority]) - def __getitem__(self, key): + @overload + def __getitem__(self, key: str | int) -> _T: + ... + + @overload + def __getitem__(self, key: slice) -> Registry[_T]: + ... + + def __getitem__(self, key: str | int | slice) -> _T | Registry[_T]: self._sort() if isinstance(key, slice): - data = Registry() + data: Registry[_T] = Registry() for k, p in self._priority[key]: data.register(self._data[k], k, p) return data @@ -324,13 +335,13 @@ def __getitem__(self, key): return self._data[self._priority[key].name] return self._data[key] - def __len__(self): + def __len__(self) -> int: return len(self._priority) def __repr__(self): return '<{}({})>'.format(self.__class__.__name__, list(self)) - def get_index_for_name(self, name) -> int: + def get_index_for_name(self, name: str) -> int: """ Return the index of the given name. """ @@ -341,7 +352,7 @@ def get_index_for_name(self, name) -> int: ) raise ValueError('No item named "{}" exists.'.format(name)) - def register(self, item: Any, name: str, priority: int): + def register(self, item: _T, name: str, priority: float) -> None: """ Add an item to the registry with the given name and priority. @@ -363,7 +374,7 @@ def register(self, item: Any, name: str, priority: int): self._data[name] = item self._priority.append(_PriorityItem(name, priority)) - def deregister(self, name, strict=True): + def deregister(self, name: str, strict: bool = True) -> None: """ Remove an item from the registry. From 4be66f25077536a713ddf7e3c042384b7ba5a542 Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Thu, 26 Oct 2023 09:05:53 +0200 Subject: [PATCH 2/5] Revert non-annotation change --- markdown/extensions/wikilinks.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/markdown/extensions/wikilinks.py b/markdown/extensions/wikilinks.py index 763da8e9..9d5acfa3 100644 --- a/markdown/extensions/wikilinks.py +++ b/markdown/extensions/wikilinks.py @@ -82,14 +82,13 @@ def _getMeta(self): base_url = self.config['base_url'] end_url = self.config['end_url'] html_class = self.config['html_class'] - meta = getattr(self.md, 'Meta', None) - if meta is not None: - if 'wiki_base_url' in meta: - base_url = meta['wiki_base_url'][0] - if 'wiki_end_url' in meta: - end_url = meta['wiki_end_url'][0] - if 'wiki_html_class' in meta: - html_class = meta['wiki_html_class'][0] + if hasattr(self.md, 'Meta'): + if 'wiki_base_url' in self.md.Meta: + base_url = self.md.Meta['wiki_base_url'][0] + if 'wiki_end_url' in self.md.Meta: + end_url = self.md.Meta['wiki_end_url'][0] + if 'wiki_html_class' in self.md.Meta: + html_class = self.md.Meta['wiki_html_class'][0] return base_url, end_url, html_class From 0e996d38e57fe5c8fd24765c4e83454b149f7f47 Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Thu, 26 Oct 2023 18:27:25 +0200 Subject: [PATCH 3/5] Apply suggestions from code review --- markdown/util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/markdown/util.py b/markdown/util.py index af686432..827befd8 100644 --- a/markdown/util.py +++ b/markdown/util.py @@ -317,11 +317,11 @@ def __iter__(self) -> Iterator[_T]: return iter([self._data[k] for k, p in self._priority]) @overload - def __getitem__(self, key: str | int) -> _T: + def __getitem__(self, key: str | int) -> _T: # pragma: no cover ... @overload - def __getitem__(self, key: slice) -> Registry[_T]: + def __getitem__(self, key: slice) -> Registry[_T]: # pragma: no cover ... def __getitem__(self, key: str | int | slice) -> _T | Registry[_T]: From d672505aeb198450f865aba79bbf0f57b628a02e Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Fri, 27 Oct 2023 21:01:29 +0200 Subject: [PATCH 4/5] Add changelog entry --- docs/changelog.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/changelog.md b/docs/changelog.md index 2f9e9250..76ad92df 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -8,6 +8,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). See the [Contributing Guide](contributing.md) for details. +## [Unreleased] + +### Fixed + +* Improve and expand type annotations in the codebase (#1394) + ## [3.5] -- 2023-10-06 ### Added From 094a5636b583fad7638308f447d4bb9f02b4c0ff Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Mon, 30 Oct 2023 17:34:08 +0100 Subject: [PATCH 5/5] Fix "typo" --- docs/changelog.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog.md b/docs/changelog.md index 7e8ec26d..ea36d16e 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -13,7 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed * Fix a performance problem with HTML extraction where large HTML input could trigger quadratic line counting behavior (PR#1392). -* Improve and expand type annotations in the codebase (#1394) +* Improve and expand type annotations in the code base (#1394) ## [3.5] -- 2023-10-06