From c72cc479e12c090940a5ea99279c7625e4132a63 Mon Sep 17 00:00:00 2001 From: maximkurbatov Date: Mon, 2 Sep 2024 13:50:13 +0300 Subject: [PATCH 01/20] adding allowed attributes to protect against XSS attacks. An example of such an attack: ![asd](-1){onerror="alert(1)"} --- mdit_py_plugins/attrs/index.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/mdit_py_plugins/attrs/index.py b/mdit_py_plugins/attrs/index.py index 11455f8..4eba102 100644 --- a/mdit_py_plugins/attrs/index.py +++ b/mdit_py_plugins/attrs/index.py @@ -11,12 +11,18 @@ from .parse import ParseError, parse +class NotAllowedAttributesError(ValueError): + pass + + def attrs_plugin( md: MarkdownIt, *, after: Sequence[str] = ("image", "code_inline", "link_close", "span_close"), spans: bool = False, span_after: str = "link", + allowed_attributes: Sequence[str] = [], + strict: bool = False, ) -> None: """Parse inline attributes that immediately follow certain inline elements:: @@ -58,6 +64,23 @@ def _attr_inline_rule(state: StateInline, silent: bool) -> bool: return False try: new_pos, attrs = parse(state.src[state.pos :]) + if allowed_attributes: + if strict: + attrs = { + k: v + for k, v in attrs.items() + if k not in allowed_attributes + } + if attrs: + raise NotAllowedAttributesError( + f"These attributes are not allowed {attrs}" + ) + else: + attrs = { + k: v + for k, v in attrs.items() + if k in allowed_attributes + } except ParseError: return False token_index = _find_opening(state.tokens, len(state.tokens) - 1) From 4a1d21e0089f300921df0bec37b945e73ab00845 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 2 Sep 2024 10:52:14 +0000 Subject: [PATCH 02/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mdit_py_plugins/attrs/index.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/mdit_py_plugins/attrs/index.py b/mdit_py_plugins/attrs/index.py index 4eba102..98170bd 100644 --- a/mdit_py_plugins/attrs/index.py +++ b/mdit_py_plugins/attrs/index.py @@ -67,20 +67,14 @@ def _attr_inline_rule(state: StateInline, silent: bool) -> bool: if allowed_attributes: if strict: attrs = { - k: v - for k, v in attrs.items() - if k not in allowed_attributes + k: v for k, v in attrs.items() if k not in allowed_attributes } if attrs: raise NotAllowedAttributesError( f"These attributes are not allowed {attrs}" ) else: - attrs = { - k: v - for k, v in attrs.items() - if k in allowed_attributes - } + attrs = {k: v for k, v in attrs.items() if k in allowed_attributes} except ParseError: return False token_index = _find_opening(state.tokens, len(state.tokens) - 1) From fc5e4c12f55773a035dc0440cbc5660e37af0c2f Mon Sep 17 00:00:00 2001 From: maximkurbatov Date: Tue, 3 Sep 2024 08:58:31 +0300 Subject: [PATCH 03/20] brought it together --- mdit_py_plugins/attrs/index.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/mdit_py_plugins/attrs/index.py b/mdit_py_plugins/attrs/index.py index 98170bd..a2dadfa 100644 --- a/mdit_py_plugins/attrs/index.py +++ b/mdit_py_plugins/attrs/index.py @@ -21,8 +21,7 @@ def attrs_plugin( after: Sequence[str] = ("image", "code_inline", "link_close", "span_close"), spans: bool = False, span_after: str = "link", - allowed_attributes: Sequence[str] = [], - strict: bool = False, + allowed_attributes: Sequence[str] | None = None, ) -> None: """Parse inline attributes that immediately follow certain inline elements:: @@ -65,16 +64,15 @@ def _attr_inline_rule(state: StateInline, silent: bool) -> bool: try: new_pos, attrs = parse(state.src[state.pos :]) if allowed_attributes: - if strict: - attrs = { - k: v for k, v in attrs.items() if k not in allowed_attributes - } - if attrs: - raise NotAllowedAttributesError( - f"These attributes are not allowed {attrs}" - ) - else: - attrs = {k: v for k, v in attrs.items() if k in allowed_attributes} + attrs = { + k: v + for k, v in attrs.items() + if k not in allowed_attributes + } + if attrs: + raise NotAllowedAttributesError( + f"These attributes are not allowed {attrs}" + ) except ParseError: return False token_index = _find_opening(state.tokens, len(state.tokens) - 1) From 49fa7fb28deda201042ab7cd66cc732a3e17b3a2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 3 Sep 2024 06:10:09 +0000 Subject: [PATCH 04/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mdit_py_plugins/attrs/index.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/mdit_py_plugins/attrs/index.py b/mdit_py_plugins/attrs/index.py index a2dadfa..240e731 100644 --- a/mdit_py_plugins/attrs/index.py +++ b/mdit_py_plugins/attrs/index.py @@ -64,11 +64,7 @@ def _attr_inline_rule(state: StateInline, silent: bool) -> bool: try: new_pos, attrs = parse(state.src[state.pos :]) if allowed_attributes: - attrs = { - k: v - for k, v in attrs.items() - if k not in allowed_attributes - } + attrs = {k: v for k, v in attrs.items() if k not in allowed_attributes} if attrs: raise NotAllowedAttributesError( f"These attributes are not allowed {attrs}" From 9137c00f812ce362e58029367a98af6bfa386d04 Mon Sep 17 00:00:00 2001 From: maximkurbatov Date: Wed, 4 Sep 2024 09:53:19 +0300 Subject: [PATCH 05/20] fix mypy --- mdit_py_plugins/attrs/index.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mdit_py_plugins/attrs/index.py b/mdit_py_plugins/attrs/index.py index 240e731..eba8248 100644 --- a/mdit_py_plugins/attrs/index.py +++ b/mdit_py_plugins/attrs/index.py @@ -1,3 +1,4 @@ +from __future__ import annotations from typing import List, Optional, Sequence from markdown_it import MarkdownIt From 3df6f5564eaa5bd925ae1695db02936ba91480a5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Sep 2024 06:56:11 +0000 Subject: [PATCH 06/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mdit_py_plugins/attrs/index.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mdit_py_plugins/attrs/index.py b/mdit_py_plugins/attrs/index.py index eba8248..47fc9dd 100644 --- a/mdit_py_plugins/attrs/index.py +++ b/mdit_py_plugins/attrs/index.py @@ -1,4 +1,5 @@ from __future__ import annotations + from typing import List, Optional, Sequence from markdown_it import MarkdownIt From 72e53942ad5c1c13b221808246948d1cb9f2a251 Mon Sep 17 00:00:00 2001 From: maximkurbatov Date: Wed, 4 Sep 2024 10:12:24 +0300 Subject: [PATCH 07/20] fix ruff --- mdit_py_plugins/attrs/index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdit_py_plugins/attrs/index.py b/mdit_py_plugins/attrs/index.py index 47fc9dd..57b542d 100644 --- a/mdit_py_plugins/attrs/index.py +++ b/mdit_py_plugins/attrs/index.py @@ -111,7 +111,7 @@ def attrs_block_plugin(md: MarkdownIt) -> None: md.core.ruler.after("block", "attr", _attr_resolve_block_rule) -def _find_opening(tokens: List[Token], index: int) -> Optional[int]: +def _find_opening(tokens: Sequence[Token], index: int) -> Optional[int] | None: """Find the opening token index, if the token is closing.""" if tokens[index].nesting != -1: return index From c5c18f497f7f881d7fd8d7e73cbe046b174e44d2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Sep 2024 07:12:47 +0000 Subject: [PATCH 08/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mdit_py_plugins/attrs/index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdit_py_plugins/attrs/index.py b/mdit_py_plugins/attrs/index.py index 57b542d..b10ddad 100644 --- a/mdit_py_plugins/attrs/index.py +++ b/mdit_py_plugins/attrs/index.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import List, Optional, Sequence +from typing import Optional, Sequence from markdown_it import MarkdownIt from markdown_it.rules_block import StateBlock From 17545544d2b41feb8e46675dc7010a0ee8e9e6c3 Mon Sep 17 00:00:00 2001 From: maximkurbatov Date: Wed, 4 Sep 2024 10:19:14 +0300 Subject: [PATCH 09/20] ruff --- mdit_py_plugins/attrs/index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdit_py_plugins/attrs/index.py b/mdit_py_plugins/attrs/index.py index b10ddad..9278692 100644 --- a/mdit_py_plugins/attrs/index.py +++ b/mdit_py_plugins/attrs/index.py @@ -111,7 +111,7 @@ def attrs_block_plugin(md: MarkdownIt) -> None: md.core.ruler.after("block", "attr", _attr_resolve_block_rule) -def _find_opening(tokens: Sequence[Token], index: int) -> Optional[int] | None: +def _find_opening(tokens: Sequence[Token], index: int) -> int | None: """Find the opening token index, if the token is closing.""" if tokens[index].nesting != -1: return index From 2c70381a57a22f22fab7a891dd7f9e34f88c09dd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Sep 2024 07:19:35 +0000 Subject: [PATCH 10/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mdit_py_plugins/attrs/index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdit_py_plugins/attrs/index.py b/mdit_py_plugins/attrs/index.py index 9278692..e88c37c 100644 --- a/mdit_py_plugins/attrs/index.py +++ b/mdit_py_plugins/attrs/index.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Optional, Sequence +from typing import Sequence from markdown_it import MarkdownIt from markdown_it.rules_block import StateBlock From dcff3b26a9b78d4673c62bb854f918234ec7b50f Mon Sep 17 00:00:00 2001 From: maximkurbatov Date: Mon, 9 Sep 2024 09:36:04 +0300 Subject: [PATCH 11/20] fix_remark_commit --- mdit_py_plugins/attrs/index.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/mdit_py_plugins/attrs/index.py b/mdit_py_plugins/attrs/index.py index e88c37c..192890b 100644 --- a/mdit_py_plugins/attrs/index.py +++ b/mdit_py_plugins/attrs/index.py @@ -13,10 +13,6 @@ from .parse import ParseError, parse -class NotAllowedAttributesError(ValueError): - pass - - def attrs_plugin( md: MarkdownIt, *, @@ -66,11 +62,8 @@ def _attr_inline_rule(state: StateInline, silent: bool) -> bool: try: new_pos, attrs = parse(state.src[state.pos :]) if allowed_attributes: - attrs = {k: v for k, v in attrs.items() if k not in allowed_attributes} - if attrs: - raise NotAllowedAttributesError( - f"These attributes are not allowed {attrs}" - ) + attrs = {k: v for k, v in attrs.items() if k in allowed_attributes} + token.meta["insecure_attrs"] = {k: v for k, v in attrs.items() if k not in allowed_attributes} except ParseError: return False token_index = _find_opening(state.tokens, len(state.tokens) - 1) From 84e50193723e44ef78e60b837a20ccc2641eade1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 9 Sep 2024 06:36:23 +0000 Subject: [PATCH 12/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mdit_py_plugins/attrs/index.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mdit_py_plugins/attrs/index.py b/mdit_py_plugins/attrs/index.py index 192890b..094b9ab 100644 --- a/mdit_py_plugins/attrs/index.py +++ b/mdit_py_plugins/attrs/index.py @@ -63,7 +63,9 @@ def _attr_inline_rule(state: StateInline, silent: bool) -> bool: new_pos, attrs = parse(state.src[state.pos :]) if allowed_attributes: attrs = {k: v for k, v in attrs.items() if k in allowed_attributes} - token.meta["insecure_attrs"] = {k: v for k, v in attrs.items() if k not in allowed_attributes} + token.meta["insecure_attrs"] = { + k: v for k, v in attrs.items() if k not in allowed_attributes + } except ParseError: return False token_index = _find_opening(state.tokens, len(state.tokens) - 1) From 18e16fb0fa6ddc7d5833062520b6684244dd34e9 Mon Sep 17 00:00:00 2001 From: maximkurbatov Date: Mon, 9 Sep 2024 09:37:32 +0300 Subject: [PATCH 13/20] fix_lint --- mdit_py_plugins/attrs/index.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mdit_py_plugins/attrs/index.py b/mdit_py_plugins/attrs/index.py index 094b9ab..55717d5 100644 --- a/mdit_py_plugins/attrs/index.py +++ b/mdit_py_plugins/attrs/index.py @@ -61,10 +61,14 @@ def _attr_inline_rule(state: StateInline, silent: bool) -> bool: return False try: new_pos, attrs = parse(state.src[state.pos :]) - if allowed_attributes: - attrs = {k: v for k, v in attrs.items() if k in allowed_attributes} + if allowed_attributes is not None: + attrs = { + k: v for k, v in attrs.items() if k in allowed_attributes + } token.meta["insecure_attrs"] = { - k: v for k, v in attrs.items() if k not in allowed_attributes + k: v + for k, v in attrs.items() + if k not in allowed_attributes } except ParseError: return False From ca6e88d3903bf2a71c40bbb134c81f7b338b8c0b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 9 Sep 2024 06:41:13 +0000 Subject: [PATCH 14/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mdit_py_plugins/attrs/index.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/mdit_py_plugins/attrs/index.py b/mdit_py_plugins/attrs/index.py index 55717d5..80e9141 100644 --- a/mdit_py_plugins/attrs/index.py +++ b/mdit_py_plugins/attrs/index.py @@ -62,13 +62,9 @@ def _attr_inline_rule(state: StateInline, silent: bool) -> bool: try: new_pos, attrs = parse(state.src[state.pos :]) if allowed_attributes is not None: - attrs = { - k: v for k, v in attrs.items() if k in allowed_attributes - } + attrs = {k: v for k, v in attrs.items() if k in allowed_attributes} token.meta["insecure_attrs"] = { - k: v - for k, v in attrs.items() - if k not in allowed_attributes + k: v for k, v in attrs.items() if k not in allowed_attributes } except ParseError: return False From b302bc6df2297c9c79e9f7c420561abfee599a20 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Mon, 9 Sep 2024 10:01:09 +0200 Subject: [PATCH 15/20] Update mdit_py_plugins/attrs/index.py --- mdit_py_plugins/attrs/index.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mdit_py_plugins/attrs/index.py b/mdit_py_plugins/attrs/index.py index 80e9141..2586c80 100644 --- a/mdit_py_plugins/attrs/index.py +++ b/mdit_py_plugins/attrs/index.py @@ -61,11 +61,11 @@ def _attr_inline_rule(state: StateInline, silent: bool) -> bool: return False try: new_pos, attrs = parse(state.src[state.pos :]) - if allowed_attributes is not None: - attrs = {k: v for k, v in attrs.items() if k in allowed_attributes} - token.meta["insecure_attrs"] = { + if allowed_attributes is not None and (disallowed := { k: v for k, v in attrs.items() if k not in allowed_attributes - } + }): + token.meta["insecure_attrs"] = dissalowed + attrs = {k: v for k, v in attrs.items() if k in allowed_attributes} except ParseError: return False token_index = _find_opening(state.tokens, len(state.tokens) - 1) From f2b89978a7a218ba9a740e91c8f55a3a208a27f4 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Mon, 9 Sep 2024 10:01:44 +0200 Subject: [PATCH 16/20] Update mdit_py_plugins/attrs/index.py --- mdit_py_plugins/attrs/index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdit_py_plugins/attrs/index.py b/mdit_py_plugins/attrs/index.py index 2586c80..381ab29 100644 --- a/mdit_py_plugins/attrs/index.py +++ b/mdit_py_plugins/attrs/index.py @@ -64,7 +64,7 @@ def _attr_inline_rule(state: StateInline, silent: bool) -> bool: if allowed_attributes is not None and (disallowed := { k: v for k, v in attrs.items() if k not in allowed_attributes }): - token.meta["insecure_attrs"] = dissalowed + token.meta["insecure_attrs"] = disallowed attrs = {k: v for k, v in attrs.items() if k in allowed_attributes} except ParseError: return False From a6887dedd01a662785af61f6c0125d96d0aa564c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 9 Sep 2024 08:03:22 +0000 Subject: [PATCH 17/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mdit_py_plugins/attrs/index.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mdit_py_plugins/attrs/index.py b/mdit_py_plugins/attrs/index.py index 381ab29..1fb83c5 100644 --- a/mdit_py_plugins/attrs/index.py +++ b/mdit_py_plugins/attrs/index.py @@ -61,9 +61,11 @@ def _attr_inline_rule(state: StateInline, silent: bool) -> bool: return False try: new_pos, attrs = parse(state.src[state.pos :]) - if allowed_attributes is not None and (disallowed := { + if allowed_attributes is not None and ( + disallowed := { k: v for k, v in attrs.items() if k not in allowed_attributes - }): + } + ): token.meta["insecure_attrs"] = disallowed attrs = {k: v for k, v in attrs.items() if k in allowed_attributes} except ParseError: From ba439251a688a7c6e74bc2c62803869ea72d6c71 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Mon, 9 Sep 2024 14:39:47 +0200 Subject: [PATCH 18/20] re-work, and add test --- mdit_py_plugins/attrs/index.py | 111 +++++++++++++++------- tests/test_attrs.py | 17 ++++ tests/test_attrs/test_attrs_allowed.yml | 121 ++++++++++++++++++++++++ 3 files changed, 213 insertions(+), 36 deletions(-) create mode 100644 tests/test_attrs/test_attrs_allowed.yml diff --git a/mdit_py_plugins/attrs/index.py b/mdit_py_plugins/attrs/index.py index 1fb83c5..ce7439d 100644 --- a/mdit_py_plugins/attrs/index.py +++ b/mdit_py_plugins/attrs/index.py @@ -1,6 +1,7 @@ from __future__ import annotations -from typing import Sequence +from functools import partial +from typing import Any, Sequence from markdown_it import MarkdownIt from markdown_it.rules_block import StateBlock @@ -19,7 +20,7 @@ def attrs_plugin( after: Sequence[str] = ("image", "code_inline", "link_close", "span_close"), spans: bool = False, span_after: str = "link", - allowed_attributes: Sequence[str] | None = None, + allowed: Sequence[str] | None = None, ) -> None: """Parse inline attributes that immediately follow certain inline elements:: @@ -51,43 +52,25 @@ def attrs_plugin( :param spans: If True, also parse attributes after spans of text, encapsulated by `[]`. Note Markdown link references take precedence over this syntax. :param span_after: The name of an inline rule after which spans may be specified. + :param allowed: A list of allowed attribute names. + If not ``None``, any attributes not in this list will be removed + and placed in the token's meta under the key "insecure_attrs". """ - def _attr_inline_rule(state: StateInline, silent: bool) -> bool: - if state.pending or not state.tokens: - return False - token = state.tokens[-1] - if token.type not in after: - return False - try: - new_pos, attrs = parse(state.src[state.pos :]) - if allowed_attributes is not None and ( - disallowed := { - k: v for k, v in attrs.items() if k not in allowed_attributes - } - ): - token.meta["insecure_attrs"] = disallowed - attrs = {k: v for k, v in attrs.items() if k in allowed_attributes} - except ParseError: - return False - token_index = _find_opening(state.tokens, len(state.tokens) - 1) - if token_index is None: - return False - state.pos += new_pos + 1 - if not silent: - attr_token = state.tokens[token_index] - if "class" in attrs and "class" in token.attrs: - attrs["class"] = f"{attr_token.attrs['class']} {attrs['class']}" - attr_token.attrs.update(attrs) - return True - if spans: md.inline.ruler.after(span_after, "span", _span_rule) if after: - md.inline.ruler.push("attr", _attr_inline_rule) + md.inline.ruler.push( + "attr", + partial( + _attr_inline_rule, + after=after, + allowed_attributes=None if allowed is None else set(allowed), + ), + ) -def attrs_block_plugin(md: MarkdownIt) -> None: +def attrs_block_plugin(md: MarkdownIt, *, allowed: Sequence[str] | None = None) -> None: """Parse block attributes. Block attributes are attributes on a single line, with no other content. @@ -103,9 +86,20 @@ def attrs_block_plugin(md: MarkdownIt) -> None: A paragraph, that will be assigned the class ``a b c``, and the identifier ``b``. This syntax is inspired by Djot block attributes. + + :param allowed: A list of allowed attribute names. + If not ``None``, any attributes not in this list will be removed + and placed in the token's meta under the key "insecure_attrs". """ md.block.ruler.before("fence", "attr", _attr_block_rule) - md.core.ruler.after("block", "attr", _attr_resolve_block_rule) + md.core.ruler.after( + "block", + "attr", + partial( + _attr_resolve_block_rule, + allowed_attributes=None if allowed is None else set(allowed), + ), + ) def _find_opening(tokens: Sequence[Token], index: int) -> int | None: @@ -159,6 +153,34 @@ def _span_rule(state: StateInline, silent: bool) -> bool: return True +def _attr_inline_rule( + state: StateInline, + silent: bool, + after: Sequence[str], + *, + allowed_attributes: set[str] | None = None, +) -> bool: + if state.pending or not state.tokens: + return False + token = state.tokens[-1] + if token.type not in after: + return False + try: + new_pos, attrs = parse(state.src[state.pos :]) + except ParseError: + return False + token_index = _find_opening(state.tokens, len(state.tokens) - 1) + if token_index is None: + return False + state.pos += new_pos + 1 + if not silent: + attr_token = state.tokens[token_index] + if "class" in attrs and "class" in token.attrs: + attrs["class"] = f"{token.attrs['class']} {attrs['class']}" + _add_attrs(attr_token, attrs, allowed_attributes) + return True + + def _attr_block_rule( state: StateBlock, startLine: int, endLine: int, silent: bool ) -> bool: @@ -207,7 +229,9 @@ def _attr_block_rule( return True -def _attr_resolve_block_rule(state: StateCore) -> None: +def _attr_resolve_block_rule( + state: StateCore, *, allowed_attributes: set[str] | None +) -> None: """Find attribute block then move its attributes to the next block.""" i = 0 len_tokens = len(state.tokens) @@ -231,8 +255,23 @@ def _attr_resolve_block_rule(state: StateCore) -> None: if key == "class" or key not in next_token.attrs: next_token.attrs[key] = value else: - # attribute block takes precedence over attributes in other blocks - next_token.attrs.update(state.tokens[i].attrs) + _add_attrs(next_token, state.tokens[i].attrs, allowed_attributes) state.tokens.pop(i) len_tokens -= 1 + + +def _add_attrs( + token: Token, + attrs: dict[str, Any], + allowed_attributes: set[str] | None, +) -> None: + """Add attributes to a token, skipping any disallowed attributes.""" + if allowed_attributes is not None and ( + disallowed := {k: v for k, v in attrs.items() if k not in allowed_attributes} + ): + token.meta["insecure_attrs"] = disallowed + attrs = {k: v for k, v in attrs.items() if k in allowed_attributes} + + # attributes takes precedence over existing attributes + token.attrs.update(attrs) diff --git a/tests/test_attrs.py b/tests/test_attrs.py index 2e0bd58..cec33d1 100644 --- a/tests/test_attrs.py +++ b/tests/test_attrs.py @@ -20,3 +20,20 @@ def test_attrs(line, title, input, expected): text = md.render(input) print(text) assert text.rstrip() == expected.rstrip() + + +def test_attrs_allowed(data_regression): + allowed = ["safe"] + md = ( + MarkdownIt("commonmark") + .use(attrs_plugin, allowed=allowed) + .use(attrs_block_plugin, allowed=allowed) + ) + tokens = md.parse(""" +{danger1=a safe=b} +{danger2=c safe=d} +# header + +`inline`{safe=a danger=b} + """) + data_regression.check([t.as_dict() for t in tokens]) diff --git a/tests/test_attrs/test_attrs_allowed.yml b/tests/test_attrs/test_attrs_allowed.yml new file mode 100644 index 0000000..64e7cd8 --- /dev/null +++ b/tests/test_attrs/test_attrs_allowed.yml @@ -0,0 +1,121 @@ +- attrs: + - - safe + - d + block: true + children: null + content: '' + hidden: false + info: '' + level: 0 + map: + - 3 + - 4 + markup: '#' + meta: + insecure_attrs: + danger1: a + danger2: c + nesting: 1 + tag: h1 + type: heading_open +- attrs: null + block: true + children: + - attrs: null + block: false + children: null + content: header + hidden: false + info: '' + level: 0 + map: null + markup: '' + meta: {} + nesting: 0 + tag: '' + type: text + content: header + hidden: false + info: '' + level: 1 + map: + - 3 + - 4 + markup: '' + meta: {} + nesting: 0 + tag: '' + type: inline +- attrs: null + block: true + children: null + content: '' + hidden: false + info: '' + level: 0 + map: null + markup: '#' + meta: {} + nesting: -1 + tag: h1 + type: heading_close +- attrs: null + block: true + children: null + content: '' + hidden: false + info: '' + level: 0 + map: + - 5 + - 6 + markup: '' + meta: {} + nesting: 1 + tag: p + type: paragraph_open +- attrs: null + block: true + children: + - attrs: + - - safe + - a + block: false + children: null + content: inline + hidden: false + info: '' + level: 0 + map: null + markup: '`' + meta: + insecure_attrs: + danger: b + nesting: 0 + tag: code + type: code_inline + content: '`inline`{safe=a danger=b}' + hidden: false + info: '' + level: 1 + map: + - 5 + - 6 + markup: '' + meta: {} + nesting: 0 + tag: '' + type: inline +- attrs: null + block: true + children: null + content: '' + hidden: false + info: '' + level: 0 + map: null + markup: '' + meta: {} + nesting: -1 + tag: p + type: paragraph_close From 9cb4c29e515dae9dd1cda9c7fd1fc728b8d8444a Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Mon, 9 Sep 2024 14:46:29 +0200 Subject: [PATCH 19/20] Update index.py --- mdit_py_plugins/attrs/index.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/mdit_py_plugins/attrs/index.py b/mdit_py_plugins/attrs/index.py index ce7439d..0890728 100644 --- a/mdit_py_plugins/attrs/index.py +++ b/mdit_py_plugins/attrs/index.py @@ -158,7 +158,7 @@ def _attr_inline_rule( silent: bool, after: Sequence[str], *, - allowed_attributes: set[str] | None = None, + allowed: set[str] | None = None, ) -> bool: if state.pending or not state.tokens: return False @@ -177,7 +177,7 @@ def _attr_inline_rule( attr_token = state.tokens[token_index] if "class" in attrs and "class" in token.attrs: attrs["class"] = f"{token.attrs['class']} {attrs['class']}" - _add_attrs(attr_token, attrs, allowed_attributes) + _add_attrs(attr_token, attrs, allowed) return True @@ -229,9 +229,7 @@ def _attr_block_rule( return True -def _attr_resolve_block_rule( - state: StateCore, *, allowed_attributes: set[str] | None -) -> None: +def _attr_resolve_block_rule(state: StateCore, *, allowed: set[str] | None) -> None: """Find attribute block then move its attributes to the next block.""" i = 0 len_tokens = len(state.tokens) @@ -255,7 +253,7 @@ def _attr_resolve_block_rule( if key == "class" or key not in next_token.attrs: next_token.attrs[key] = value else: - _add_attrs(next_token, state.tokens[i].attrs, allowed_attributes) + _add_attrs(next_token, state.tokens[i].attrs, allowed) state.tokens.pop(i) len_tokens -= 1 From 53d865d50550e747b18644c9cbbefc19ef5854e2 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Mon, 9 Sep 2024 14:51:55 +0200 Subject: [PATCH 20/20] Update index.py --- mdit_py_plugins/attrs/index.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/mdit_py_plugins/attrs/index.py b/mdit_py_plugins/attrs/index.py index 0890728..3efaab6 100644 --- a/mdit_py_plugins/attrs/index.py +++ b/mdit_py_plugins/attrs/index.py @@ -65,7 +65,7 @@ def attrs_plugin( partial( _attr_inline_rule, after=after, - allowed_attributes=None if allowed is None else set(allowed), + allowed=None if allowed is None else set(allowed), ), ) @@ -96,8 +96,7 @@ def attrs_block_plugin(md: MarkdownIt, *, allowed: Sequence[str] | None = None) "block", "attr", partial( - _attr_resolve_block_rule, - allowed_attributes=None if allowed is None else set(allowed), + _attr_resolve_block_rule, allowed=None if allowed is None else set(allowed) ), ) @@ -262,14 +261,14 @@ def _attr_resolve_block_rule(state: StateCore, *, allowed: set[str] | None) -> N def _add_attrs( token: Token, attrs: dict[str, Any], - allowed_attributes: set[str] | None, + allowed: set[str] | None, ) -> None: """Add attributes to a token, skipping any disallowed attributes.""" - if allowed_attributes is not None and ( - disallowed := {k: v for k, v in attrs.items() if k not in allowed_attributes} + if allowed is not None and ( + disallowed := {k: v for k, v in attrs.items() if k not in allowed} ): token.meta["insecure_attrs"] = disallowed - attrs = {k: v for k, v in attrs.items() if k in allowed_attributes} + attrs = {k: v for k, v in attrs.items() if k in allowed} # attributes takes precedence over existing attributes token.attrs.update(attrs)