Skip to content

Commit

Permalink
Move glob_to_regex into the source - resolves #173
Browse files Browse the repository at this point in the history
  • Loading branch information
Yoric committed Feb 9, 2022
1 parent 9c9bd0e commit f411741
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 3 deletions.
5 changes: 2 additions & 3 deletions synapse_antispam/mjolnir/list_rule.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
# Copyright 2019 The Matrix.org Foundation C.I.C.
# Copyright 2022 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from synapse.util import glob_to_regex
from .matching import glob_to_regex

# Identifier that recommendation_to_stable() returns for any ban-type
# recommendation.
RECOMMENDATION_BAN = "m.ban"
# Every identifier treated as a ban recommendation: the one above plus the
# "org.matrix.mjolnir"-prefixed variant.
RECOMMENDATION_BAN_TYPES = [RECOMMENDATION_BAN, "org.matrix.mjolnir.ban"]
Expand All @@ -26,7 +26,6 @@
# Type names recognised for server rules: RULE_SERVER plus two alternative
# spellings of the same rule type.
SERVER_RULE_TYPES = [RULE_SERVER, "m.room.rule.server", "org.matrix.mjolnir.rule.server"]
# Flat list combining the user, room and server rule type names.
ALL_RULE_TYPES = [*USER_RULE_TYPES, *ROOM_RULE_TYPES, *SERVER_RULE_TYPES]


def recommendation_to_stable(recommendation):
if recommendation in RECOMMENDATION_BAN_TYPES:
return RECOMMENDATION_BAN
Expand Down
73 changes: 73 additions & 0 deletions synapse_antispam/mjolnir/matching.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# -*- coding: utf-8 -*-
# Copyright 2022 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# The tools in this file were copied from Synapse because these functions will
# not remain publicly accessible from the Synapse module; see
# https://github.com/matrix-org/mjolnir/pull/174

import re
from typing import Pattern

def re_word_boundary(r: str) -> str:
    """
    Wrap a regex so that it only matches as a whole word.

    Adds word boundary characters to the start and end of an
    expression to require that the match occur as a whole word,
    but do so respecting the fact that strings starting or ending
    with non-word characters will change word boundaries.

    Args:
        r: source of the regular expression to wrap.
    Returns:
        Source of a new regular expression. It has two capturing groups
        (the leading and the trailing boundary); the wrapped expression
        itself sits in a non-capturing group, so any groups inside it are
        numbered after the leading boundary group.
    """
    # \b is not used here: per the upstream note it chokes on unicode, while
    # requiring start/end of string or a non-word character (\W) works.
    # Note that \W is only equivalent to [^0-9A-Za-z_] under re.ASCII; for
    # ordinary str patterns it is Unicode-aware.
    #
    # The (?:...) wrapper keeps both boundaries attached to the whole
    # expression even if it contains a top-level alternation: without it,
    # "foo|bar" would become "(^|\W)foo" OR "bar(\W|$)".
    return r"(^|\W)(?:%s)(\W|$)" % (r,)

# Matches one maximal run of glob wildcard characters (`?` and/or `*`).
_WILDCARD_RUN = re.compile(r"([\?\*]+)")


def glob_to_regex(glob: str, word_boundary: bool = False) -> Pattern:
    """Compile a glob into a case-insensitive regular expression.

    Each run of wildcards is collapsed into a single bounded quantifier so
    that pathological globs cannot cause a performance cliff:
      - the glob `?**?**?` is equivalent to the glob `???*`
      - the glob `???*` is equivalent to the regex `.{3,}`
    Everything outside a wildcard run is matched literally. The pattern is
    compiled with re.IGNORECASE only, so wildcards do not match a newline.

    Args:
        glob: the glob pattern to convert.
        word_boundary: when True, the result may match anywhere in the
            string as long as it sits on word boundaries. When False, the
            result must match the entire string.
    Returns:
        The compiled regular expression.
    """
    pieces = []
    for segment in _WILDCARD_RUN.split(glob):
        if _WILDCARD_RUN.match(segment) is None:
            # Literal text (possibly empty): match it verbatim.
            pieces.append(re.escape(segment))
        else:
            # Wildcard run: every `?` demands one character, and any `*`
            # lifts the upper bound.
            required = segment.count("?")
            template = ".{%d,}" if "*" in segment else ".{%d}"
            pieces.append(template % required)

    body = "".join(pieces)

    if not word_boundary:
        # \A anchors at start of string, \Z at end of string
        return re.compile(r"\A" + body + r"\Z", re.IGNORECASE)

    return re.compile(re_word_boundary(body), re.IGNORECASE)

0 comments on commit f411741

Please sign in to comment.