-
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
refactor: use "python-vendorize" for 3rd-party libs
Signed-off-by: Jack Cherng <[email protected]>
- Loading branch information
Showing
12 changed files
with
171 additions
and
206 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
*.dist-info/ export-ignore | ||
*.pyi export-ignore | ||
.dependabot export-ignore | ||
.editorconfig export-ignore | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,7 @@ Desktop.ini | |
Thumbs.db | ||
|
||
# Python | ||
*.dist-info/ | ||
*.py[cod] | ||
.mypy_cache/ | ||
.ruff_cache/ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
File renamed without changes.
19 changes: 10 additions & 9 deletions
19
plugin/libs/trie/__init__.py → plugin/_vendor/trie/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
import collections
import collections.abc
|
||
# Only the set type is exported by ``from ... import *``; the node class and
# the pattern fragments below are not listed.
__all__ = ('Triegex',)

# Alternation operator placed between the patterns of sibling branches.
OR = r'|'

# regex below matches nothing https://stackoverflow.com/a/940840/2183102. We
# use '~' to ensure it comes last when lexicographically sorted:
# max(string.printable) is '~'
NOTHING = r'~^(?#match nothing)'
# Non-capturing group template; ``{0}`` receives the joined alternatives.
GROUP = r'(?:{0})'
# Appended to the pattern of every node at which a stored word ends.
WORD_BOUNDARY = r'\b'
|
||
|
||
class TriegexNode:
    """One node of the trie: a pattern fragment plus its child nodes.

    Attributes:
        char: the text this node contributes to the generated pattern.
        end: ``True`` when a stored word terminates at this node.
        childrens: dict mapping each child's ``char`` to the child node.
    """

    def __init__(self, char: str, end: bool, *childrens):
        """Create a node.

        :param char: fragment contributed by this node; ``None`` is stored
            as the empty string (used for the root).
        :param end: whether a word ends at this node.
        :param childrens: child nodes, indexed by their own ``char``.
        """
        self.char = char if char is not None else ''
        self.end = end
        self.childrens = {children.char: children for children in childrens}

    def __iter__(self):
        """Iterate over the child nodes, ordered by their ``char``."""
        return iter(sorted(self.childrens.values(), key=lambda x: x.char))

    def __len__(self):
        """Return the number of direct children."""
        return len(self.childrens)

    def __repr__(self):
        return f'<TriegexNode: \'{self.char}\' end={self.end}>'

    def __contains__(self, key):
        """Return whether a direct child with ``char == key`` exists."""
        return key in self.childrens

    def __getitem__(self, key):
        """Return the direct child whose ``char`` is *key* (KeyError if absent)."""
        return self.childrens[key]

    def __delitem__(self, key):
        """Remove the direct child whose ``char`` is *key* (KeyError if absent)."""
        del self.childrens[key]

    def to_regex(self) -> str:
        """Render the subtree rooted at this node as a regular expression.

        The node's ``char`` is followed by its continuations: one pattern per
        child (in sorted order) and, when a word ends here, a word boundary.
        Several continuations are wrapped in a non-capturing alternation; a
        single one is concatenated directly. Characters are inserted into
        the pattern verbatim.

        The walk is iterative so deep tries do not hit the recursion limit.
        """
        # Pre-order walk. Consuming ``waiting`` from the end afterwards
        # guarantees every child is rendered before its parent.
        stack = [self]
        waiting = []
        while stack:
            node = stack.pop()
            waiting.append(node)
            stack.extend(node)

        ready = []
        while waiting:
            node = waiting.pop()

            # The children's patterns are the top ``len(node)`` entries of
            # ``ready``; popping yields them in reverse sorted order.
            suffixes = [ready.pop() for _ in range(len(node))]
            suffixes.reverse()

            # A node may both end a word and have children (e.g. 'foo' and
            # 'foobar'); the boundary is then just one more alternative, so
            # the children's patterns are always consumed.
            if node.end:
                suffixes.append(WORD_BOUNDARY)

            if len(suffixes) > 1:
                result = node.char + GROUP.format(OR.join(suffixes))
            elif suffixes:
                # if there is only one continuation, we can safely
                # concatenate without nesting
                result = node.char + suffixes[0]
            else:
                result = node.char

            ready.append(result)
        return ready[-1]
|
||
|
||
class Triegex(collections.abc.MutableSet):
    """Mutable set of words stored in a trie that can be rendered as a regex.

    Words are stored character by character; the characters are placed into
    the generated pattern verbatim.
    """

    # Root node of the trie; replaced with a real node in ``__init__``.
    _root = None

    def __init__(self, *words):
        """
        Triegex constructor.

        :param words: words to add to the set initially.
        """

        # make sure we match nothing when no words are added
        self._root = TriegexNode(None, False, TriegexNode(NOTHING, False))

        for word in words:
            self.add(word)

    @classmethod
    def _from_iterable(cls, it):
        """Build a new instance from an iterable of words.

        The set-operator mixins (``|``, ``&``, ``-``, ``^``) call this hook;
        the default assumes a ``cls(iterable)`` constructor, whereas this
        class takes the words as separate positional arguments.
        """
        return cls(*it)

    def add(self, word: str) -> None:
        """Add *word* to the set; an empty word is ignored.

        Existing nodes along the path are reused, so adding a word that is a
        prefix of a stored word does not discard the longer word.
        """
        if not word:
            return
        current = self._root
        for letter in word:
            child = current.childrens.get(letter)
            if child is None:
                child = current.childrens[letter] = TriegexNode(letter, False)
            current = child
        # this will ensure that we correctly match the word boundary
        current.end = True

    def to_regex(self) -> str:
        r"""
        Produce regular expression that will match each word in the
        internal trie.

        >>> t = Triegex('foo', 'bar', 'baz')
        >>> t.to_regex()
        '(?:ba(?:r\\b|z\\b)|foo\\b|~^(?#match nothing))'
        """
        return self._root.to_regex()

    def _traverse(self):
        """Yield every node of the trie exactly once, starting at the root."""
        stack = [self._root]
        while stack:
            current = stack.pop()
            yield current
            stack.extend(current.childrens.values())

    def __iter__(self):
        """Yield every stored word once (in no particular order)."""
        # Each stack entry carries the text spelled out on the way to the
        # node, so equal characters in different branches cannot be confused.
        stack = [(self._root, '')]
        while stack:
            node, prefix = stack.pop()
            for child in node:
                word = prefix + child.char
                if child.end:
                    yield word
                stack.append((child, word))

    def __len__(self) -> int:
        """Return the number of stored words."""
        return sum(1 for _ in self)

    def __contains__(self, word) -> bool:
        """Return whether *word* is stored in the set."""
        current = self._root
        for char in word:
            if char not in current:
                return False
            current = current[char]
        return current.end  # word has to end with the last char

    def discard(self, word) -> None:
        """Remove *word* if present; do nothing otherwise.

        Words that merely share a prefix with *word* are left untouched.
        """
        path = [self._root]
        for char in word:
            current = path[-1]
            if char not in current:
                return
            path.append(current[char])

        terminal = path[-1]
        if not terminal.end:
            return
        terminal.end = False

        # Prune bottom-up: drop nodes that neither end a word nor lead to
        # one, so no boundary-less fragment is left in the generated pattern.
        while len(path) > 1:
            node = path.pop()
            if node.end or len(node):
                break
            del path[-1][node.char]
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.