From 2060c896f4e8d2bf8487498724ef4202c40dbdc0 Mon Sep 17 00:00:00 2001 From: meator Date: Sat, 10 Feb 2024 09:46:27 +0100 Subject: [PATCH] Properly escape regex string literals Python3.11 and newer versions like to complain about unknown escape sequences like \., \s etc. --- cppman/formatter/cppreference.py | 4 ++-- cppman/formatter/tableparser.py | 2 +- cppman/main.py | 12 ++++++------ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cppman/formatter/cppreference.py b/cppman/formatter/cppreference.py index 22638c2..d6a4712 100644 --- a/cppman/formatter/cppreference.py +++ b/cppman/formatter/cppreference.py @@ -50,7 +50,7 @@ def member_type_function(g): tail = ' ' + spectag.group(2) cppvertag = re.search( - '^(.*?)(\[(?:(?:since|until) )?C\+\+\d+\]\s*(,\s*)?)+$', head) + r'^(.*?)(\[(?:(?:since|until) )?C\+\+\d+\]\s*(,\s*)?)+$', head) if cppvertag: head = cppvertag.group(1).strip() tail = ' ' + cppvertag.group(2) @@ -117,7 +117,7 @@ def member_type_function(g): # Group t-lines (r'', r'', re.S), (r'(?:.+?.*)+', - lambda x: re.sub('\s*\s*', r', ', x.group(0)), re.S), + lambda x: re.sub(r'\s*\s*', r', ', x.group(0)), re.S), # Member type & function second col is group see basic_fstream for example (r'\s*?((?:(?!).)*?)\s*?' r'((?:(?!).)*?)]*>((?:(?!).)*?)' diff --git a/cppman/formatter/tableparser.py b/cppman/formatter/tableparser.py index 39cf06a..f5894fa 100644 --- a/cppman/formatter/tableparser.py +++ b/cppman/formatter/tableparser.py @@ -144,7 +144,7 @@ def gen(self, fd, index=0, last=False, rowspan=None): ci = 0 for i in range(total): if i in rowspan: - fd.write('\^%s' % ('|' if i < total - 1 else '')) + fd.write(r'\^%s' % ('|' if i < total - 1 else '')) if rowspan[i] == 1: del rowspan[i] else: diff --git a/cppman/main.py b/cppman/main.py index fffc4ff..e8c58ae 100644 --- a/cppman/main.py +++ b/cppman/main.py @@ -129,7 +129,7 @@ def rebuild_index(self): self.db_conn = sqlite3.connect(environ.index_db_re) self.db_cursor = self.db_conn.cursor() try: - self.add_url_filter('\.(jpg|jpeg|gif|png|js|css|swf|svg)$') + self.add_url_filter(r'\.(jpg|jpeg|gif|png|js|css|swf|svg)$') self.set_follow_mode(Crawler.F_SAME_PATH) sources = [('cplusplus.com', 'https://cplusplus.com/reference/', None), @@ -399,10 +399,10 @@ def _extract_keywords(self, text): for tr in x.find_all('tr'): tds = tr.find_all('td') if len(tds) == 2: - if re.match("\s*Type\s*", tds[0].get_text()): + if re.match(r"\s*Type\s*", tds[0].get_text()): typedefTable = True elif typedefTable: - res = re.search('^\s*(\S*)\s+.*$', tds[0].get_text()) + res = re.search(r'^\s*(\S*)\s+.*$', tds[0].get_text()) if res and res.group(1): names.append(res.group(1)) elif not typedefTable: @@ -420,7 +420,7 @@ def _extract_keywords(self, text): if e.name == "table": for tr in e.find_all('tr'): text = re.sub('\n', ' ', tr.get_text()) - res = re.search('^.* (\S+)\s*=.*$', text) + res = re.search(r'^.* (\S+)\s*=.*$', text) if res: names.append(res.group(1)) # search for "Helper types" list @@ -433,7 +433,7 @@ def _extract_keywords(self, text): if e.name == "table": for tr in e.find_all('tr'): text = re.sub('\n', ' ', tr.get_text()) - res = re.search('^.* (\S+)\s*=.*$', text) + res = re.search(r'^.* (\S+)\s*=.*$', text) if res: names.append(res.group(1)) return [html.unescape(n) for n in names] @@ -583,7 +583,7 @@ def find(self, pattern): results = self._search_keyword(pattern) - pat = re.compile('(.*?)(%s)(.*?)( \(.*\))?$' % + pat = re.compile(r'(.*?)(%s)(.*?)( \(.*\))?$' % re.escape(pattern), re.I) if results: