From a7409b6c3c92cd465b89d21758b00947d8b3f810 Mon Sep 17 00:00:00 2001 From: Daniil-M-beep <64322880+Daniil-M-beep@users.noreply.github.com> Date: Sat, 1 Aug 2020 11:27:21 +0100 Subject: [PATCH 1/5] Less fp's Signed-off-by: Daniil-M-beep <64322880+Daniil-M-beep@users.noreply.github.com> --- findspam.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/findspam.py b/findspam.py index afb404b788..f29683749f 100644 --- a/findspam.py +++ b/findspam.py @@ -606,7 +606,7 @@ def len_img_block(string): # max_score=2 to prevent voting fraud -@create_rule("post is mostly images", title=False, max_rep=201, max_score=2) +@create_rule("post is mostly images", title=False, max_rep=201, max_score=2, sites=["stackoverflow.com", "math.stackexchange.com", "mathoverflow.net", "stats.stackexchange.com"]) def mostly_img(s, site): if len(s) == 0: return False, "" @@ -1417,7 +1417,7 @@ def strip_urls_and_tags(s): @create_rule("mostly punctuation marks in {}", max_rep=52, - sites=["math.stackexchange.com", "mathoverflow.net", "codegolf.stackexchange.com"]) + sites=["stats.stackexchange.com","math.stackexchange.com", "mathoverflow.net", "codegolf.stackexchange.com"]) def mostly_punctuations(s, site): # Strip code blocks here rather than with `stripcodeblocks` so we get the length of the whole post in s. body = regex.sub(r"(?s)
.*?", "", s) From 2e61e8127b9cfacd38d67b1e4ce9a5c27af326af Mon Sep 17 00:00:00 2001 From: Daniil-M-beep <64322880+Daniil-M-beep@users.noreply.github.com> Date: Sat, 1 Aug 2020 11:42:34 +0100 Subject: [PATCH 2/5] Flake --- findspam.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/findspam.py b/findspam.py index f29683749f..8c5c14cebf 100644 --- a/findspam.py +++ b/findspam.py @@ -1417,7 +1417,7 @@ def strip_urls_and_tags(s): @create_rule("mostly punctuation marks in {}", max_rep=52, - sites=["stats.stackexchange.com","math.stackexchange.com", "mathoverflow.net", "codegolf.stackexchange.com"]) + sites=["stats.stackexchange.com", "math.stackexchange.com", "mathoverflow.net", "codegolf.stackexchange.com"]) def mostly_punctuations(s, site): # Strip code blocks here rather than with `stripcodeblocks` so we get the length of the whole post in s. body = regex.sub(r"(?s)
.*?", "", s) From dcd68ead34f5475ad67b592e115d3b92823c166c Mon Sep 17 00:00:00 2001 From: Daniil-M-beep <64322880+Daniil-M-beep@users.noreply.github.com> Date: Sat, 1 Aug 2020 12:11:51 +0100 Subject: [PATCH 3/5] Update findspam.py --- findspam.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/findspam.py b/findspam.py index 8c5c14cebf..effc8f33f6 100644 --- a/findspam.py +++ b/findspam.py @@ -606,7 +606,8 @@ def len_img_block(string): # max_score=2 to prevent voting fraud -@create_rule("post is mostly images", title=False, max_rep=201, max_score=2, sites=["stackoverflow.com", "math.stackexchange.com", "mathoverflow.net", "stats.stackexchange.com"]) +@create_rule("post is mostly images", title=False, max_rep=201, max_score=2, sites=[ + "stackoverflow.com", "math.stackexchange.com", "mathoverflow.net", "stats.stackexchange.com"]) def mostly_img(s, site): if len(s) == 0: return False, "" @@ -1416,8 +1417,9 @@ def strip_urls_and_tags(s): return URL_REGEX.sub("", TAG_REGEX.sub("", s)) -@create_rule("mostly punctuation marks in {}", max_rep=52, - sites=["stats.stackexchange.com", "math.stackexchange.com", "mathoverflow.net", "codegolf.stackexchange.com"]) +@create_rule("mostly punctuation marks in {}", max_rep=52, sites=[ + "stats.stackexchange.com", "math.stackexchange.com", + "mathoverflow.net", "codegolf.stackexchange.com"]) def mostly_punctuations(s, site): # Strip code blocks here rather than with `stripcodeblocks` so we get the length of the whole post in s. body = regex.sub(r"(?s)
.*?", "", s) From cde0e1dceed25c46a768b39079653294b0da8846 Mon Sep 17 00:00:00 2001 From: Daniil-M-beep <64322880+Daniil-M-beep@users.noreply.github.com> Date: Sat, 1 Aug 2020 12:22:21 +0100 Subject: [PATCH 4/5] Update test_findspam.py --- test/test_findspam.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_findspam.py b/test/test_findspam.py index 7e21796050..057d48c42e 100644 --- a/test/test_findspam.py +++ b/test/test_findspam.py @@ -72,9 +72,9 @@ ('Should not be caught: http://example.com', '', '', 'drupal.stackexchange.com', False, False, False), ('Should not be caught: https://www.example.com', '', '', 'drupal.stackexchange.com', False, False, False), ('Should not be caught: something@example.com', '', '', 'drupal.stackexchange.com', False, False, False), - ('Title here', '', '', 'stackoverflow.com', False, False, True), - ('Title here', '', '', 'stackoverflow.com', False, False, True), - ('Title here', 'page', '', 'stackoverflow.com', False, False, False), + ('Title here', '', '', 'askubuntu.com', False, False, True), + ('Title here', '', '', 'askubuntu.com', False, False, True), + ('Title here', 'page', '', 'askubuntu.com', False, False, False), ('Error: 2147467259', '', '', 'stackoverflow.com', False, False, False), ('Max limit on number of concurrent ajax request', """
Php java script boring yaaarrr Price-Buy.com
""", 'Price Buy', 'stackoverflow.com', True, True, True), ('Proof of onward travel in Japan?', """ From 5e8e411463621e3b888924fb6ad1b1d7d9281d12 Mon Sep 17 00:00:00 2001 From: Daniil-M-beep <64322880+Daniil-M-beep@users.noreply.github.com> Date: Fri, 7 Aug 2020 21:06:08 +0100 Subject: [PATCH 5/5] Update findspam.py --- findspam.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/findspam.py b/findspam.py index effc8f33f6..5fa4db8b07 100644 --- a/findspam.py +++ b/findspam.py @@ -607,7 +607,7 @@ def len_img_block(string): # max_score=2 to prevent voting fraud @create_rule("post is mostly images", title=False, max_rep=201, max_score=2, sites=[ - "stackoverflow.com", "math.stackexchange.com", "mathoverflow.net", "stats.stackexchange.com"]) + "math.stackexchange.com", "mathoverflow.net", "stats.stackexchange.com"]) def mostly_img(s, site): if len(s) == 0: return False, ""