From a7409b6c3c92cd465b89d21758b00947d8b3f810 Mon Sep 17 00:00:00 2001 From: Daniil-M-beep <64322880+Daniil-M-beep@users.noreply.github.com> Date: Sat, 1 Aug 2020 11:27:21 +0100 Subject: [PATCH 1/5] Less fp's Signed-off-by: Daniil-M-beep <64322880+Daniil-M-beep@users.noreply.github.com> --- findspam.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/findspam.py b/findspam.py index afb404b788..f29683749f 100644 --- a/findspam.py +++ b/findspam.py @@ -606,7 +606,7 @@ def len_img_block(string): # max_score=2 to prevent voting fraud -@create_rule("post is mostly images", title=False, max_rep=201, max_score=2) +@create_rule("post is mostly images", title=False, max_rep=201, max_score=2, sites=["stackoverflow.com", "math.stackexchange.com", "mathoverflow.net", "stats.stackexchange.com"]) def mostly_img(s, site): if len(s) == 0: return False, "" @@ -1417,7 +1417,7 @@ def strip_urls_and_tags(s): @create_rule("mostly punctuation marks in {}", max_rep=52, - sites=["math.stackexchange.com", "mathoverflow.net", "codegolf.stackexchange.com"]) + sites=["stats.stackexchange.com","math.stackexchange.com", "mathoverflow.net", "codegolf.stackexchange.com"]) def mostly_punctuations(s, site): # Strip code blocks here rather than with `stripcodeblocks` so we get the length of the whole post in s. body = regex.sub(r"(?s).*?", "", s) From 2e61e8127b9cfacd38d67b1e4ce9a5c27af326af Mon Sep 17 00:00:00 2001 From: Daniil-M-beep <64322880+Daniil-M-beep@users.noreply.github.com> Date: Sat, 1 Aug 2020 11:42:34 +0100 Subject: [PATCH 2/5] Flake --- findspam.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/findspam.py b/findspam.py index f29683749f..8c5c14cebf 100644 --- a/findspam.py +++ b/findspam.py @@ -1417,7 +1417,7 @@ def strip_urls_and_tags(s): @create_rule("mostly punctuation marks in {}", max_rep=52, - sites=["stats.stackexchange.com","math.stackexchange.com", "mathoverflow.net", "codegolf.stackexchange.com"]) + sites=["stats.stackexchange.com", "math.stackexchange.com", "mathoverflow.net", "codegolf.stackexchange.com"]) def mostly_punctuations(s, site): # Strip code blocks here rather than with `stripcodeblocks` so we get the length of the whole post in s. body = regex.sub(r"(?s).*?", "", s) From dcd68ead34f5475ad67b592e115d3b92823c166c Mon Sep 17 00:00:00 2001 From: Daniil-M-beep <64322880+Daniil-M-beep@users.noreply.github.com> Date: Sat, 1 Aug 2020 12:11:51 +0100 Subject: [PATCH 3/5] Update findspam.py --- findspam.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/findspam.py b/findspam.py index 8c5c14cebf..effc8f33f6 100644 --- a/findspam.py +++ b/findspam.py @@ -606,7 +606,8 @@ def len_img_block(string): # max_score=2 to prevent voting fraud -@create_rule("post is mostly images", title=False, max_rep=201, max_score=2, sites=["stackoverflow.com", "math.stackexchange.com", "mathoverflow.net", "stats.stackexchange.com"]) +@create_rule("post is mostly images", title=False, max_rep=201, max_score=2, sites=[ + "stackoverflow.com", "math.stackexchange.com", "mathoverflow.net", "stats.stackexchange.com"]) def mostly_img(s, site): if len(s) == 0: return False, "" @@ -1416,8 +1417,9 @@ def strip_urls_and_tags(s): return URL_REGEX.sub("", TAG_REGEX.sub("", s)) -@create_rule("mostly punctuation marks in {}", max_rep=52, - sites=["stats.stackexchange.com", "math.stackexchange.com", "mathoverflow.net", "codegolf.stackexchange.com"]) +@create_rule("mostly punctuation marks in {}", max_rep=52, sites=[ + "stats.stackexchange.com", "math.stackexchange.com", + "mathoverflow.net", "codegolf.stackexchange.com"]) def mostly_punctuations(s, site): # Strip code blocks here rather than with `stripcodeblocks` so we get the length of the whole post in s. body = regex.sub(r"(?s).*?", "", s) From cde0e1dceed25c46a768b39079653294b0da8846 Mon Sep 17 00:00:00 2001 From: Daniil-M-beep <64322880+Daniil-M-beep@users.noreply.github.com> Date: Sat, 1 Aug 2020 12:22:21 +0100 Subject: [PATCH 4/5] Update test_findspam.py --- test/test_findspam.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_findspam.py b/test/test_findspam.py index 7e21796050..057d48c42e 100644 --- a/test/test_findspam.py +++ b/test/test_findspam.py @@ -72,9 +72,9 @@ ('Should not be caught: http://example.com', '', '', 'drupal.stackexchange.com', False, False, False), ('Should not be caught: https://www.example.com', '', '', 'drupal.stackexchange.com', False, False, False), ('Should not be caught: something@example.com', '', '', 'drupal.stackexchange.com', False, False, False), - ('Title here', 'my image', '', 'stackoverflow.com', False, False, True), - ('Title here', 'my image', '', 'stackoverflow.com', False, False, True), - ('Title here', 'page', '', 'stackoverflow.com', False, False, False), + ('Title here', 'my image', '', 'askubuntu.com', False, False, True), + ('Title here', 'my image', '', 'askubuntu.com', False, False, True), + ('Title here', 'page', '', 'askubuntu.com', False, False, False), ('Error: 2147467259', '', '', 'stackoverflow.com', False, False, False), ('Max limit on number of concurrent ajax request', """

Php java script boring yaaarrr Price-Buy.com

""", 'Price Buy', 'stackoverflow.com', True, True, True), ('Proof of onward travel in Japan?', """

The best solution to overcome the problem of your travelenter image description here

From 5e8e411463621e3b888924fb6ad1b1d7d9281d12 Mon Sep 17 00:00:00 2001 From: Daniil-M-beep <64322880+Daniil-M-beep@users.noreply.github.com> Date: Fri, 7 Aug 2020 21:06:08 +0100 Subject: [PATCH 5/5] Update findspam.py --- findspam.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/findspam.py b/findspam.py index effc8f33f6..5fa4db8b07 100644 --- a/findspam.py +++ b/findspam.py @@ -607,7 +607,7 @@ def len_img_block(string): # max_score=2 to prevent voting fraud @create_rule("post is mostly images", title=False, max_rep=201, max_score=2, sites=[ - "stackoverflow.com", "math.stackexchange.com", "mathoverflow.net", "stats.stackexchange.com"]) + "math.stackexchange.com", "mathoverflow.net", "stats.stackexchange.com"]) def mostly_img(s, site): if len(s) == 0: return False, ""