From bd1303ee988e27d09d57107cf091f651ba8daab6 Mon Sep 17 00:00:00 2001 From: Makyen Date: Sat, 26 Oct 2024 16:19:22 -0700 Subject: [PATCH 1/3] More descriptive titles in Findspam tests autopull --- test/test_findspam.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/test/test_findspam.py b/test/test_findspam.py index 198c4b345e..17fc5932a8 100644 --- a/test/test_findspam.py +++ b/test/test_findspam.py @@ -102,22 +102,22 @@ ('IDNA misleading link', 'http://www.h\u00E5nd.no', '', '', False, False, False), ('Mostly punctuation', ';[].[.[.&_$)_\\*&_@$.[;*/-!#*&)(_.\'].1\\)!#_', '', '', False, False, True), ('Few unique', 'asdss, dadasssaadadda, daaaadadsss, ssa,,,addadas,ss\nsdadadsssadadas, sss\ndaaasdddsaaa, asd', '', '', False, False, True), - ('ketones', 'ketones', 'ketones', 'chemistry.stackexchange.com', False, False, False), - ('ketones', 'ketones', 'ketones', 'chemistry.stackexchange.com', False, True, False), - ('ketones', 'ketones', 'ketones', 'chemistry.stackexchange.com', True, False, False), - ('ketones', 'ketones', 'ketones', 'chemistry.stackexchange.com', True, True, False), - ('keytones', '

Some body

', 'a username', 'superuser.com', False, False, True), - ('A title', 'keytones', 'a username', 'superuser.com', False, False, True), - ('A title', '

Some body

', 'keytones', 'superuser.com', False, False, True), - ('keytones', '

Some body

', 'a username', 'superuser.com', False, True, False), - ('A title', 'keytones', 'a username', 'superuser.com', False, True, True), - ('A title', '

Some body

', 'keytones', 'superuser.com', False, True, True), - ('keytones', '

Some body

', 'a username', 'superuser.com', True, False, True), - ('A title', 'keytones', 'a username', 'superuser.com', True, False, True), - ('A title', '

Some body

', 'keytones', 'superuser.com', True, False, True), - ('keytones', '

Some body

', 'a username', 'superuser.com', True, True, False), - ('A title', 'keytones', 'a username', 'superuser.com', True, True, True), - ('A title', '

Some body

', 'keytones', 'superuser.com', True, True, True), + ('ketones on Chemistry', 'ketones', 'ketones', 'chemistry.stackexchange.com', False, False, False), + ('ketones on Chemistry as answer', 'ketones', 'ketones', 'chemistry.stackexchange.com', False, True, False), + ('ketones on Chemistry as body_summary', 'ketones', 'ketones', 'chemistry.stackexchange.com', True, False, False), + ('ketones on Chemistry as body_summary and answer', 'ketones', 'ketones', 'chemistry.stackexchange.com', True, True, False), + ('keytones on SuperUser', '

Some body

', 'a username', 'superuser.com', False, False, True), + ('keytones on SuperUser as answer', '

Some body

', 'a username', 'superuser.com', False, True, False), + ('A title with KyT in body', 'keytones', 'a username', 'superuser.com', False, False, True), + ('A title with KyT in username', '

Some body

', 'keytones', 'superuser.com', False, False, True), + ('A title with KyT in body as answer', 'keytones', 'a username', 'superuser.com', False, True, True), + ('A title with KyT in username as answer', '

Some body

', 'keytones', 'superuser.com', False, True, True), + ('keytones on SuperUser as body_summary', '

Some body

', 'a username', 'superuser.com', True, False, True), + ('A title with KyT in body as body_summary on SuperUser', 'keytones', 'a username', 'superuser.com', True, False, True), + ('A title with KyT in username as body_summary on SuperUser', '

Some body

', 'keytones', 'superuser.com', True, False, True), + ('keytones on SuperUser as body_summary and answer', '

Some body

', 'a username', 'superuser.com', True, True, False), + ('A title with KyT in body as body summary and answer', 'keytones', 'a username', 'superuser.com', True, True, True), + ('A title with KyT in username as body summary and answer', '

Some body

', 'keytones', 'superuser.com', True, True, True), ('C01nb4s3 support number', 'obfuscated_word in title', 'spammer', 'stackoverflow.com', False, False, True), ('obfuscated_word in body', 'C01nb4$3 support number', 'spammer', 'stackoverflow.com', False, False, True), ('''airline's responsibilities''', 'test case for "not obfuscated after all" (#7345)', 'good guy', 'stackoverflow.com', False, False, False), From a8fce21e2906029dffc50f8bf8cfbdaf7288ce1d Mon Sep 17 00:00:00 2001 From: Makyen Date: Sat, 26 Oct 2024 19:22:57 -0700 Subject: [PATCH 2/3] Implement !!/scan-time and !!/scan-force-time autopull --- chatcommands.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/chatcommands.py b/chatcommands.py index 91706e88e4..f80f1f42e2 100644 --- a/chatcommands.py +++ b/chatcommands.py @@ -2075,7 +2075,7 @@ def invite(msg, room_id, roles): # --- Post Responses --- # # noinspection PyIncorrectDocstring @command(str, whole_msg=True, privileged=False, give_name=True, - aliases=["scan", "scan-force", "report-force", "report-direct"]) + aliases=["scan", "scan-force", "report-force", "report-direct", "scan-time", "scan-force-time"]) def report(msg, args, alias_used="report"): """ Report a post (or posts) @@ -2095,6 +2095,10 @@ def report(msg, args, alias_used="report"): alias_used = alias_used or "report" + is_timed = "-time" in alias_used + alias_used = alias_used.replace("-time", "") + start_time = time.time() + argsraw = args.split(' "', 1) urls = argsraw[0].split(' ') @@ -2122,6 +2126,8 @@ def report(msg, args, alias_used="report"): if output: if 1 < len(urls) > output.count("\n") + 1: add_or_update_multiple_reporter(msg.owner.id, msg._client.host, time.time()) + if is_timed: + output += "\nScanning took {} seconds.".format(round(time.time() - start_time, 3)) return output From 69a48f7959650c72f72bd62fbfdc65f66a6b833b Mon Sep 17 00:00:00 2001 From: Makyen Date: Sat, 26 Oct 2024 15:24:58 -0700 Subject: [PATCH 3/3] Chunk last <100 of BLs & WL to reduce regex recompile autopull --- findspam.py | 129 +++++++++++++++++++++++++++++----------------------- 1 file changed, 71 insertions(+), 58 deletions(-) diff --git a/findspam.py b/findspam.py index 3281e2b591..2fbabb6cd4 100644 --- a/findspam.py +++ b/findspam.py @@ -26,7 +26,7 @@ from helpers import log, regex_compile_no_cache, strip_pre_and_code_elements, strip_code_elements, \ get_bookended_keyword_regex_text_from_entries, keyword_bookend_regex_text, KEYWORD_BOOKENDING_START, \ - get_non_bookended_keyword_regex_text_from_entries + get_non_bookended_keyword_regex_text_from_entries, chunk_list import metasmoke_cache from globalvars import GlobalVars import blacklists @@ -582,45 +582,46 @@ class FindSpam: ('warning', '**Very High** ', 30), # > 30 s: Log a "warning" and output to chat as bold "Very High" ] + @staticmethod + def _update_a_blacklist_dual_rule(rule_list, regex_text_generator, entries): + entries = list(entries) + entries_length = len(entries) + if entries_length > 100: + # Get the length to the 100 below the current length + chunk_length = int(str(entries_length)[:-2] + '00') + entries_lists = chunk_list(entries, chunk_length) + else: + # With <= 100 entries, Use an entries_lists with all the entries first, then a regex that can never match + entries_lists = [entries] + if len(entries_lists) == 1: + entries_lists.append = [r'q(?