From 9526f30c8396293d2c6f60bc10b141a0d6d44d8d Mon Sep 17 00:00:00 2001
From: Hugo
Date: Thu, 22 Feb 2024 23:40:01 +0100
Subject: [PATCH] support for regexes with ublacklist

---
Review notes (this area is ignored when the patch is applied):

* scripts/ublacklist.py: the regex_addresses file is opened as `addresses`,
  so its lines are now read with `addresses.readlines()`. The previous
  revision of this hunk called `titles.readlines()` there, one statement
  before `titles` is bound by the following `with` block.
* NOTE(review): sources/regex_titles/Clickbait.txt stores the pattern as
  `/ending explained/i`, while scripts/ublacklist.py prints each title line
  as 'title/' + line + '/i'. Unless the cleanup step in scripts/update.sh
  strips the delimiters, the emitted rule would be
  `title//ending explained/i/i` -- confirm against the uBlacklist syntax.

 scripts/ublacklist.py              | 9 +++++++++
 scripts/update.sh                  | 4 ++++
 sources/regex_addresses/README.md  | 4 ++++
 sources/regex_titles/Clickbait.txt | 2 ++
 sources/regex_titles/README.md     | 1 +
 5 files changed, 20 insertions(+)
 create mode 100644 sources/regex_addresses/README.md
 create mode 100644 sources/regex_titles/Clickbait.txt
 create mode 100644 sources/regex_titles/README.md

diff --git a/scripts/ublacklist.py b/scripts/ublacklist.py
index d3820339..622f2935 100644
--- a/scripts/ublacklist.py
+++ b/scripts/ublacklist.py
@@ -15,9 +15,18 @@ linesurls = urls.readlines()
 
 blocklist = linesdomains + linestlds + linesurls
 
+with open("sources/regex_addresses.txt", "r") as addresses:
+    linesaddresses = addresses.readlines()
+with open("sources/regex_titles.txt", "r") as titles:
+    linestitles = titles.readlines()
+
 # Print blocklist
 for line in linesheader:
     print(line.strip())
 print()
 for line in blocklist:
     print('*://*.' + line.strip() + '/*')
+for line in linesaddresses:
+    print(line.strip())
+for line in linestitles:
+    print('title/' + line.strip() + '/i')
diff --git a/scripts/update.sh b/scripts/update.sh
index d55ed9c1..268a3618 100755
--- a/scripts/update.sh
+++ b/scripts/update.sh
@@ -31,6 +31,10 @@ find ./sources/domains -type f -iname "*fediverse*.txt" -exec cat {} \; > ./sour
 find ./sources/urls -type f -iname "*.txt" -exec cat {} \; > ./sources/urls.txt
 ### TLDs
 find ./sources/tlds -type f -iname "*.txt" -exec cat {} \; > ./sources/tlds.txt
+### Regex addresses
+find ./sources/regex_addresses -type f -iname "*.txt" -exec cat {} \; > ./sources/regex_addresses.txt
+### Regex titles
+find ./sources/regex_titles -type f -iname "*.txt" -exec cat {} \; > ./sources/regex_titles.txt
 
 ## Cleanup the lists
 ### Remove comments, inline comments, spaces and empty lines
diff --git a/sources/regex_addresses/README.md b/sources/regex_addresses/README.md
new file mode 100644
index 00000000..2428f5ae
--- /dev/null
+++ b/sources/regex_addresses/README.md
@@ -0,0 +1,4 @@
+https://help.adblockplus.org/hc/en-us/articles/360062733293-How-to-write-filters#regexps
+https://github.com/gorhill/uBlock/wiki/Regular-expression-based-filters
+https://iorate.github.io/ublacklist/docs/advanced-features#regular-expressions
+https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions
\ No newline at end of file
diff --git a/sources/regex_titles/Clickbait.txt b/sources/regex_titles/Clickbait.txt
new file mode 100644
index 00000000..c159c1f5
--- /dev/null
+++ b/sources/regex_titles/Clickbait.txt
@@ -0,0 +1,2 @@
+# Spoilers!
+/ending explained/i
\ No newline at end of file
diff --git a/sources/regex_titles/README.md b/sources/regex_titles/README.md
new file mode 100644
index 00000000..c94dc5e4
--- /dev/null
+++ b/sources/regex_titles/README.md
@@ -0,0 +1 @@
+https://iorate.github.io/ublacklist/docs/advanced-features#regular-expressions-for-page-titles
\ No newline at end of file