moved bash code to python

headers for every list ! url blocking in ublacklist !
NotaInutilis · Feb 14, 2024 · 2e41cf5 · 2e41cf5
1 parent d62ab22
commit 2e41cf5
Show file tree

Hide file tree

Showing 11 changed files with 186 additions and 34 deletions.
diff --git a/scripts/dnsmasq.py b/scripts/dnsmasq.py
@@ -0,0 +1,21 @@
+# Generates a list in dnsmasq's blocking syntax from the content of the `sources` folder.
+# Usage:
+# python dnsmasq.py > dnsmasq.txt
+
+# Open header
+with open("sources/headers/default.txt", "r") as header:
+	linesheader = header.readlines()
+
+# Open blocked formats
+with open("sources/domains.txt", "r") as domains:
+	linesdomains = domains.readlines()
+with open("sources/tlds.txt", "r") as tlds:
+	linestlds = tlds.readlines()
+blocklist = linesdomains + linestlds
+
+# Print blocklist
+for line in linesheader:
+	print(line.strip())
+print()
+for line in blocklist:
+	print('address=/' + line.strip() + '/')
diff --git a/scripts/domains.py b/scripts/domains.py
@@ -0,0 +1,19 @@
+# Generates a plain domains list from the content of the `sources` folder.
+# Usage:
+# python domains.py > domains.txt
+
+# Open header
+with open("sources/headers/default.txt", "r") as header:
+	linesheader = header.readlines()
+
+# Open blocked formats
+with open("sources/domains.txt", "r") as domains:
+	linesdomains = domains.readlines()
+blocklist = linesdomains
+
+# Print blocklist
+for line in linesheader:
+	print(line.strip())
+print()
+for line in blocklist:
+	print(line.strip())
diff --git a/scripts/fediblockhole.py b/scripts/fediblockhole.py
@@ -0,0 +1,19 @@
+# Generates a .csv format used by FediBlockHole from the Fediverse content of the `sources` folder.
+# Usage:
+# python fediblockhole.py > fediblockhole.csv
+
+# Open header
+with open("sources/headers/fediblockhole.csv", "r") as header:
+	linesheader = header.readlines()
+
+# Open blocked formats
+with open("sources/fediverse_domains.txt", "r") as domains:
+	linesfedidomains = domains.readlines()
+blocklist = linesfedidomains
+
+# Print blocklist
+for line in linesheader:
+	print(line.strip())
+print()
+for line in blocklist:
+	print(line.strip() + ',suspend,Super-SEO-Spam-Blocker blocklist,Super-SEO-Spam-Blocker blocklist')
diff --git a/scripts/fediverse_domains.py b/scripts/fediverse_domains.py
@@ -0,0 +1,19 @@
+# Generates a plain domains list from the Fediverse content of the `sources` folder.
+# Usage:
+# python fediverse_domains.py > fediverse_domains.txt
+
+# Open header
+with open("sources/headers/default.txt", "r") as header:
+	linesheader = header.readlines()
+
+# Open blocked formats
+with open("sources/fediverse_domains.txt", "r") as domains:
+	linesfedidomains = domains.readlines()
+blocklist = linesfedidomains
+
+# Print blocklist
+for line in linesheader:
+	print(line.strip())
+print()
+for line in blocklist:
+	print(line.strip())
diff --git a/scripts/hosts.py b/scripts/hosts.py
@@ -0,0 +1,20 @@
+# Generates a list in the hosts file format from the content of the `sources` folder.
+# Usage:
+# python hosts.py > hosts.txt
+
+# Open header
+with open("sources/headers/default.txt", "r") as header:
+	linesheader = header.readlines()
+
+# Open blocked formats
+with open("sources/domains.txt", "r") as domains:
+	linesdomains = domains.readlines()
+blocklist = linesdomains
+
+# Print blocklist
+for line in linesheader:
+	print(line.strip())
+print()
+for line in blocklist:
+	print('0.0.0.0 ' + line.strip())
+	print('0.0.0.0 www.' + line.strip())
diff --git a/scripts/hosts_ipv6.py b/scripts/hosts_ipv6.py
@@ -0,0 +1,20 @@
+# Generates a list in the hosts file format for IPv6 from the content of the `sources` folder.
+# Usage:
+# python hosts_ipv6.py > hosts_ipv6.txt
+
+# Open header
+with open("sources/headers/default.txt", "r") as header:
+	linesheader = header.readlines()
+
+# Open blocked formats
+with open("sources/domains.txt", "r") as domains:
+	linesdomains = domains.readlines()
+blocklist = linesdomains
+
+# Print blocklist
+for line in linesheader:
+	print(line.strip())
+print()
+for line in blocklist:
+	print('::1 ' + line.strip())
+	print('::1 www.' + line.strip())
diff --git a/scripts/mastodon.py b/scripts/mastodon.py
@@ -0,0 +1,19 @@
+# Generates a .csv format used by Mastodon from the Fediverse content of the `sources` folder.
+# Usage:
+# python mastodon.py > mastodon.csv
+
+# Open header
+with open("sources/headers/mastodon.csv", "r") as header:
+	linesheader = header.readlines()
+
+# Open blocked formats
+with open("sources/fediverse_domains.txt", "r") as domains:
+	linesfedidomains = domains.readlines()
+blocklist = linesfedidomains
+
+# Print blocklist
+for line in linesheader:
+	print(line.strip())
+print()
+for line in blocklist:
+	print(line.strip() + ',suspend,false,false,Super-SEO-Spam-Blocker blocklist,false')
diff --git a/scripts/ublacklist.py b/scripts/ublacklist.py
@@ -0,0 +1,23 @@
+# Generates a list in the match pattern format for the uBlacklist browser extension from the content of the `sources` folder.
+# Usage:
+# python ublacklist.py > ublacklist.txt
+
+# Open header
+with open("sources/headers/default.txt", "r") as header:
+	linesheader = header.readlines()
+
+# Open blocked formats
+with open("sources/domains.txt", "r") as domains:
+	linesdomains = domains.readlines()
+with open("sources/tlds.txt", "r") as tlds:
+	linestlds = tlds.readlines()
+with open("sources/urls.txt", "r") as urls:
+	linesurls = urls.readlines()
+blocklist = linesdomains + linestlds + linesurls
+
+# Print blocklist
+for line in linesheader:
+	print(line.strip())
+print()
+for line in blocklist:
+	print('*://*.' + line.strip() + '/*')
diff --git a/scripts/update.sh b/scripts/update.sh
@@ -9,7 +9,8 @@
 ## Normalize domains: lowercases, remove leading spaces, protocol (`x://`), `www.` subdomains, path ( `/` and after), leave only one space before inline comment (`#`). Keeps comments intact
 # (same code in import.sh)
 find ./sources/domains -type f -iname "*.txt" -exec sed -ri 'h; s/[^#]*//1; x; s/#.*//; s/.*/\L&/; s/^[[:space:]]*//i; s/^.*:\/\///i; s/^[.*]*//i; s/^www\.//i; s/\/[^[:space:]]*//i; s/[[:space:]].*$/ /i; G; s/(.*)\n/\1/' {} \;
-## Normalize URLs: lowercases, remove leading spaces, protocol (`*://*`), `www.` subdomains, leave only one space before inline comment (`#`). Keeps comments intact
+## Normalize URLs: lowercases, remove leading spaces, protocol (`*://*`), `www.` subdomains, last `/`, leave only one space before inline comment (`#`). Keeps comments intact
+# TODO: add removal of the last `/` (the sed command below does not strip it yet)
 find ./sources/urls -type f -iname "*.txt" -exec sed -ri 'h; s/[^#]*//1; x; s/#.*//; s/.*/\L&/; s/^[[:space:]]*//i; s/^.*:\/\///i; s/^[.*]*//i; s/^www\.//i; s/[[:space:]].*$/ /i; G; s/(.*)\n/\1/' {} \;
 ## Normalize TLDs: lowercases, remove leading spaces and `.`, path ( `/` and after), leave only one space before inline comment (`#`). Keeps comments intact
 find ./sources/tlds -type f -iname "*.txt" -exec sed -ri 'h; s/[^#]*//1; x; s/#.*//; s/.*/\L&/; s/^[[:space:]]*//i; s/^[.*]*//i;  s/\/[^[:space:]]*//i; s/[[:space:]].*$/ /i; G; s/(.*)\n/\1/' {} \;
@@ -43,35 +44,25 @@ find ./sources -maxdepth 1 -type f -iname "*.txt" -exec bash -c '
 # Generate blocklists
 §
 ## Domains
-cp ./sources/headers/default.txt domains.txt
-cat ./sources/domains.txt >> domains.txt
+python scripts/domains.py > domains.txt
 
 ## For DNS filtering
 ### Hosts
-python scripts/domains_to_hosts.py > hosts.txt
-python scripts/domains_to_hosts_ipv6.py > hosts.txt.ipv6
+python scripts/hosts.py > hosts.txt
+python scripts/hosts_ipv6.py > hosts_ipv6.txt
 ### DNSmasq
-python scripts/domains_to_dnsmasq.py > dnsmasq.txt
+python scripts/dnsmasq.py > dnsmasq.txt
 
 ## For browser extensions
 ### Adblock
 python scripts/adblock.py > adblock.txt
 ### uBlacklist
-python scripts/domains_to_ublacklist.py > ublacklist_temp.txt
-cp ./sources/headers/default.txt ublacklist.txt
-cat ublacklist_temp.txt >> ublacklist.txt
-rm ublacklist_temp.txt
+python scripts/ublacklist.py > ublacklist.txt
 
 ## Generate Fediverse blocklists
 ### Domains
-cp ./sources/fediverse_domains.txt fediverse_domains.txt
+python scripts/fediverse_domains.py > fediverse_domains.txt
 ### Mastodon
-python scripts/fediverse_domains_to_mastodon.py > mastodon_temp.txt
-cp ./sources/headers/mastodon.csv mastodon.csv
-cat mastodon_temp.txt >> mastodon.csv
-rm mastodon_temp.txt
+python scripts/mastodon.py > mastodon.csv
 ### FediBlockHole
-python scripts/fediverse_domains_to_fediblockhole.py > fediblockhole_temp.txt
-cp ./sources/headers/fediblockhole.csv fediblockhole.csv
-cat fediblockhole_temp.txt >> fediblockhole.csv
-rm fediblockhole_temp.txt
+python scripts/fediblockhole.py > fediblockhole.csv
diff --git a/sources/urls/AI/Subreddits.txt b/sources/urls/AI/Subreddits.txt
@@ -1,16 +1,16 @@
 # https://github.com/ite-usagi/ublacklist-noai
-reddit.com/r/aiart/
-reddit.com/r/characterai/
-reddit.com/r/chatgpt/
-reddit.com/r/dalle2/
-reddit.com/r/dalle/
-reddit.com/r/deepdream/
-reddit.com/r/defendingaiart/
-reddit.com/r/generativeai/
-reddit.com/r/generative/
-reddit.com/r/gpt3/
-reddit.com/r/midjourney/
-reddit.com/r/novelai/
-reddit.com/r/openai/
-reddit.com/r/singularity/
-reddit.com/r/stablediffusion/
+reddit.com/r/aiart
+reddit.com/r/characterai
+reddit.com/r/chatgpt
+reddit.com/r/dalle2
+reddit.com/r/dalle
+reddit.com/r/deepdream
+reddit.com/r/defendingaiart
+reddit.com/r/generativeai
+reddit.com/r/generative
+reddit.com/r/gpt3
+reddit.com/r/midjourney
+reddit.com/r/novelai
+reddit.com/r/openai
+reddit.com/r/singularity
+reddit.com/r/stablediffusion
diff --git a/sources/urls/Content farms/Shops/French.txt b/sources/urls/Content farms/Shops/French.txt
@@ -0,0 +1 @@
+https://monoreilleretmoi.com/blogs