Skip to content

Commit

Permalink
moved bash code to python
Browse files Browse the repository at this point in the history
headers for every list ! url blocking in ublacklist !
  • Loading branch information
NotaInutilis committed Feb 14, 2024
1 parent d62ab22 commit 2e41cf5
Show file tree
Hide file tree
Showing 11 changed files with 186 additions and 34 deletions.
21 changes: 21 additions & 0 deletions scripts/dnsmasq.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Generates a list in dnsmasq's blocking syntax from the content of the `sources` folder.
# Usage:
# python dnsmasq.py > dnsmasq.txt


def read_lines(path):
    """Return every line of the text file at `path`, line endings included."""
    # Decode explicitly as UTF-8 so the result does not depend on the locale.
    with open(path, "r", encoding="utf-8") as source:
        return source.readlines()


def format_rules(entries):
    """Return one `address=/<entry>/` rule per non-blank entry.

    Surrounding whitespace is stripped from every entry. Blank entries are
    dropped, because they would otherwise become a rule with no domain in it.
    """
    cleaned = (entry.strip() for entry in entries)
    return ['address=/' + entry + '/' for entry in cleaned if entry]


def main():
    """Print the header, a blank separator line, then the dnsmasq rules."""
    header = read_lines("sources/headers/default.txt")
    # Domains and TLDs are formatted with the same rule, so they are concatenated.
    blocklist = read_lines("sources/domains.txt") + read_lines("sources/tlds.txt")

    for line in header:
        print(line.strip())
    print()
    for rule in format_rules(blocklist):
        print(rule)


if __name__ == "__main__":
    main()
19 changes: 19 additions & 0 deletions scripts/domains.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Builds the plain domains list out of the `sources` folder and writes it to stdout.
# Usage:
# python domains.py > domains.txt

# Header block, one stripped line per entry
with open("sources/headers/default.txt", "r") as header_file:
    header_rows = [row.strip() for row in header_file]

# Blocked domains, one stripped line per entry
with open("sources/domains.txt", "r") as domains_file:
    domain_rows = [row.strip() for row in domains_file]

# Emit header, an empty separator line, then the domains
for row in header_rows + [""] + domain_rows:
    print(row)
19 changes: 19 additions & 0 deletions scripts/fediblockhole.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generates a .csv format used by FediBlockHole from the Fediverse content of the `sources` folder.
# Usage:
# python fediblockhole.py > fediblockhole.csv

# Fixed column values appended after the domain on every row.
ROW_SUFFIX = ',suspend,Super-SEO-Spam-Blocker blocklist,Super-SEO-Spam-Blocker blocklist'


def read_lines(path):
    """Return every line of the text file at `path`, line endings included."""
    # Decode explicitly as UTF-8 so the result does not depend on the locale.
    with open(path, "r", encoding="utf-8") as source:
        return source.readlines()


def format_rows(domains):
    """Return one CSV row per non-blank domain.

    Surrounding whitespace is stripped from every domain. Blank entries are
    dropped, because they would otherwise become a row with an empty domain.
    """
    cleaned = (domain.strip() for domain in domains)
    return [domain + ROW_SUFFIX for domain in cleaned if domain]


def main():
    """Print the CSV header, a blank line, then one row per Fediverse domain."""
    header = read_lines("sources/headers/fediblockhole.csv")
    blocklist = read_lines("sources/fediverse_domains.txt")

    for line in header:
        print(line.strip())
    # NOTE(review): the blank line after the CSV header is kept from the original
    # script; confirm that FediBlockHole accepts it.
    print()
    for row in format_rows(blocklist):
        print(row)


if __name__ == "__main__":
    main()
19 changes: 19 additions & 0 deletions scripts/fediverse_domains.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Builds the plain Fediverse domains list out of the `sources` folder and writes it to stdout.
# Usage:
# python fediverse_domains.py > fediverse_domains.txt

# Header block, one stripped line per entry
with open("sources/headers/default.txt", "r") as header_file:
    header_rows = [row.strip() for row in header_file]

# Blocked Fediverse domains, one stripped line per entry
with open("sources/fediverse_domains.txt", "r") as domains_file:
    fedi_rows = [row.strip() for row in domains_file]

# Emit header, an empty separator line, then the domains
for row in header_rows + [""] + fedi_rows:
    print(row)
20 changes: 20 additions & 0 deletions scripts/hosts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Generates a list in the hosts file format from the content of the `sources` folder.
# Usage:
# python hosts.py > hosts.txt


def read_lines(path):
    """Return every line of the text file at `path`, line endings included."""
    # Decode explicitly as UTF-8 so the result does not depend on the locale.
    with open(path, "r", encoding="utf-8") as source:
        return source.readlines()


def format_hosts(domains, address="0.0.0.0"):
    """Return the hosts-file entries for `domains`.

    Every non-blank domain produces two entries mapped to `address`: one for
    the domain itself and one for its `www.` subdomain. Surrounding whitespace
    is stripped. Blank entries are dropped, because they would otherwise
    become entries with no hostname.
    """
    entries = []
    for domain in (raw.strip() for raw in domains):
        if not domain:
            continue
        entries.append(address + ' ' + domain)
        entries.append(address + ' www.' + domain)
    return entries


def main():
    """Print the header, a blank separator line, then the hosts entries."""
    header = read_lines("sources/headers/default.txt")
    blocklist = read_lines("sources/domains.txt")

    for line in header:
        print(line.strip())
    print()
    for entry in format_hosts(blocklist):
        print(entry)


if __name__ == "__main__":
    main()
20 changes: 20 additions & 0 deletions scripts/hosts_ipv6.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Generates a list in the hosts file format for IPv6 from the content of the `sources` folder.
# Usage:
# python hosts_ipv6.py > hosts_ipv6.txt


def read_lines(path):
    """Return every line of the text file at `path`, line endings included."""
    # Decode explicitly as UTF-8 so the result does not depend on the locale.
    with open(path, "r", encoding="utf-8") as source:
        return source.readlines()


def format_hosts(domains, address="::1"):
    """Return the IPv6 hosts-file entries for `domains`.

    Every non-blank domain produces two entries mapped to `address`: one for
    the domain itself and one for its `www.` subdomain. Surrounding whitespace
    is stripped. Blank entries are dropped, because they would otherwise
    become entries with no hostname.
    """
    entries = []
    for domain in (raw.strip() for raw in domains):
        if not domain:
            continue
        entries.append(address + ' ' + domain)
        entries.append(address + ' www.' + domain)
    return entries


def main():
    """Print the header, a blank separator line, then the hosts entries."""
    header = read_lines("sources/headers/default.txt")
    blocklist = read_lines("sources/domains.txt")

    for line in header:
        print(line.strip())
    print()
    for entry in format_hosts(blocklist):
        print(entry)


if __name__ == "__main__":
    main()
19 changes: 19 additions & 0 deletions scripts/mastodon.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generates a .csv format used by Mastodon from the Fediverse content of the `sources` folder.
# Usage:
# python mastodon.py > mastodon.csv

# Fixed column values appended after the domain on every row.
ROW_SUFFIX = ',suspend,false,false,Super-SEO-Spam-Blocker blocklist,false'


def read_lines(path):
    """Return every line of the text file at `path`, line endings included."""
    # Decode explicitly as UTF-8 so the result does not depend on the locale.
    with open(path, "r", encoding="utf-8") as source:
        return source.readlines()


def format_rows(domains):
    """Return one CSV row per non-blank domain.

    Surrounding whitespace is stripped from every domain. Blank entries are
    dropped, because they would otherwise become a row with an empty domain.
    """
    cleaned = (domain.strip() for domain in domains)
    return [domain + ROW_SUFFIX for domain in cleaned if domain]


def main():
    """Print the CSV header, a blank line, then one row per Fediverse domain."""
    header = read_lines("sources/headers/mastodon.csv")
    blocklist = read_lines("sources/fediverse_domains.txt")

    for line in header:
        print(line.strip())
    # NOTE(review): the blank line after the CSV header is kept from the original
    # script; confirm that Mastodon's importer accepts it.
    print()
    for row in format_rows(blocklist):
        print(row)


if __name__ == "__main__":
    main()
23 changes: 23 additions & 0 deletions scripts/ublacklist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generates a list in the match pattern format for the uBlacklist browser extension from the content of the `sources` folder.
# Usage:
# python ublacklist.py > ublacklist.txt


def read_lines(path):
    """Return every line of the text file at `path`, line endings included."""
    # Decode explicitly as UTF-8 so the result does not depend on the locale.
    with open(path, "r", encoding="utf-8") as source:
        return source.readlines()


def format_patterns(entries):
    """Return one `*://*.<entry>/*` match pattern per non-blank entry.

    Surrounding whitespace is stripped from every entry. Blank entries are
    dropped, because they would otherwise become the pattern `*://*./*`.
    """
    cleaned = (entry.strip() for entry in entries)
    return ['*://*.' + entry + '/*' for entry in cleaned if entry]


def main():
    """Print the header, a blank separator line, then the match patterns."""
    header = read_lines("sources/headers/default.txt")
    # Domains, TLDs and URLs are all wrapped in the same pattern, in this order.
    blocklist = (
        read_lines("sources/domains.txt")
        + read_lines("sources/tlds.txt")
        + read_lines("sources/urls.txt")
    )

    for line in header:
        print(line.strip())
    print()
    for pattern in format_patterns(blocklist):
        print(pattern)


if __name__ == "__main__":
    main()
29 changes: 10 additions & 19 deletions scripts/update.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
## Normalize domains: lowercases, remove leading spaces, protocol (`x://`), `www.` subdomains, path ( `/` and after), leave only one space before inline comment (`#`). Keeps comments intact
# (same code in import.sh)
find ./sources/domains -type f -iname "*.txt" -exec sed -ri 'h; s/[^#]*//1; x; s/#.*//; s/.*/\L&/; s/^[[:space:]]*//i; s/^.*:\/\///i; s/^[.*]*//i; s/^www\.//i; s/\/[^[:space:]]*//i; s/[[:space:]].*$/ /i; G; s/(.*)\n/\1/' {} \;
## Normalize URLs: lowercases, remove leading spaces, protocol (`*://*`), `www.` subdomains, leave only one space before inline comment (`#`). Keeps comments intact
## Normalize URLs: lowercases, remove leading spaces, protocol (`*://*`), `www.` subdomains, last `/`, leave only one space before inline comment (`#`). Keeps comments intact
# TODO: the sed command below does not strip the trailing `/` yet; add that substitution.
find ./sources/urls -type f -iname "*.txt" -exec sed -ri 'h; s/[^#]*//1; x; s/#.*//; s/.*/\L&/; s/^[[:space:]]*//i; s/^.*:\/\///i; s/^[.*]*//i; s/^www\.//i; s/[[:space:]].*$/ /i; G; s/(.*)\n/\1/' {} \;
## Normalize TLDs: lowercases, remove leading spaces and `.`, path ( `/` and after), leave only one space before inline comment (`#`). Keeps comments intact
find ./sources/tlds -type f -iname "*.txt" -exec sed -ri 'h; s/[^#]*//1; x; s/#.*//; s/.*/\L&/; s/^[[:space:]]*//i; s/^[.*]*//i; s/\/[^[:space:]]*//i; s/[[:space:]].*$/ /i; G; s/(.*)\n/\1/' {} \;
Expand Down Expand Up @@ -43,35 +44,25 @@ find ./sources -maxdepth 1 -type f -iname "*.txt" -exec bash -c '
# Generate blocklists
§
## Domains
cp ./sources/headers/default.txt domains.txt
cat ./sources/domains.txt >> domains.txt
python scripts/domains.py > domains.txt

## For DNS filtering
### Hosts
python scripts/domains_to_hosts.py > hosts.txt
python scripts/domains_to_hosts_ipv6.py > hosts.txt.ipv6
python scripts/hosts.py > hosts.txt
python scripts/hosts_ipv6.py > hosts_ipv6.txt
### DNSmasq
python scripts/domains_to_dnsmasq.py > dnsmasq.txt
python scripts/dnsmasq.py > dnsmasq.txt

## For browser extensions
### Adblock
python scripts/adblock.py > adblock.txt
### uBlacklist
python scripts/domains_to_ublacklist.py > ublacklist_temp.txt
cp ./sources/headers/default.txt ublacklist.txt
cat ublacklist_temp.txt >> ublacklist.txt
rm ublacklist_temp.txt
python scripts/ublacklist.py > ublacklist.txt

## Generate Fediverse blocklists
### Domains
cp ./sources/fediverse_domains.txt fediverse_domains.txt
python scripts/fediverse_domains.py > fediverse_domains.txt
### Mastodon
python scripts/fediverse_domains_to_mastodon.py > mastodon_temp.txt
cp ./sources/headers/mastodon.csv mastodon.csv
cat mastodon_temp.txt >> mastodon.csv
rm mastodon_temp.txt
python scripts/mastodon.py > mastodon.csv
### FediBlockHole
python scripts/fediverse_domains_to_fediblockhole.py > fediblockhole_temp.txt
cp ./sources/headers/fediblockhole.csv fediblockhole.csv
cat fediblockhole_temp.txt >> fediblockhole.csv
rm fediblockhole_temp.txt
python scripts/fediblockhole.py > fediblockhole.csv
30 changes: 15 additions & 15 deletions sources/urls/AI/Subreddits.txt
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
# https://github.com/ite-usagi/ublacklist-noai
reddit.com/r/aiart/
reddit.com/r/characterai/
reddit.com/r/chatgpt/
reddit.com/r/dalle2/
reddit.com/r/dalle/
reddit.com/r/deepdream/
reddit.com/r/defendingaiart/
reddit.com/r/generativeai/
reddit.com/r/generative/
reddit.com/r/gpt3/
reddit.com/r/midjourney/
reddit.com/r/novelai/
reddit.com/r/openai/
reddit.com/r/singularity/
reddit.com/r/stablediffusion/
reddit.com/r/aiart
reddit.com/r/characterai
reddit.com/r/chatgpt
reddit.com/r/dalle2
reddit.com/r/dalle
reddit.com/r/deepdream
reddit.com/r/defendingaiart
reddit.com/r/generativeai
reddit.com/r/generative
reddit.com/r/gpt3
reddit.com/r/midjourney
reddit.com/r/novelai
reddit.com/r/openai
reddit.com/r/singularity
reddit.com/r/stablediffusion
1 change: 1 addition & 0 deletions sources/urls/Content farms/Shops/French.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
https://monoreilleretmoi.com/blogs

0 comments on commit 2e41cf5

Please sign in to comment.