Skip to content

Commit

Permalink
tools: update tranco toplist to include subdomains (not including com…
Browse files Browse the repository at this point in the history
…mon ones like www)
  • Loading branch information
jarelllama authored Feb 9, 2025
1 parent 4edacc6 commit 92b0c2c
Showing 1 changed file with 19 additions and 10 deletions.
29 changes: 19 additions & 10 deletions scripts/tools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -115,25 +115,34 @@ prune_lines() {
# Output:
# toplist.tmp
download_toplist() {
    # A non-empty toplist.tmp from an earlier call is reused as-is.
    if [[ -s toplist.tmp ]]; then
        return
    fi

    local max_attempts=3  # Retries twice
    local attempt
    local url='https://tranco-list.eu/top-1m-incl-subdomains.csv.zip'
    local subdomain
    local -a strip_args

    # A bounded for-loop advances the attempt counter even when an iteration
    # bails out early via 'continue'.
    for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
        curl -sSLZ "$url" -o temp

        # Keep only the second comma-separated field of each row
        # (presumably the domain column of the Tranco CSV).
        unzip -p temp | mawk -F ',' '{ print $2 }' > toplist.tmp

        # Empty output means the download or extraction failed; try again.
        [[ -s toplist.tmp ]] || continue

        # format_file is defined elsewhere in this script.
        format_file toplist.tmp

        # Strip away subdomains
        # One substitution is collected per entry in config/subdomains.txt so
        # the toplist is rewritten in a single sed pass. sed applies the
        # expressions in file order to every line, matching the result of
        # running one 'sed -i' per entry.
        strip_args=()
        while read -r subdomain || [[ -n "$subdomain" ]]; do
            # Skip blank lines; they would yield a pattern that strips a bare
            # leading dot rather than a subdomain.
            [[ -n "$subdomain" ]] || continue
            strip_args+=(-e "s/^${subdomain}\.//")
        done < config/subdomains.txt

        if (( ${#strip_args[@]} > 0 )); then
            sed -i "${strip_args[@]}" toplist.tmp
        fi

        # Stripping can make distinct entries collapse to the same domain.
        sort -u toplist.tmp -o toplist.tmp

        return
    # '|| true' keeps a failing command inside the loop from aborting the
    # caller (presumably the script runs under errexit).
    done || true

    # Every attempt produced an empty toplist. error is defined elsewhere in
    # this script.
    error 'Error downloading toplist.'
}

# Function 'download_nrd_feed' downloads and collates NRD feeds consisting
Expand All @@ -142,7 +151,7 @@ download_toplist() {
# nrd.tmp
# Telegram notification if an error occurred while downloading the NRD feeds
download_nrd_feed() {
[[ -f nrd.tmp ]] && return
[[ -s nrd.tmp ]] && return

local url1='https://raw.githubusercontent.com/xRuffKez/NRD/refs/heads/main/lists/30-day/domains-only/nrd-30day_part1.txt'
local url2='https://raw.githubusercontent.com/xRuffKez/NRD/refs/heads/main/lists/30-day/domains-only/nrd-30day_part2.txt'
Expand Down

0 comments on commit 92b0c2c

Please sign in to comment.