From b2643138e34b0f2d74afd99825dce7996e5b6a44 Mon Sep 17 00:00:00 2001 From: Adam Stefanides Date: Wed, 25 Sep 2024 16:05:01 +0200 Subject: [PATCH 1/6] Add convert_geonode.py and fetch_geonode.sh fetch_geonode.sh downloads lists from geonode.com, stores them in a temp folder, runs convert_geonode.sh that will convert the lists into a file with ip adresses and deletes the temp folder --- scripts/convert_geonode.py | 36 ++++++++++++++++++++++++++++++++++++ scripts/fetch_geonode.sh | 31 +++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 scripts/convert_geonode.py create mode 100644 scripts/fetch_geonode.sh diff --git a/scripts/convert_geonode.py b/scripts/convert_geonode.py new file mode 100644 index 0000000..71ec364 --- /dev/null +++ b/scripts/convert_geonode.py @@ -0,0 +1,36 @@ +''' +author: Adam.Stefanides@cesnet.cz +descr: Iterates through all files containing proxy servers data from geonode, + extracts ip adresses and writes them into a new file, + this script HAS to be in the same folder as fetch_geonode.sh! +''' + +import json +from pathlib import Path + +folder_path = Path('/data/blacklists/temp') + +# Get all files in the folder +files = [f.name for f in folder_path.iterdir() if f.is_file()] + +output = "" +for file in files: + try: + with open('/data/blacklists/temp/' + file, "r") as f: + obj = json.loads(f.readline()) + for rec in obj["data"]: + # extracting ip + output += rec["ip"] + "\n" + # removing extra '\n' + output = output[:-1] + except Exception as e: + print(f"Exception occured while opening and reading file: {e}") + exit(-1) + +# creating output file +with open(f"/data/blacklists/geonode_proxy_list.txt", "w") as f: + f.write(output) + +#print("convert_geonode.py: Lists merged and converted!") + + \ No newline at end of file diff --git a/scripts/fetch_geonode.sh b/scripts/fetch_geonode.sh new file mode 100644 index 0000000..ca268d7 --- /dev/null +++ b/scripts/fetch_geonode.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +#author: Adam.Stefanides@cesnet.cz +#descr: Donwloads proxy servers lists from geonode, +# convert_geonode.py HAS to be in the same file! + +base_url_part1="https://proxylist.geonode.com/api/proxy-list?limit=500&page=" +base_url_part2="&sort_by=lastChecked&sort_type=desc" + +# Folder to store downloaded files +download_dir=/data/blacklists/temp + +# Maximum number of pages to download +N=14 + +mkdir -p "$download_dir" + +for i in $(seq 0 $N); do + # Construct the URL using the base URL and current number + url="${base_url_part1}${i}${base_url_part2}" + #echo "Downloading $url..." + wget -q -P "$download_dir" "$url" +done + +#echo "All downloads complete!" +#echo "Running convert_geonode.py ..." + +python3 convert_geonode.py + +#echo "Removing temp folder" +rm -r "$download_dir" From 1b34f67a0422eea94bb1edd6be4b37ef26543412 Mon Sep 17 00:00:00 2001 From: Adam Stefanides Date: Wed, 25 Sep 2024 16:11:10 +0200 Subject: [PATCH 2/6] Add proxy and vpn server ip lists to primary_blacklist.yml --- etc/primary_blacklists.yml | 217 +++++++++++++++++++++++++++++++++++++ 1 file changed, 217 insertions(+) diff --git a/etc/primary_blacklists.yml b/etc/primary_blacklists.yml index cde419d..a371b03 100644 --- a/etc/primary_blacklists.yml +++ b/etc/primary_blacklists.yml @@ -21,6 +21,223 @@ # headers - dict of key-value pairs specyfing additional HTTP headers # Example of url_params and headers usage can be viewed below in Abuse IP DB blacklist specification. iplists: +#Proxy servers +#HTTP +- id: proxy_ercin_http + name: ErcinDedeoglu's HTTP proxy servers list + descr: HTTP protocol, Number of IP adresses = 26782, Updates every ~10min + provider_link: https://github.com/ErcinDedeoglu/proxies + url: https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/http.txt + regex: "^(\\A)" + time: + hour: "*" + minute: 0 + +- id: proxy_vakhov_http + name: Vakhov's HTTP proxy servers list + descr: HTTP protocol, Number of IP adresses = 11353, Updates every ~15-30min + provider_link: https://github.com/vakhov/fresh-proxy-list + url: https://raw.githubusercontent.com/vakhov/fresh-proxy-list/master/http.txt + regex: "^(\\A)" + time: + hour: "*" + minute: 0 + +- id: proxy_speed_http + name: TheSpeedX's HTTP proxy servers list + descr: HTTP protocol, Number of IP adresses = 3321, Updates every 3h + provider_link: https://github.com/TheSpeedX/PROXY-List + url: https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt + regex: "^(\\A)" + time: + hour: "*/3" + minute: 0 + +- id: proxy_officialputuid_http + name: officialputuid's HTTP proxy servers list + descr: HTTP protocol, Number of IP adresses = 2440, Updates every 5h + provider_link: https://github.com/officialputuid/KangProxy + url: https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/http/http.txt + regex: "^(\\A)" + time: + hour: "*/5" + minute: 0 + +- id: proxy_vann_http + name: Vann-Dev's HTTP proxy servers list + descr: HTTP protocol, Number of IP adresses = 881, Updates every 3h + provider_link: https://github.com/Vann-Dev/proxy-list + url: https://raw.githubusercontent.com/Vann-Dev/proxy-list/main/proxies/http.txt + regex: "^(\\A)" + time: + hour: "*/3" + minute: 0 + +#HTTPS +- id: proxy_ercin_https + name: ErcinDedeoglu's HTTPS proxy servers list + descr: HTTPS protocol, Number of IP adresses = 26782, Updates every ~10min + provider_link: https://github.com/ErcinDedeoglu/proxies + url: https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/http.txt + regex: "^(\\A)" + time: + hour: "*" + minute: 0 + +- id: proxy_officialputuid_https + name: officialputuid's HTTPS proxy servers list + descr: HTTPS protocol, Number of IP adresses = 3555, Updates every 5h + provider_link: https://github.com/officialputuid/KangProxy + url: https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/https/https.txt + regex: "^(\\A)" + time: + hour: "*/5" + minute: 0 + +- id: proxy_zaeem20_https + name: Zaeem20's HTTPS proxy servers list + descr: HTTPS protocol, Number of IP adresses = 3555, Updates every ~10min + provider_link: https://github.com/Zaeem20/FREE_PROXIES_LIST + url: https://raw.githubusercontent.com/Zaeem20/FREE_PROXIES_LIST/master/https.txt + regex: "^(\\A)" + time: + hour: "*" + minute: 0 + +#SOCKS4 +- id: proxy_ercin_socks4 + name: ErcinDedeoglu's SOCKS4 proxy servers list + descr: SOCKS4 protocol, Number of IP adresses = 8529, Updates every ~10min + provider_link: https://github.com/ErcinDedeoglu/proxies + url: https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/socks4.txt + regex: "^(\\A)" + time: + hour: "*" + minute: 0 + +- id: proxy_speed_socks4 + name: TheSpeedX's SOCKS4 proxy servers list + descr: SOCKS4 protocol, Number of IP adresses = 2832, Updates every 3hod + provider_link: https://github.com/TheSpeedX/PROXY-List + url: https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt + regex: "^(\\A)" + time: + hour: "*/3" + minute: 0 + +- id: proxy_officialputuid_socks4 + name: officialputuid's SOCKS4 proxy servers list + descr: SOCKS4 protocol, Number of IP adresses = 6732, Updates every 5h + provider_link: https://github.com/officialputuid/KangProxy + url: https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/socks4/socks4.txt + regex: "^(\\A)" + time: + hour: "*/5" + minute: 0 + +#SOCKS5 +- id: proxy_ercin_socks5 + name: ErcinDedeoglu's SOCKS5 proxy servers list + descr: SOCKS5 protocol, Number of IP adresses = 2345, Updates every ~10min + provider_link: https://github.com/ErcinDedeoglu/proxies + url: https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/socks5.txt + regex: "^(\\A)" + time: + hour: "*" + minute: 0 + +- id: proxy_speed_socks5 + name: TheSpeedX's SOCKS5 proxy servers list + descr: SOCKS5 protocol, Number of IP adresses = 2486, Updates every 3hod + provider_link: https://github.com/TheSpeedX/PROXY-List + url: https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt + regex: "^(\\A)" + time: + hour: "*/3" + minute: 0 + +- id: proxy_officialputuid_socks5 + name: officialputuid's SOCKS5 proxy servers list + descr: SOCKS5 protocol, Number of IP adresses = 3039, Updates every 5h + provider_link: https://github.com/officialputuid/KangProxy + url: https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/socks5/socks5.txt + regex: "^(\\A)" + time: + hour: "*/5" + minute: 0 + +#UNKNOWN +- id: proxy_geonode + name: Geonode's proxy servers list + descr: Unknown protocols, Number of IP adresses = 7399, Updates every ? + provider_link: https://geonode.com/free-proxy-list + url: file:///data/blacklists/geonode_proxy_list.txt + regex: "" + time: + hour: "*" + minute: 0 + +- id: proxy_proxyscrape + name: Proxyscrape's proxy servers list + descr: Unknown protocols, Number of IP adresses = 1135, Updates every ? + provider_link: https://proxyscrape.com/ + url: https://api.proxyscrape.com/v3/free-proxy-list/get?request=getproxies&proxy_format=protocolipport&format=text + regex: "(\\A)" + time: + hour: "1" + minute: 0 + +- id: proxy_fineproxy + name: Fineproxy's proxy servers list + descr: Unknown protocols, Number of IP adresses = 2837, Updates every 30min, The source URL may change + provider_link: https://fineproxy.org/free-proxy/ + url: https://fineproxy.org/wp-admin/admin-ajax.php?action=proxylister_download&nonce=62a3af4c7b&format=txt&filter={} + regex: "^(\\A)" + time: + hour: "*" + minute: 0 + +- id: proxy_yemixzy + name: Yemixzy's proxy servers list + descr: Unknown protocols, Number of IP adresses = 10902, Updates every 3hod, Contains 46% duplicates (size is without them) + provider_link: https://github.imc.re/yemixzy/proxy-list + url: https://raw.githubusercontent.com/yemixzy/proxy-list/main/proxies/unchecked.txt + regex: "^(\\A)" + time: + hour: "*/3" + minute: 0 + +#VPN Servers +- id: vpn_X4BNet_datacenter + name: X4BNet's vpn datacenter servers list + descr: Number of IP adresses = 30082, Updates every day + provider_link: https://github.com/X4BNet/lists_vpn + url: https://raw.githubusercontent.com/X4BNet/lists_vpn/main/output/datacenter/ipv4.txt + regex: "^(\\A)" + time: + hour: "1" + minute: 0 + +- id: vpn_X4BNet_vpn + name: X4BNet's vpn servers list + descr: Number of IP adresses = 3756, Updates every day + provider_link: https://github.com/X4BNet/lists_vpn + url: https://raw.githubusercontent.com/X4BNet/lists_vpn/main/output/vpn/ipv4.txt + regex: "^(\\A)" + time: + hour: "1" + minute: 0 + +- id: vpn_az0 + name: az0's vpn servers list + descr: Number of IP adresses = 20657, Updates every 12h, Contains 25% duplicates + provider_link: https://github.com/az0/vpn_ip + url: https://raw.githubusercontent.com/az0/vpn_ip/main/data/output/ip.txt + regex: "^(\\A)" + time: + hour: "*/12" + minute: 0 + - id: feodo name: Feodo tracker descr: Feodo Tracker is a project of abuse.ch. Shares botnet C&C
servers associated with the Feodo malware family(Dridex, Emotet/Heodo)
as well as TrickBot and QakBot (aka QuakBot / Qbot) From 898ed16af418ce5bbf60f2ee32818438cb7948e1 Mon Sep 17 00:00:00 2001 From: Adam Stefanides Date: Tue, 1 Oct 2024 08:28:49 +0200 Subject: [PATCH 3/6] Replace fetch_geonode.sh & convert_geonode.py with fetch_geonode.py Using requests lib to avoid creating temp files and folders --- scripts/convert_geonode.py | 36 ----------------------------- scripts/fetch_geonode.py | 46 ++++++++++++++++++++++++++++++++++++++ scripts/fetch_geonode.sh | 31 ------------------------- 3 files changed, 46 insertions(+), 67 deletions(-) delete mode 100644 scripts/convert_geonode.py create mode 100644 scripts/fetch_geonode.py delete mode 100644 scripts/fetch_geonode.sh diff --git a/scripts/convert_geonode.py b/scripts/convert_geonode.py deleted file mode 100644 index 71ec364..0000000 --- a/scripts/convert_geonode.py +++ /dev/null @@ -1,36 +0,0 @@ -''' -author: Adam.Stefanides@cesnet.cz -descr: Iterates through all files containing proxy servers data from geonode, - extracts ip adresses and writes them into a new file, - this script HAS to be in the same folder as fetch_geonode.sh! -''' - -import json -from pathlib import Path - -folder_path = Path('/data/blacklists/temp') - -# Get all files in the folder -files = [f.name for f in folder_path.iterdir() if f.is_file()] - -output = "" -for file in files: - try: - with open('/data/blacklists/temp/' + file, "r") as f: - obj = json.loads(f.readline()) - for rec in obj["data"]: - # extracting ip - output += rec["ip"] + "\n" - # removing extra '\n' - output = output[:-1] - except Exception as e: - print(f"Exception occured while opening and reading file: {e}") - exit(-1) - -# creating output file -with open(f"/data/blacklists/geonode_proxy_list.txt", "w") as f: - f.write(output) - -#print("convert_geonode.py: Lists merged and converted!") - - \ No newline at end of file diff --git a/scripts/fetch_geonode.py b/scripts/fetch_geonode.py new file mode 100644 index 0000000..61f1bc3 --- /dev/null +++ b/scripts/fetch_geonode.py @@ -0,0 +1,46 @@ +#Author: Adam.Stefanides@cesnet.cz + +import requests +import json +import math + +def get_page(n: int) -> dict: + ''' + Descr: Sends GET request to an url with a specified page and returns it as json obj + :param n: page number + :return: json obj [dict] + ''' + url = "https://proxylist.geonode.com/api/proxy-list?limit=500&page=" + response = requests.get(url + str(n)) + + file_content = b"" + + for chunk in response.iter_content(chunk_size=1024): + if chunk: + file_content += chunk + + file_string = file_content.decode("utf-8") + + return json.loads(file_string) + +def get_file(n: int) -> str: + ''' + Descr: Donwloads N pages from an url, extracts ip adresses and returns them + :param n: number of pages + :return: string of ip adresses + ''' + output = "" + for i in range(1, n + 1): + obj = get_page(i) + for rec in obj["data"]: + # extracting ip + output += rec["ip"] + "\n" + return output[:-1] + +#Calculating number of pages of API to download +obj = get_page(1) +pages = math.ceil(obj["total"] / obj["limit"]) + +#Write the ip aresses into a file +with open("geonode_proxy_list.txt", "w") as file: + file.write(get_file(pages)) \ No newline at end of file diff --git a/scripts/fetch_geonode.sh b/scripts/fetch_geonode.sh deleted file mode 100644 index ca268d7..0000000 --- a/scripts/fetch_geonode.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -#author: Adam.Stefanides@cesnet.cz -#descr: Donwloads proxy servers lists from geonode, -# convert_geonode.py HAS to be in the same file! - -base_url_part1="https://proxylist.geonode.com/api/proxy-list?limit=500&page=" -base_url_part2="&sort_by=lastChecked&sort_type=desc" - -# Folder to store downloaded files -download_dir=/data/blacklists/temp - -# Maximum number of pages to download -N=14 - -mkdir -p "$download_dir" - -for i in $(seq 0 $N); do - # Construct the URL using the base URL and current number - url="${base_url_part1}${i}${base_url_part2}" - #echo "Downloading $url..." - wget -q -P "$download_dir" "$url" -done - -#echo "All downloads complete!" -#echo "Running convert_geonode.py ..." - -python3 convert_geonode.py - -#echo "Removing temp folder" -rm -r "$download_dir" From ac534c9be60aa91758c4e653e88aceb67dc27809 Mon Sep 17 00:00:00 2001 From: Adam Stefanides Date: Tue, 1 Oct 2024 08:38:53 +0200 Subject: [PATCH 4/6] Add handling of incorrect (large) number of pages --- scripts/fetch_geonode.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/fetch_geonode.py b/scripts/fetch_geonode.py index 61f1bc3..8eb14cd 100644 --- a/scripts/fetch_geonode.py +++ b/scripts/fetch_geonode.py @@ -3,6 +3,9 @@ import requests import json import math +import sys + +MAX_N_OF_REQUESTS = 100 def get_page(n: int) -> dict: ''' @@ -40,6 +43,9 @@ def get_file(n: int) -> str: #Calculating number of pages of API to download obj = get_page(1) pages = math.ceil(obj["total"] / obj["limit"]) +if pages > MAX_N_OF_REQUESTS: + print("ERROR: number of pages exceeds the number of allowed requests!", file=sys.stderr) + sys.exit(1) #Write the ip aresses into a file with open("geonode_proxy_list.txt", "w") as file: From 0d1e90f88774f0cd0e80dec81ebf166ad56f3009 Mon Sep 17 00:00:00 2001 From: Adam Stefanides Date: Fri, 4 Oct 2024 11:02:37 +0200 Subject: [PATCH 5/6] Simplify parsing of response in get_page() --- scripts/fetch_geonode.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/scripts/fetch_geonode.py b/scripts/fetch_geonode.py index 8eb14cd..5b9ab85 100644 --- a/scripts/fetch_geonode.py +++ b/scripts/fetch_geonode.py @@ -1,7 +1,6 @@ #Author: Adam.Stefanides@cesnet.cz import requests -import json import math import sys @@ -16,15 +15,7 @@ def get_page(n: int) -> dict: url = "https://proxylist.geonode.com/api/proxy-list?limit=500&page=" response = requests.get(url + str(n)) - file_content = b"" - - for chunk in response.iter_content(chunk_size=1024): - if chunk: - file_content += chunk - - file_string = file_content.decode("utf-8") - - return json.loads(file_string) + return response.json() def get_file(n: int) -> str: ''' From bd48ee1786b5805797432d10bc94d6fc5f611b9a Mon Sep 17 00:00:00 2001 From: Adam Stefanides Date: Fri, 4 Oct 2024 11:04:19 +0200 Subject: [PATCH 6/6] Fix output file location --- scripts/fetch_geonode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/fetch_geonode.py b/scripts/fetch_geonode.py index 5b9ab85..8ae91fa 100644 --- a/scripts/fetch_geonode.py +++ b/scripts/fetch_geonode.py @@ -39,5 +39,5 @@ def get_file(n: int) -> str: sys.exit(1) #Write the ip aresses into a file -with open("geonode_proxy_list.txt", "w") as file: +with open("/data/blacklists/geonode_proxy_list.txt", "w") as file: file.write(get_file(pages)) \ No newline at end of file