forked from stevejenkins/postwhite
-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrape_yahoo
executable file
·75 lines (57 loc) · 3.18 KB
/
scrape_yahoo
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#! /usr/bin/env bash
#########################################################################
# Scrape Yahoo! - Generates a list of Yahoo! Outbound IPs for Postwhite #
# https://github.com/stevejenkins/postwhite #
#########################################################################
# By Steve Cook (https://www.stevecook.net/)
# and Steve Jenkins (https://www.stevejenkins.com/)
# Release metadata for this scraper.
version='1.1'
lastupdated='4 May 2022'

# Page that lists the outbound mail servers to scrape.
# Verizon, AOL and Yahoo servers have allegedly merged.
yahoo_url='https://senders.yahooinc.com/outbound-mail-servers'
# NO NEED TO EDIT PAST THIS LINE
#################################################################
# Abort script on error
set -e

# Locate the aggregateCIDR.pl helper on PATH and remember its path in
# $aggregateCIDR for the later aggregation steps. `command -v` is a shell
# builtin, so the lookup does not depend on an external `which` being
# installed. Diagnostics go to stderr so they are not mistaken for output.
if ! aggregateCIDR=$(command -v aggregateCIDR.pl); then
  echo "fatal: unable to locate aggregateCIDR.pl" >&2
  echo "https://github.com/nabbi/route-summarization" >&2
  exit 1
fi
printf "Scraping Yahoo! outbound IP addresses...\n"

# Use the config file passed as the first argument, or fall back to the
# system-wide default when no (or an empty) argument is given.
config_file="${1:-/etc/postwhite.conf}"

# Source the config file. It must exist and be non-empty; otherwise abort.
# The path is quoted so that names containing spaces or glob characters are
# tested as a single word.
# NOTE(review): the config presumably defines $yahoo_static_hosts, which the
# later steps write to -- it is not set anywhere else in this script.
if [ -s "$config_file" ]; then
  printf "\nReading options from %s...\n" "$config_file"
  # shellcheck source=/dev/null
  . "${config_file}"
else
  >&2 printf "%s: Can't find %s. Exiting.\n" "$0" "$config_file"
  exit 1
fi
# Download the page at $yahoo_url into $content using wget, or curl when wget
# is not installed. Tool detection relies on the exit status of `command -v`
# rather than on testing its (possibly empty) output: an unquoted empty
# substitution inside `[ -n ... ]` collapses to `[ -n ]`, which is always
# true, and would make the "neither tool found" branch unreachable.
# A browser-style User-Agent is sent with the request.
# NOTE(review): presumably the site rejects default wget/curl agents -- confirm.
scrape_user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:83.0) Gecko/20100101 Firefox/83.0"
fetch_ok=yes

if command -v wget > /dev/null 2>&1; then
  content=$(wget --user-agent="$scrape_user_agent" -q -O - "$yahoo_url") || fetch_ok=no
elif command -v curl > /dev/null 2>&1; then
  content=$(curl -A "$scrape_user_agent" -s -L "$yahoo_url") || fetch_ok=no
else
  echo "wget or curl required." >&2
  exit 1
fi

# Stop with an explicit message when the download failed or returned nothing,
# instead of dying silently under `set -e` or scraping an empty page.
if [ "$fetch_ok" = no ] || [ -z "$content" ]; then
  >&2 printf "%s: unable to download %s. Exiting.\n" "$0" "$yahoo_url"
  exit 1
fi
# Scrape and format IPv4 addresses.
# grep -o prints every dotted-quad/prefix match in $content on its own line;
# sed then tags each line with the "ip4:" prefix.
ipv4_mailers=$(printf '%s\n' "$content" \
  | grep -E -o '(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)/([0-9]{1,3}?)' \
  | sed 's/^/ip4:/')

# Refuse to go on with an empty result: the redirection below truncates the
# existing list, so a failed or changed page would otherwise wipe it out.
if [ -z "$ipv4_mailers" ]; then
  >&2 printf "%s: no IPv4 ranges found at %s; leaving %s untouched. Exiting.\n" \
    "$0" "$yahoo_url" "$yahoo_static_hosts"
  exit 1
fi

# Overwrite old Yahoo! mailer list with scraped IPv4 hosts, filtered through
# aggregateCIDR.pl. The helper path is quoted so a path with spaces still runs.
printf '%s\n' "$ipv4_mailers" | "${aggregateCIDR}" --quiet --spf > "$yahoo_static_hosts"
#######################################
# Print every IPv6-looking token found on stdin, one per line, each tagged
# with the "ipv6:" prefix.
# Arguments: none (reads the page text from stdin)
# Outputs:   tagged candidates on stdout; nothing when there is no match
# Returns:   exit status of the final sed stage
#######################################
extract_yahoo_ipv6() {
  grep -E -o '(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))' \
    | sed 's/^/ipv6:/'
}

# Scrape and format IPv6 addresses
# This isn't parsing the page correctly. Post filter through aggregateCIDR.pl to clean invalids
# NOTE(review): IPv4 entries are tagged "ip4:" but these are tagged "ipv6:"
# (the SPF mechanism name is "ip6:"); confirm aggregateCIDR.pl accepts this
# prefix before changing it.
ipv6_mailers=$(printf '%s\n' "$content" | extract_yahoo_ipv6)

# Append IPv6 hosts to newly-created Yahoo! mailer list. When nothing was
# scraped the append is skipped, so the helper is not run on an empty line.
if [ -n "$ipv6_mailers" ]; then
  printf '%s\n' "$ipv6_mailers" | "${aggregateCIDR}" --quiet --spf >> "$yahoo_static_hosts"
fi
# Announce completion, then leave with the status of that final printf.
printf '\nVerizon/AOL/Yahoo! outbound mailers updated.\n'
exit "$?"