-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrapper.py
73 lines (55 loc) · 2.63 KB
/
scrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import requests
from bs4 import BeautifulSoup
import csv
# Announce start-up before any network work begins.
print("Now starting the web scrapper \n")

# Base URLs used to build page links. `zebra` is not referenced in the
# visible part of this file.
edmunds = "https://www.edmunds.com/"
zebra = "https://www.thezebra.com/auto_insurance/vehicles/"

# Parallel lists: car_makes[i] is paired with car_models[i].
car_makes = ["ford", "honda", "hyundai", "toyota"]
car_models = ["escape", "cr-v", "santa-fe", "rav4"]

# Model year appended to every Edmunds URL.
year = "2017"
def makeSoup(link, timeout=30):
    """Download *link* and return its HTML parsed into a BeautifulSoup tree.

    Args:
        link: URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        A ``BeautifulSoup`` object built from the response text with the
        ``html.parser`` backend.

    Raises:
        requests.exceptions.RequestException: If the connection fails or
            the timeout expires.
    """
    # A browser-like User-Agent is sent with every request
    # (presumably the target sites reject the default client UA -- confirm).
    page = requests.get(
        link,
        headers={'User-Agent': 'Mozilla/5.0'},
        timeout=timeout,
    )
    return BeautifulSoup(page.text, 'html.parser')
def formatLines(make, model, msrp, resale, insurance_list, maintancance_list):
    """Flatten one car's data into a single row for the CSV writer.

    Args:
        make: Car make.
        model: Car model.
        msrp: MSRP value for the car.
        resale: Resale value for the car.
        insurance_list: Sequence of insurance values, in column order.
        maintancance_list: Sequence of maintenance values, in column order.

    Returns:
        A new list: ``make, model, msrp, resale``, followed by every item of
        ``insurance_list`` and then every item of ``maintancance_list``.
    """
    # The maintenance values must come from maintancance_list itself; the
    # two lists may differ in length and content.
    return [make, model, msrp, resale, *insurance_list, *maintancance_list]
# Scrape price, insurance and maintenance data for every configured car and
# write one CSV row per car.
# newline='' lets the csv module control line endings itself; without it,
# rows end up separated by blank lines on Windows.
with open('car_data.csv', mode='w', newline='') as car_data:
    car_writer = csv.writer(car_data, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    car_writer.writerow(['Make','Model','MSRP','Resale Value', 'Insurance1','Insurance2','Insurance3','Insurance4','Insurance5','Maintainance1','Maintainance2','Maintainance3','Maintainance4','Maintainance5'])

    # car_makes and car_models are parallel lists, so walk them in lockstep.
    for make, model in zip(car_makes, car_models):
        print('Working on '+ make + ' ' + model)
        edmunds_weblink = edmunds + make + '/' + model + '/' + year

        # Pull price data: the first matching div holds a "<a> - <b>" string;
        # the first part is used as the resale value, the second as the MSRP.
        price_tags = makeSoup(edmunds_weblink).find_all('div', class_='size-30 font-weight-bold text-info text-left')
        price_string_split = price_tags[0].text.split(' - ')
        resale_price = price_string_split[0]
        msrp = price_string_split[1]

        # Collect insurance and maintenance data from the cost-to-own page.
        i_m_soup = makeSoup(edmunds_weblink + '/cost-to-own')
        i_m_strings = i_m_soup.find_all('li', class_='first')

        # NOTE(review): positions 6 and 7 are tied to the page layout at the
        # time of writing -- confirm against the live page.
        insurance_tree = i_m_strings[6]
        insurance = [tag.text for tag in insurance_tree.find_all_next('li', limit=5)]

        # Search from the maintenance node, not the insurance node; otherwise
        # the maintenance columns just repeat the insurance figures.
        maintenance_tree = i_m_strings[7]
        maintainance = [tag.text for tag in maintenance_tree.find_all_next('li', limit=5)]

        print('Printing to csv \n')
        car_writer.writerow(formatLines(make, model, msrp, resale_price, insurance, maintainance))

print('Scrape complete')