-
Notifications
You must be signed in to change notification settings - Fork 0
/
CarsWebScraping.py
79 lines (64 loc) · 2.81 KB
/
CarsWebScraping.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
"""Scrape BMW listings from cars.com search results and write them to a CSV.

Visits PAGES_TO_SCRAPE result pages (following the "next" pagination link),
prints each vehicle's details to stdout, and appends one CSV row per vehicle.
"""
from bs4 import BeautifulSoup
import requests
import csv

BASE_URL = 'https://www.cars.com'
# Search query: all BMW listings, any model/price, 20-mile radius, no zip.
SEARCH_URL = (BASE_URL + '/shopping/results/?stock_type=all&makes%5B%5D=bmw'
              '&models%5B%5D=&list_price_max=&maximum_distance=20&zip=')
OUTPUT_CSV = 'Cars Web Scraping22.csv'
# Number of result pages to visit. Raise this (or loop until the "next"
# link disappears) to scrape more pages; the loop already stops early
# when pagination runs out.
PAGES_TO_SCRAPE = 2
REQUEST_TIMEOUT = 30  # seconds; avoid hanging forever on a dead connection


def _fetch_page(url):
    """Download *url* and return (soup, vehicle_cards).

    vehicle_cards is the list of listing-card <div> tags on the page
    (may be empty if the page layout changed or the request was blocked).
    """
    html = requests.get(url, timeout=REQUEST_TIMEOUT).text
    soup = BeautifulSoup(html, 'lxml')
    cards = soup.find_all('div', {'class': 'vehicle-card-main js-gallery-click-card'})
    return soup, cards


def _extract_fields(card):
    """Return (name, rating, reviews, dealer, price) strings for one card.

    Each field falls back to the string 'None' when the expected markup is
    missing — AttributeError is what bs4 raises when .find() returns None
    (narrowed from the original bare ``except:`` which also swallowed
    KeyboardInterrupt/SystemExit).
    """
    try:
        name = card.find('a', {'class': 'vehicle-card-link js-gallery-click-link'}).h2.text
    except AttributeError:
        name = 'None'
    try:
        rating = card.find('div', {'class': 'sds-rating'}).span.text
    except AttributeError:
        rating = 'None'
    try:
        # [1:-9] strips the surrounding parentheses and the trailing
        # " reviews)" suffix, leaving just the count.
        reviews = (card.find('span', {'class': 'sds-rating__link sds-button-link'}).get_text())[1:-9]
    except AttributeError:
        reviews = 'None'
    try:
        dealer = (card.find('div', class_='dealer-name').get_text()).strip()
    except AttributeError:
        dealer = 'None'
    try:
        price = card.find('span', class_='primary-price').get_text()
    except AttributeError:
        price = 'None'
    return name, rating, reviews, dealer, price


def _next_page_url(soup):
    """Return the absolute URL of the next results page, or None on the last page.

    The original crashed here (TypeError on ``[...]`` of None) when the
    pagination link was absent; returning None lets the caller stop cleanly.
    """
    controls = soup.find('div', class_='sds-pagination__controls')
    if controls is None:
        return None
    link = controls.find('a', {'id': 'next_paginate'})
    if link is None or not link.has_attr('href'):
        return None
    return BASE_URL + link['href']


def main():
    """Scrape the configured number of result pages into OUTPUT_CSV."""
    soup, vehicle_cards = _fetch_page(SEARCH_URL)
    count = 0  # running total of vehicles seen across all pages
    # newline='' is required by the csv module to avoid blank rows on
    # Windows; ``with`` guarantees the file is closed even if a request fails.
    with open(OUTPUT_CSV, 'w', newline='') as csv_file:
        csv_writer = csv.writer(csv_file, dialect='excel')
        csv_writer.writerow(['NAME', 'RATINGS', 'REVIEWS', 'DEALER', 'PRICE'])
        for page in range(PAGES_TO_SCRAPE):
            for card in vehicle_cards:
                count += 1
                print(count)
                name, rating, reviews, dealer, price = _extract_fields(card)
                print('Car Name: ', name)
                print('Ratings: ', rating)
                print('Reviews: ', reviews)
                print('Dealer: ', dealer)
                print('Price: ', price)
                print('-' * 30)
                csv_writer.writerow([name, rating, reviews, dealer, price])
            # Only fetch the next page if another iteration will process it
            # (the original fetched one extra page it never used).
            if page + 1 < PAGES_TO_SCRAPE:
                url = _next_page_url(soup)
                if url is None:  # no further pages — stop instead of crashing
                    break
                soup, vehicle_cards = _fetch_page(url)


if __name__ == '__main__':
    main()