-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrape_airbnb.py
115 lines (82 loc) · 2.97 KB
/
scrape_airbnb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import time
import random
from selenium import webdriver
# Module-level configuration object. It stays None until run() stores the
# caller-supplied config here; the pause helpers and search functions read
# their settings from it.
config = None
def min_pause():
    """Block for the number of seconds given by ``config.MIN_PAUSE_SEC``."""
    delay = config.MIN_PAUSE_SEC
    time.sleep(delay)
def short_pause():
    """Block for the number of seconds given by ``config.SHORT_PAUSE_SEC``."""
    delay = config.SHORT_PAUSE_SEC
    time.sleep(delay)
def long_pause():
    """Block for the number of seconds given by ``config.LONG_PAUSE_SEC``."""
    delay = config.LONG_PAUSE_SEC
    time.sleep(delay)
def random_pause():
    """Block for one delay picked at random from ``config.RANDOM_PAUSE_SEC``.

    A single element is drawn from the configured population with
    ``random.sample`` and handed to ``time.sleep``.
    """
    # NOTE(review): this picks one of the listed values; if RANDOM_PAUSE_SEC
    # is meant to be a (min, max) range instead, confirm against the config.
    candidates = config.RANDOM_PAUSE_SEC
    picked = random.sample(candidates, 1)
    delay = picked[0]
    time.sleep(delay)
def run(local_config):
    """Start Chrome, open the Airbnb home page and run every configured search.

    Args:
        local_config: Configuration object. It is stored in the module-level
            ``config`` global so the other functions in this module can read
            it without it being passed to each of them. This function itself
            reads ``CHROMEDRIVER_PATH`` from it.

    Returns:
        None. If chromedriver cannot be started, the error is printed and
        the function returns without doing anything else.
    """
    # Make config global scope so we don't need to pass it around to
    # every function
    global config
    config = local_config

    # WebDriverException lives in selenium.common.exceptions; it is not an
    # attribute of the selenium.webdriver package, so referring to it as
    # webdriver.WebDriverException fails with AttributeError exactly when the
    # handler is needed.
    from selenium.common.exceptions import WebDriverException

    # Start the chrome webdriver
    try:
        browser = webdriver.Chrome(config.CHROMEDRIVER_PATH)
    except WebDriverException as e:
        print('The following error occurred starting chromedriver:')
        print(e)
        return

    # From here on the browser exists, so always shut it down, even when a
    # search step raises, to avoid leaking the Chrome/chromedriver processes.
    try:
        long_pause()

        # Go to airbnb website
        browser.get('https://www.airbnb.com')
        long_pause()

        # top level search
        main_search(browser)
    finally:
        browser.quit()
def main_search(browser):
    """Run one search per entry of ``config.SEARCH_TERMS``, in order.

    Args:
        browser: The webdriver instance, forwarded unchanged to ``do_search``.
    """
    terms = config.SEARCH_TERMS
    for term in terms:
        do_search(browser, term)
def do_search(browser, search_term):
    """Submit one search term and process the resulting listings.

    Clicks the search button, types ``search_term`` followed by a newline
    into the search box, tries to click the homes filter button, and then
    hands over to ``process_search_results``.

    Args:
        browser: The webdriver instance used to drive the page.
        search_term: Text typed into the search box.

    Raises:
        selenium.common.exceptions.WebDriverException: If the search button
            or the search box cannot be found or used. Only the filter
            button step is best-effort.
    """
    from selenium.common.exceptions import WebDriverException

    # Click the search box
    search_button = browser.find_element_by_xpath('//*[@id="site-content"]/div[1]/div/div/div/div[2]/div/div/div/div/button')
    search_button.click()
    min_pause()

    # Enter search term
    search_box = browser.find_element_by_css_selector('#GeocompleteController-via-SearchBar')
    search_box.send_keys(search_term + '\n')
    short_pause()

    # Press "Home" button because we're not interested in "Experiences".
    # This step is best-effort: if the button is missing or cannot be
    # clicked we carry on with whatever results are shown. Only Selenium
    # errors are ignored, so KeyboardInterrupt and programming errors still
    # surface. find_element_by_xpath raises instead of returning a falsy
    # value, so no truthiness check is needed before clicking.
    try:
        homes_button = browser.find_element_by_xpath('//*[@id="site-content"]/div/div/div[2]/div/div/div[1]/div/div/div[2]/div/div/div/div[1]/div/button')
        homes_button.click()
    except WebDriverException:
        pass
    short_pause()

    # Iterate over all of the search results
    process_search_results(browser)
def process_search_results(browser):
    """Open and close every listing on every page of the current results.

    For each results page, each child of the listings container is clicked,
    the window opened by the click is closed, and focus returns to the main
    window. The function then clicks the link labelled with the next page
    number and repeats, stopping when no such link exists.

    Args:
        browser: The webdriver instance currently showing search results.
    """
    from selenium.common.exceptions import NoSuchElementException

    # Save main window handle
    main_window_handle = browser.window_handles[0]

    # Current page is 1
    page = 1

    # Loop over all listings for this search result
    while True:
        # Get element containing the listings
        container = browser.find_element_by_xpath('//*[@id="site-content"]/div/div[2]/div/div/div/div[2]/div[*]/div/div/div[2]/div/div/div')

        # Get the listing elements on this page
        listings = container.find_elements_by_xpath('*')

        # Iterate over each listing
        for listing in listings:
            listing.click()
            long_pause()

            # Close the tab the click opened. If no new tab appeared, the
            # last handle is the main window itself; closing that would end
            # the whole session, so skip the close in that case.
            newest_handle = browser.window_handles[-1]
            if newest_handle != main_window_handle:
                browser.switch_to.window(newest_handle)
                browser.close()

            # Switch back to main window
            browser.switch_to.window(main_window_handle)

        # Find button to go to next page. find_element_by_css_selector
        # raises NoSuchElementException rather than returning a falsy value,
        # so a missing link is the signal that the last page was reached.
        page += 1
        try:
            next_page = browser.find_element_by_css_selector('[aria-label="Page ' + str(page) + '"]')
        except NoSuchElementException:
            break

        next_page.click()
        # Fixed wait for the next results page to load
        time.sleep(5)