-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
188 lines (145 loc) · 5.78 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
from threading import Thread
import os
import datetime
def write_list_to_file(file_path, data):
    """Write the items of ``data`` to ``file_path`` as one comma-separated line.

    Every item is converted with ``str``. The file is created or overwritten,
    and no trailing newline is added.

    Args:
        file_path: Destination file path.
        data: Iterable of items to serialise.
    """
    with open(file_path, 'w') as out:
        out.write(','.join(str(item) for item in data))
def read_file(file_path):
    """Return the full text content of the file at ``file_path``."""
    with open(file_path, 'r') as handle:
        return handle.read()
def print_all_file_content(folder_path):
    """Print the name and content of each regular file directly in ``folder_path``.

    Directories are skipped. For every file the name is printed, then its
    content (via ``read_file``), then an empty line. Entries are visited in
    whatever order ``os.listdir`` yields them.

    Args:
        folder_path: Directory whose files should be printed.
    """
    for entry in os.listdir(folder_path):
        full_path = os.path.join(folder_path, entry)
        # Only plain files are printed; anything else is ignored.
        if not os.path.isfile(full_path):
            continue
        print(entry)
        print(read_file(full_path))
        print("")
def today_folder_path(base_path):
    """Return ``base_path`` joined with today's local date as ``mm-dd-yy``.

    This only builds the path string; nothing is created on disk.

    Args:
        base_path: Directory under which the dated folder lives.
    """
    date_stamp = f"{datetime.datetime.now():%m-%d-%y}"
    return os.path.join(base_path, date_stamp)
#
# def today_folder_exists(base_path):
# return os.path.exists(today_folder_path(base_path))
# Assumes today's folder doesn't exist yet.
def create_date_folder(path):
    """Create today's ``mm-dd-yy`` folder under ``path`` and return its path.

    Missing parent directories are created as well. A message is printed
    saying whether the folder was created or already existed.

    Args:
        path: Base directory in which the dated folder is placed.

    Returns:
        The path of the dated folder.
    """
    today = datetime.datetime.now().strftime("%m-%d-%y")
    folder_path = os.path.join(path, today)
    # Attempt the creation directly instead of checking first: a separate
    # exists-then-create sequence can fail if the folder appears in between.
    try:
        os.makedirs(folder_path)
    except FileExistsError:
        print(f"Folder already exists: {folder_path}")
    else:
        print(f"Folder created: {folder_path}")
    return folder_path
# Page that get_cafe_serving loads before clicking through to a cafe.
menu_url = 'https://netnutrition.cbord.com/nn-prod/vucampusdining'

# First link clicked for each cafe, keyed by cafe name.
cafe_xpath_dict = dict(
    commons='//*[@id="cbo_nn_unitImages_2"]/div/div/a',
    roth='//*[@id="cbo_nn_unitImages_20"]/div/div/a',
    ebi='//*[@id="cbo_nn_unitImages_11"]/div/div/a',
)

# Meal links inside the menu list. Only dinner_path is used by
# get_cafe_serving; lunch_xpath is not referenced elsewhere in this file.
lunch_xpath = '//*[@id="cbo_nn_menuDataList"]/div/div[1]/section/div/div/div[2]/a'
dinner_path = '//*[@id="cbo_nn_menuDataList"]/div/div[1]/section/div/div/div[3]/a'

# Third element clicked for each cafe, after the cafe and meal links.
# NOTE(review): these depend on fixed table row positions (tr[40], tr[24],
# tr[1]) and will presumably break if the page layout changes - confirm.
serving_xpath_dict = dict(
    commons='//*[@id="itemPanel"]/section/div[4]/table/tbody/tr[40]/td/div',
    roth="/html/body/div/main/form/div/div[2]/div/div[5]/section/div[4]/table/tbody/tr[24]/td/div",
    ebi='//*[@id="itemPanel"]/section/div[4]/table/tbody/tr[1]/td/div',
)

# "Back" links; currently referenced only from commented-out code.
back_xpath_serving = "/html/body/div/main/form/div/div[2]/div/div[5]/section/div[1]/nav/a[1]"
back_xpath_cafe = "/html/body/div/main/form/div/div[2]/div/div[3]/section/nav/a"
def threaded_cafe_serving(cafe_name, date_folder):
    """Run ``get_cafe_serving`` for one cafe in its own Chrome session.

    Used as a thread target: a new Chrome driver is started, the scrape is
    run, and the driver is quit afterwards.

    Args:
        cafe_name: Cafe key passed through to ``get_cafe_serving``.
        date_folder: Folder in which the cafe's output file is written.
    """
    driver = webdriver.Chrome()
    try:
        get_cafe_serving(driver, cafe_name, date_folder)
    finally:
        # Quit the browser even when the scrape raises; otherwise the
        # Chrome session would be left running.
        driver.quit()
# TODO: refactor the find-food functions below; they are all pretty similar
def roth_find_food(driver):
    """Return the food names listed in the Roth category rows.

    Selects every ``<tr>`` whose ``data-categoryid`` is ``"2749"`` and takes
    the text of the second ``align-middle`` element inside each row.

    Args:
        driver: Selenium driver already showing the menu table.

    Returns:
        List of strings, one per matching row, in the order Selenium
        returns the rows.

    Raises:
        IndexError: If a matching row has fewer than two ``align-middle``
            elements.
    """
    category_rows = driver.find_elements(By.XPATH, '//tr[@data-categoryid="2749"]')
    return [
        row.find_elements(By.CLASS_NAME, "align-middle")[1].text
        for row in category_rows
    ]
def ebi_find_food(driver):
    """Return the food names listed in the EBI category rows.

    Selects every ``<tr>`` whose ``data-categoryid`` is ``"755"`` and takes
    the text of the second ``align-middle`` element inside each row.

    Args:
        driver: Selenium driver already showing the menu table.

    Returns:
        List of strings, one per matching row, in the order Selenium
        returns the rows.

    Raises:
        IndexError: If a matching row has fewer than two ``align-middle``
            elements.
    """
    category_rows = driver.find_elements(By.XPATH, '//tr[@data-categoryid="755"]')
    return [
        row.find_elements(By.CLASS_NAME, "align-middle")[1].text
        for row in category_rows
    ]
# Dispatch table: cafe name -> function that extracts that cafe's food list.
# NOTE: "commons" has XPath entries but no extractor here, so looking it up
# in this table raises KeyError.
find_food_methods = dict(
    roth=roth_find_food,
    ebi=ebi_find_food,
)
def wait_n_click(driver, x_path, timeout=10):
    """Wait until the element at ``x_path`` is clickable, then click it.

    Args:
        driver: Selenium driver to operate on.
        x_path: XPath expression locating the element.
        timeout: Value passed to ``WebDriverWait`` as the wait limit
            (seconds, per the Selenium documentation).
    """
    clickable = EC.element_to_be_clickable((By.XPATH, x_path))
    WebDriverWait(driver, timeout).until(clickable).click()
def get_cafe_serving(driver, cafe_name, data_folder_path):
    """Scrape one cafe's dinner items and write them to ``<cafe_name>.txt``.

    Loads ``menu_url``, clicks the cafe link, the dinner link and the
    cafe-specific serving element, extracts the food names with the cafe's
    function from ``find_food_methods``, and writes them comma-separated to
    a file inside ``data_folder_path``.

    Args:
        driver: Selenium driver used for navigation.
        cafe_name: Key into ``cafe_xpath_dict``, ``serving_xpath_dict`` and
            ``find_food_methods``.
        data_folder_path: Folder that receives the output file.

    Raises:
        KeyError: If ``cafe_name`` is missing from any of the lookup tables.
    """
    # Resolve every per-cafe lookup before touching the browser, so an
    # unsupported cafe name fails immediately instead of after navigation.
    cafe_xpath = cafe_xpath_dict[cafe_name]
    serving_xpath = serving_xpath_dict[cafe_name]
    find_food = find_food_methods[cafe_name]
    meal_path = dinner_path  # only the dinner menu is scraped

    driver.get(menu_url)
    wait_n_click(driver, cafe_xpath)
    wait_n_click(driver, meal_path)
    wait_n_click(driver, serving_xpath)

    food = find_food(driver)
    file_path = os.path.join(data_folder_path, f"{cafe_name}.txt")
    write_list_to_file(file_path, food)
def scrape_food(base_path):
    """Scrape the Roth and EBI menus concurrently into today's date folder.

    Creates (or reuses) the dated folder under ``base_path``, then runs
    ``threaded_cafe_serving`` for "roth" and "ebi" in separate threads and
    waits for both to finish.

    Args:
        base_path: Directory under which the dated output folder is created.
    """
    date_folder = create_date_folder(base_path)

    # TODO write to get the Home Grown section of Roth dinner
    roth_thread = Thread(target=threaded_cafe_serving, args=("roth", date_folder))
    ebi_thread = Thread(target=threaded_cafe_serving, args=("ebi", date_folder))

    roth_thread.start()
    # The one-second pause sits between the two starts so that it actually
    # separates them (presumably intended to stagger the browser launches);
    # pausing before either thread starts would only delay both.
    time.sleep(1)
    ebi_thread.start()

    # Wait for both scrapes to complete.
    roth_thread.join()
    ebi_thread.join()
def main(base_path=None):
    """Print today's cafe servings, scraping them first if not yet stored.

    If today's dated folder already exists under ``base_path`` its files are
    printed directly; otherwise the menus are scraped and then printed.

    Args:
        base_path: Directory holding the dated data folders. Defaults to the
            original hard-coded project data directory when omitted.
    """
    if base_path is None:
        base_path = "/Users/davidhuang/Desktop/Project/coding-projects/cafe-servings-letter/data"
    today_folder = today_folder_path(base_path)
    # Scrape only when there is no folder for today yet; existing data is reused.
    if not os.path.exists(today_folder):
        scrape_food(base_path)
    print_all_file_content(today_folder)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()