-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgrind75_scraper.py
56 lines (47 loc) · 1.64 KB
/
grind75_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import logging
import sys

import requests
from bs4 import BeautifulSoup
from termcolor import colored
def scrape_grind75():
    """Scrape the Grind 75 problem list from Tech Interview Handbook.

    Fetches the Grind 75 page (all problems, grouped by topic) and collects
    every anchor whose href contains "/problems".

    Returns:
        dict: Maps problem link (str) -> problem name (str). Keyed by link
        because problem names can duplicate with different casing.

    Raises:
        SystemExit: On HTTP/network failure, HTML parse failure, or when no
        problem links are found at the target URL.
    """
    url = (
        "https://www.techinterviewhandbook.org/grind75?hours="
        "15&weeks=20&grouping=topics&mode=all"
    )
    # {problem_link: problem_name}
    # since there are duplicate names with different casing
    problem_dict = {}
    print(colored("Scraping Grind 75...", "blue"))
    try:
        # timeout= prevents hanging indefinitely on an unresponsive server
        response = requests.get(url, timeout=30)
        # Raises HTTPError if request returns unsuccessful status code
        response.raise_for_status()
    except requests.exceptions.HTTPError as e:
        logging.error(colored(f"HTTP Error: {e}", "red"))
        sys.exit(1)
    except requests.exceptions.RequestException as e:
        logging.error(colored(f"Error fetching URL: {e}", "red"))
        sys.exit(1)
    html = response.text
    try:
        soup = BeautifulSoup(html, "html.parser")
        for link in soup.find_all("a", href=True):
            if "/problems" in link["href"]:
                problem_dict[link["href"]] = link.text
    except Exception as e:
        logging.error(colored(f"Error while parsing HTML: {e}", "red"))
        sys.exit(1)
    if not problem_dict:
        # Implicit string concatenation keeps the message on one logical
        # line (the old backslash continuation leaked indentation spaces
        # into the logged text).
        logging.error(colored(
            "No links were scraped from the "
            f"target url: {url}", "red"))
        sys.exit(1)
    print(
        colored(
            f"Successfully scraped {len(problem_dict)} Grind 75 problems.",
            "green"))
    return problem_dict
if __name__ == "__main__":
    # Ad-hoc smoke run: scrape and dump every problem as "name: link".
    problems = scrape_grind75()
    for problem_link, problem_name in problems.items():
        print(f"{problem_name}: {problem_link}")