-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathquotes_collector.py
46 lines (35 loc) · 1.32 KB
/
quotes_collector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from textwrap import wrap
from tkinter import W
from bs4 import BeautifulSoup
from itertools import zip_longest
import requests,csv,os,re
# Run relative to this script's own directory so quotes.csv always lands
# next to the script, regardless of the caller's working directory.
abspath = os.path.abspath(__file__)
dname = os.path.dirname(abspath)
os.chdir(dname)

# Parallel lists: quotes[i] was said by authors[i].
quotes = []
authors = []

# Scrape the "top quotes" listing page by page.
# NOTE: range(1, 2) fetches page 1 only — widen the stop value to pull more pages.
for number_page in range(1, 2):
    result = requests.get(
        f"https://www.azquotes.com/top_quotes.html?p={number_page}",
        timeout=30,  # fail fast instead of hanging forever on a dead connection
    )
    result.raise_for_status()  # don't silently parse an HTTP error page
    soup = BeautifulSoup(result.content, "lxml")

    # Quote text lives in <a class="title">; the author in <div class="author">.
    quote = soup.find_all("a", {"class": "title"})
    author = soup.find_all("div", {"class": "author"})

    # zip keeps quote/author pairs aligned and stops at the shorter list,
    # so a count mismatch can no longer raise IndexError.
    for q, a in zip(quote, author):
        # Keep only quotes of <= 150 characters; longer ones are skipped
        # because they break the downstream image layout.
        if len(q.text) <= 150:
            # Strip typographic quote characters that interfere with rendering.
            qq = re.sub(r'[’“”]', '', q.text)
            aa = re.sub(r'[’“”]', '', a.text.strip())
            quotes.append(qq)
            authors.append(aa)

# Export as two CSV columns (quote, author); zip_longest pads with None
# should the lists ever diverge in length.
exported = zip_longest(quotes, authors)
# newline='' is required by the csv module; utf-8 keeps non-ASCII text intact.
# The with-block guarantees the file is flushed and closed (the original
# left the handle open, risking a truncated quotes.csv).
with open("quotes.csv", "w", newline='', encoding="utf-8") as myfile:
    wr = csv.writer(myfile)
    wr.writerows(exported)