-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
124 lines (106 loc) · 4.64 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import re
import os
import nltk
import joblib
import requests
import numpy as np
import urllib.request as urllib
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
from nltk.corpus import stopwords
from wordcloud import WordCloud, STOPWORDS
import streamlit as st
def extract_all_reviews(url, clean_reviews, org_reviews, customernames, commentheads, ratings):
    """Scrape one review page and append its contents to the caller's lists.

    The page at ``url`` is downloaded and parsed, then the review bodies,
    comment headings, customer names and star ratings found on it are appended
    in place to the lists passed in. Nothing is returned.

    Args:
        url: Address of a single review-listing page.
        clean_reviews: Receives each review lower-cased with every character
            other than ASCII letters and spaces replaced by a space.
        org_reviews: Receives each review text with the "READ MORE" marker
            removed.
        customernames: Receives each customer name prefixed with ``~``.
        commentheads: Receives each review heading.
        ratings: Receives one entry per rating element: the integer 1-5, ``0``
            for any other integer, or the raw text when it is not an integer.

    Raises:
        Whatever ``urlopen`` raises for an unreachable or malformed URL.
    """
    with urllib.urlopen(url) as u:
        page = u.read()
    page_html = BeautifulSoup(page, "html.parser")

    # The selectors below are the CSS class names the page markup is expected to use.
    reviews = page_html.find_all('div', {'class': 't-ZTKy'})
    commentheads_ = page_html.find_all('p', {'class': '_2-N8zT'})
    customernames_ = page_html.find_all('p', {'class': '_2sc7ZR _2V5EHH'})
    ratings_ = page_html.find_all('div', {'class': ['_3LWZlK _1BLPMq', '_3LWZlK _32lA32 _1BLPMq', '_3LWZlK _1rdVr6 _1BLPMq']})

    for review in reviews:
        # Drop the "READ MORE" marker before cleaning so it does not leak into
        # the cleaned text as the words "read more".
        text = re.sub(r'READ MORE', '', review.get_text())
        org_reviews.append(text)
        clean_reviews.append(re.sub(r'[^a-zA-Z ]', ' ', text).lower())

    customernames.extend('~' + cn.get_text() for cn in customernames_)
    commentheads.extend(ch.get_text() for ch in commentheads_)

    for r in ratings_:
        rating_text = r.get_text()
        try:
            value = int(rating_text)
        except ValueError:
            # Keep one entry per rating element so the list stays aligned with
            # the page; non-numeric text is stored unchanged.
            ratings.append(rating_text)
        else:
            ratings.append(value if 1 <= value <= 5 else 0)
def main():
    """Render the Streamlit page that scrapes and displays Flipkart reviews.

    Asks for a product URL and a review count. When "Extract Reviews" is
    pressed, the product page is fetched, the product name and price are read
    from it, and the "see all reviews" link is followed page by page through
    ``extract_all_reviews`` until enough reviews are collected or a page adds
    nothing new. A word cloud of the cleaned reviews is saved as ``woc.png``
    and shown, followed by the product details and each review.

    Returns:
        None. All output goes through Streamlit calls.
    """
    st.title(" Sentiment Analysis")
    url = st.text_input("Enter the Flipkart product URL:")
    num_reviews = st.number_input("Enter the number of reviews to extract:", min_value=1, step=1)

    if not st.button("Extract Reviews"):
        return

    # Used as a slice bound below, so make sure it is an integer.
    num_reviews = int(num_reviews)

    clean_reviews = []
    org_reviews = []
    customernames = []
    commentheads = []
    ratings = []

    try:
        with urllib.urlopen(url) as u:
            page = u.read()
    except (ValueError, OSError) as err:
        # ValueError: empty or malformed URL; OSError covers URLError/HTTPError
        # and lower-level connection failures.
        st.error(f"Could not open the product page: {err}")
        return
    page_html = BeautifulSoup(page, "html.parser")

    proname_elements = page_html.find_all('span', {'class': 'B_NuCI'})
    proname = proname_elements[0].get_text() if proname_elements else "Product Name Not Found"

    price_elements = page_html.find_all('div', {'class': '_30jeq3 _16Jk6d'})
    price = price_elements[0].get_text() if price_elements else "Price Not Found"

    # Getting the link of see all reviews button: the last anchor inside the
    # first review-summary block.
    review_blocks = page_html.find_all('div', {'class': 'col JOpGWq'})
    anchors = review_blocks[0].find_all('a') if review_blocks else []
    href = anchors[-1].get('href') if anchors else None
    if not href:
        st.error("See all reviews button not found.")
        return
    all_reviews_url = 'https://www.flipkart.com' + href

    # Read review pages in order. An explicit counter is used because
    # rewriting only the last character of the URL breaks once the page
    # number has more than one digit.
    page_no = 1
    while True:
        seen_before = len(clean_reviews)
        try:
            extract_all_reviews(
                f"{all_reviews_url}&page={page_no}",
                clean_reviews, org_reviews, customernames, commentheads, ratings,
            )
        except OSError as err:
            # Keep whatever was collected so far instead of losing it all.
            st.warning(f"Stopped reading reviews at page {page_no}: {err}")
            break
        page_no += 1
        # Stop when the page added nothing new or enough reviews were gathered.
        if len(clean_reviews) == seen_before or len(clean_reviews) >= num_reviews:
            break

    org_reviews = org_reviews[:num_reviews]
    clean_reviews = clean_reviews[:num_reviews]
    customernames = customernames[:num_reviews]
    commentheads = commentheads[:num_reviews]
    ratings = ratings[:num_reviews]

    # Building wordcloud
    for_wc = ' '.join(clean_reviews)
    wcstops = set(STOPWORDS)
    try:
        wc = WordCloud(width=1400, height=800, stopwords=wcstops, background_color='white').generate(for_wc)
    except ValueError:
        # Raised when no usable word is left (no reviews, or only stop words).
        wc = None
        st.warning("Not enough review text to build a word cloud.")

    if wc is not None:
        fig = plt.figure(figsize=(20, 10), facecolor='k', edgecolor='k')
        try:
            plt.imshow(wc, interpolation='bicubic')
            plt.axis('off')
            plt.tight_layout()
            plt.savefig('woc.png')
        finally:
            # Release the figure; otherwise every button press leaves one open.
            plt.close(fig)
        st.image('woc.png')

    st.write(f"Product Name: {proname}")
    st.write(f"Price: {price}")
    st.write("Reviews:")

    def _field(values, index):
        # The per-review lists are scraped independently and may be shorter
        # than the review list, so fall back instead of raising IndexError.
        return values[index] if index < len(values) else "Not available"

    for i, (original, cleaned) in enumerate(zip(org_reviews, clean_reviews)):
        st.write(f"Review {i+1}:")
        st.write(f"Original Review: {original}")
        st.write(f"Cleaned Review: {cleaned}")
        st.write(f"Customer Name: {_field(customernames, i)}")
        st.write(f"Comment Head: {_field(commentheads, i)}")
        st.write(f"Rating: {_field(ratings, i)}")
# Start the app only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()