-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbase_app.py
151 lines (128 loc) · 7.42 KB
/
base_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
"""
Simple Streamlit webserver application for serving developed classification
models.
Author: Explore Data Science Academy.
Note:
---------------------------------------------------------------------
Please follow the instructions provided within the README.md file
located within this directory for guidance on how to use this script
correctly.
---------------------------------------------------------------------
Description: This file is used to launch a minimal streamlit web
application. You are expected to extend the functionality of this script
as part of your predict project.
For further help with the Streamlit framework, see:
https://docs.streamlit.io/en/latest/
"""
# Streamlit dependencies
import streamlit as st
import joblib,os
# Data dependencies
import pandas as pd
# Vectorizer
# Load the fitted TF-IDF vectorizer from its pickle file. The `with` block
# guarantees the file handle is closed once joblib has finished reading it
# (the handle was previously left open for the lifetime of the process).
# `news_vectorizer` remains bound at module level, now as a closed file object.
with open("resources/mytfidf_vectorizer.pkl", "rb") as news_vectorizer:
    tweet_cv = joblib.load(news_vectorizer)  # loading your vectorizer from the pkl file

# Load your raw data
# The Information page displays the 'sentiment' and 'message' columns of this frame.
raw = pd.read_csv("resources/train_stream2.csv")
# Message shown for each class label the models can predict.
_PREDICTION_MESSAGES = {
    1: "1: Tweet believes in man-made Climate Change.",
    2: "2: Tweet is a news item.",
    0: "0: Tweet neither supports or refutes believe in climate change.",
}

# Message used for every label that is not a key of _PREDICTION_MESSAGES
# (the original if/elif chain treated "anything else" as the -1 class).
_FALLBACK_MESSAGE = "-1: Tweet does not believe in man-made climate change."

# (button label, path of the pickled model) in the order the buttons are drawn.
_MODEL_BUTTONS = (
    ("Classify With LinearSVC", "resources/linersvc.pkl"),
    ("Classify With Bagging Classifier", "resources/bagging_classifier.pkl"),
    (
        "Classify With Resampled Logistic Regression",
        "resources/logistic_regression_resampled.pkl",
    ),
)


def _load_model(model_path):
    """Load a pickled model with joblib, closing the file handle afterwards."""
    with open(model_path, "rb") as model_file:
        return joblib.load(model_file)


def _describe_prediction(prediction):
    """Return the user-facing message for a single-element prediction result."""
    # predict() is called with exactly one document, so the first element is
    # the label for the tweet the user typed.
    return _PREDICTION_MESSAGES.get(prediction[0], _FALLBACK_MESSAGE)


def _render_information_page():
    """Draw the Information page: project blurb, raw data toggle and EDA images."""
    st.info("General Information")
    # You can read a markdown file from supporting resources folder
    st.markdown("Our team has the task of predicting whether a tweet supports, refutes, or is undecided about man-made climate change. Also, it can detect if a tweet was just a news item.")
    st.markdown("Members of the Dolph Analytics team include Justice Oyemike (Business Analyst), Molapo Kgarose (Data Scientist), Olamide Oladipo (Machine Learning Engineer), David Odimegwu (Product Manager), and Rachael Njuguna (Software Engineer). ")

    st.subheader("Raw Twitter data and label")
    if st.checkbox('Show raw data'):  # data is hidden if box is unchecked
        st.write(raw[['sentiment', 'message']])  # will write the df to the page

    # NOTE(review): the EDA section is assumed to belong to the Information
    # page - confirm against the intended layout.
    st.subheader("Some EDA of the Data")
    st.markdown("The data is a multi-class target variables with 4 labels for sentiments. The labels are 1 for Pro or supports climate change, 2 for New tweet, 0 for undecided about climate change and -1 for denies climate change. ")
    st.markdown("There was noticeable class imbalance with the 1 (Pro) label having higher data points. The image below illustrates this.")
    st.image("images/sentiment_labels.png", caption="Count of labels in the dataset")
    st.markdown("We also found the hashtags very informative. There were distinctive hashtags for each label. We noticed that some hashtags were generalized across all the labels. Examples were climate, climatechange, and globalwarming. So we removed these because they would skew the modeling process.")
    st.markdown("Here are some visualizations of the hashtags we found insightful in each label.")
    st.image("images/pro_hashtags.png", caption="Top hashtags for the Pro (1) Label")
    st.image("images/anti_hashtags.png", caption="Top hashtags used by people against climate change. Anti (-1) group.")
    st.image("images/neutral_hashtags.png", caption="Top hashtags used by people undecided about climate change. Neutral (0) group.")
    st.image("images/news_hashtags.png", caption="Top hashtags used when tweeting about a news item. News (2) Label.")


def _render_prediction_page():
    """Draw the Prediction page: a text box plus one classify button per model."""
    st.info("Prediction with ML Models. We are using three models: LinearSVC, Bagging Classifier and Logistic Regression")
    # Creating a text box for user input
    tweet_text = st.text_area("Enter Text", "Type Here")

    # One button per model; the handling is identical apart from the pickle
    # that gets loaded, so it is driven from the _MODEL_BUTTONS table.
    for button_label, model_path in _MODEL_BUTTONS:
        if st.button(button_label):
            # Transforming user input with vectorizer
            vect_text = tweet_cv.transform([tweet_text]).toarray()
            # The model is loaded only when its button is pressed.
            predictor = _load_model(model_path)
            prediction = predictor.predict(vect_text)
            # When model has successfully run, will print prediction
            result = _describe_prediction(prediction)
            st.success("Text Categorized as {}".format(result))


# The main function where we will build the actual app
def main():
    """Tweet Classifier App with Streamlit.

    Draws the static title and subheader, then renders either the
    "Prediction" or the "Information" page depending on the option
    chosen in the sidebar select box.
    """
    # Creates a main title and subheader on your page -
    # these are static across all pages
    st.title("Dolph Analytics App")
    st.subheader("Climate change tweet classification")

    # Creating sidebar with selection box -
    # you can create multiple pages this way
    options = ["Prediction", "Information"]
    selection = st.sidebar.selectbox("Choose Option", options)

    if selection == "Information":
        # Building out the "Information" page
        _render_information_page()
    elif selection == "Prediction":
        # Building out the prediction page
        _render_prediction_page()
# Entry point: build the app only when this file is executed as the main
# module (not when it is imported).
if __name__ == "__main__":
    main()