-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSentiment.py
61 lines (50 loc) · 1.89 KB
/
Sentiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Import libraries
import pandas as pd
from collections import Counter
import string
import re
import nltk
#nltk.download('stopwords')
#nltk.download('punkt')
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from textblob import TextBlob
# Build a DataFrame from the data passed in by the caller as _arg1
df = pd.DataFrame(_arg1)
# Optional cap on the number of rows processed (currently disabled)
#df = df[:7000]
# English stopword list from the NLTK stopwords corpus
stop_words = stopwords.words('english')
def clean_tweets(tweets):
    """Turn one raw tweet into a list of cleaned word tokens.

    Processing steps, in order: strip URLs, tokenize with NLTK's
    ``word_tokenize``, lowercase, delete ASCII punctuation characters,
    keep only purely alphabetic tokens, and drop English stopwords.

    Args:
        tweets: Text of a single tweet. Any non-string value (for example
            ``None`` or NaN standing in for a missing tweet) produces an
            empty list instead of raising.

    Returns:
        A list of lowercase alphabetic tokens, in their original order,
        with English stopwords removed.
    """
    if not isinstance(tweets, str):
        return []

    # Remove whole links. Deleting only the literal "http" (as was done
    # before) leaves the remainder of the URL, e.g. "s://t.co/abc", in the
    # text, where it is tokenized and counted as if it were words.
    tweets = re.sub(r'https?://\S+', '', tweets, flags=re.IGNORECASE)

    # tokenize and lowercase
    tokens = [tok.lower() for tok in word_tokenize(tweets)]

    # remove punctuation characters from each token
    table = str.maketrans('', '', string.punctuation)
    stripped = (tok.translate(table) for tok in tokens)

    # Build the stopword set once and reuse it on later calls rather than
    # reconstructing it for every tweet.
    stop_set = getattr(clean_tweets, '_stop_set', None)
    if stop_set is None:
        stop_set = clean_tweets._stop_set = set(stopwords.words('english'))

    # keep alphabetic tokens that are not stopwords
    return [w for w in stripped if w.isalpha() and w not in stop_set]
# Run clean_tweets over every entry of the 'text' column and store the
# resulting token lists in a new 'Clean_Tweets' column
cleaned_tokens = df['text'].apply(clean_tweets)
df['Clean_Tweets'] = cleaned_tokens
# create word count function
def counter(tweets):
    """Tally how often each word occurs across a collection of token lists.

    Args:
        tweets: Iterable whose items are themselves iterables of words
            (one item per tweet).

    Returns:
        A ``collections.Counter`` mapping each word to its total number
        of occurrences over all items.
    """
    tally = Counter()
    for token_list in tweets:
        # Counter.update adds one count for every element of the iterable.
        tally.update(token_list)
    return tally
word_cnt = counter(df['Clean_Tweets'])
most_common_words = word_cnt.most_common()
# create word count dataframe
twitter_word_counts = pd.DataFrame(most_common_words, columns = ['Words', 'Counts'])
# sort word count dataframe
twitter_word_counts = twitter_word_counts.sort_values(by='Counts', ascending=False)
twitter_word_counts = twitter_word_counts[:250]
# compute sentiment scores (polarity) and labels
sentiment_scores = [round(TextBlob(tweet).sentiment.polarity, 3) for tweet in twitter_word_counts['Words']]
sentiment_category = ['positive' if score > 0 else 'negative' if score < 0 else 'neutral' for score in sentiment_scores]
twitter_word_counts['Sentiment'] = sentiment_category
return twitter_word_counts.to_dict(orient='list')