-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathsentimentanalysis.py
138 lines (127 loc) · 5.4 KB
/
sentimentanalysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# /**
# * Sentiment Analysis Task
# *
# * This python script takes the name of a directory as a command line argument
# and analyzes each .txt file in the directory for sentiment. A report is
# produced in the same directory containing the overall sentiment of the
# collection of text files, as well as an individual sentiment analysis
# of each file.
# * @author Kara Beason <[email protected]>
# * @copyright (c) 2019 Appalachian State University, Boone, NC
# * @license GNU General Public License version 3
# */
from textblob import TextBlob
import os
import glob
import sys
import codecs
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter, A4
from reportlab.lib.colors import HexColor
# Check that a command line argument is present.
if len(sys.argv) != 2:
    # Usage errors go to stderr and end the script with a non-zero status so
    # that calling shells/scripts can tell the run failed.
    sys.stderr.write(
        "Usage: python %s <directory>\n" % os.path.basename(sys.argv[0]))
    sys.exit(1)
directory = str(sys.argv[1])
# Check that the command line argument is indeed a valid directory.
# os.path.isdir() is already False for paths that do not exist, so no
# separate existence check is needed.
if not os.path.isdir(directory):
    # Interpolate the directory name into the message (passing it as a second
    # argument to print() would leave the "%s" placeholder unformatted).
    sys.stderr.write("Directory %s is not valid.\n" % directory)
    sys.exit(1)
# Check the current y position and whether the next line will run off the page.
# Return a valid new y value.
def get_next_valid_y(report, y, page_height=None, margin=100, line_height=15):
    """Return the y position at which the next line of text should be drawn.

    If the current position is already within ``margin`` of the bottom of
    the page, the current page is finished with ``report.showPage()`` and the
    position returned is the top of a fresh page. Otherwise the position
    simply moves down by one line.

    Args:
        report: Canvas-like object; only its ``showPage()`` method is used.
        y: Current y position on the page.
        page_height: Height of the page. Defaults to the height of the
            ``letter`` page size imported at the top of the file.
        margin: Space kept free at the top and at the bottom of each page.
        line_height: Vertical distance between two consecutive lines.

    Returns:
        The y position to use for the next line.
    """
    if y - margin < 0:
        # The next line would run off the page/into the margin:
        # start a new page and continue at its top.
        report.showPage()
        if page_height is None:
            # Page sizes are (width, height) pairs; only the height matters.
            page_height = letter[1]
        return page_height - margin
    return y - line_height
# Determine whether the polarity score (number passed in)
# is negative (red), neutral (grey), or positive (green)
def get_polarity_color(polarity):
    """Return the hex colour string used to display a polarity score.

    Scores above 0.05 map to green, scores below -0.05 map to red, and
    everything in between (both bounds included) maps to grey.
    """
    neutral_band = 0.05
    if polarity > neutral_band:
        return '#008000'
    if polarity < -neutral_band:
        return '#FF0000'
    return '#808080'
# Create and save the PDF report.
# studentSentimentDict maps each student's name to that student's sentiment
# analysis, and overallSentiment is the sentiment of all the texts combined.
def _draw_sentiment_scores(report, y, sentiment):
    """Draw the polarity and subjectivity lines below y; return the last y used."""
    y = get_next_valid_y(report, y)
    report.drawString(125, y, "Polarity:")
    y = get_next_valid_y(report, y)
    # The polarity value itself is drawn in a colour reflecting its sign;
    # the fill colour is then reset to black for the text that follows.
    report.setFillColor(HexColor(get_polarity_color(sentiment.polarity)))
    report.drawString(125, y, str(sentiment.polarity))
    report.setFillColor(HexColor('#000000'))
    y = get_next_valid_y(report, y)
    report.drawString(125, y, "Subjectivity:")
    y = get_next_valid_y(report, y)
    report.drawString(125, y, str(sentiment.subjectivity))
    return y


def print_report(studentSentimentDict, overallSentiment):
    """Write the sentiment report to "output.pdf" in the working directory.

    The first page shows the overall polarity and subjectivity. Each entry
    of studentSentimentDict then gets its own page(s) with the student's
    name, polarity, subjectivity and the list of individual assessments.

    Args:
        studentSentimentDict: Mapping of student display name to a sentiment
            object exposing ``polarity``, ``subjectivity`` and
            ``assessments``.
        overallSentiment: Sentiment object for the whole collection; only
            ``polarity`` and ``subjectivity`` are read from it.
    """
    # PDF lab canvas creation
    reportPDF = canvas.Canvas("output.pdf", pagesize=letter)
    # Page sizes are (width, height) pairs; only the height is needed here.
    # Every section starts 100 units below the top edge of the page.
    top_of_page = letter[1] - 100

    # Print the overall sentiment analysis on the first page.
    reportPDF.drawString(100, top_of_page, "Overall Sentiment: ")
    _draw_sentiment_scores(reportPDF, top_of_page, overallSentiment)
    # new page.
    reportPDF.showPage()

    # Sentiment Analysis by text file/ student
    # items() (rather than the Python-2-only iteritems()) works on both
    # Python 2 and Python 3 dictionaries.
    for user, sentiment in studentSentimentDict.items():
        y = top_of_page
        reportPDF.drawString(100, y, "Student Name: ")
        reportPDF.drawString(225, y, user)
        y = _draw_sentiment_scores(reportPDF, y, sentiment)
        y = get_next_valid_y(reportPDF, y)
        reportPDF.drawString(125, y, "Assessments:")
        for assessment in sentiment.assessments:
            y = get_next_valid_y(reportPDF, y)
            reportPDF.drawString(125, y, str(assessment))
        # Each student starts on a fresh page.
        reportPDF.showPage()
    reportPDF.save()
# Create the sentiments dictionary.
# Create the sentiments dictionary: display name -> sentiment assessment.
studentSentimentsDict = dict()
# Text of every file read so far. The pieces are joined once at the end,
# with a separator, so that the last word of one file is not fused with the
# first word of the next one in the overall analysis.
allFileTexts = []
# Change working directory to the dir that was passed in.
os.chdir(directory)
# Iterate over each text file in the directory
for fileName in glob.iglob('*.txt'):
    # Presumably file names look like <username>_<full name>[_...].txt.
    # The extension is dropped first so it cannot end up in the student's
    # name when the file name has exactly two parts.
    nameParts = os.path.splitext(fileName)[0].split('_')
    # Username will be the first part of the filename for the report.
    userName = nameParts[0]
    with codecs.open(fileName, "r", encoding='utf-8', errors='ignore') as fileData:
        # Read in the text file (undecodable bytes are dropped).
        fileText = fileData.read()
    # Add contents of text file to overall text.
    allFileTexts.append(fileText)
    # Convert to textblob object
    textBlobOutput = TextBlob(fileText)
    # Add sentiment assessment to sentiments dict under name <userfullname (username)>.
    # A file name without an underscore has no full-name part; fall back to
    # the bare name instead of failing with an IndexError.
    if len(nameParts) > 1:
        dictIndex = nameParts[1] + " " + "(" + userName + ")"
    else:
        dictIndex = userName
    # Add the sentiment assessment of the text blob to the student sentiment dictionary under the student's name.
    studentSentimentsDict[dictIndex] = textBlobOutput.sentiment_assessments
# Convert entire text into a textblob object
overallText = "\n".join(allFileTexts)
overallTextBlobOutput = TextBlob(overallText)
# Get the sentiment assessment of the whole thing.
overallSentiment = overallTextBlobOutput.sentiment_assessments
# Create and save the report.
print_report(studentSentimentsDict, overallSentiment)