-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathstats.py
175 lines (113 loc) · 3.96 KB
/
stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#
# stats.py
#
# This file contains code to help compute
# various basic stats for a given piece of text
#
# imports #
from data import *
# constants #
# Lowercase vowels; characters are lowercased before being tested against this string
VOWELS = "aeiou"
# functions #
def __remove_extra(s):
    '''
    Returns s with every non-alphabetic character stripped
    off its right-hand end (e.g. , ! ? ; :)
    NOTE: only the tail is cleaned up - non-alphabetic characters
          at the start or in the middle of s are kept as-is
    '''
    end = len(s)
    # walk backwards until the first alphabetic character is hit
    for ch in reversed(s):
        if ch.isalpha():
            break
        end -= 1
    return s[:end]
def __build_counts(s, p=lambda x : True):
    '''
    Tallies the characters of s into a dict (char -> count)
    Each char is lowercased first, and is only tallied when
    p(lowercased char) evaluates to True
    NOTE - p must be a function which takes a char
           as input and produces a bool as output;
           by default every char is accepted
    '''
    tally = {}
    for lowered in (c.lower() for c in s):
        if not p(lowered):
            # rejected by the predicate, leave it out of the result
            continue
        tally[lowered] = tally.get(lowered, 0) + 1
    return tally
def num_vowels(s):
    '''
    Counts the vowels found in the string s and returns
    a dict mapping vowel -> number of occurrences
    NOTE: Upper and lower case letters are counted together
          (keys of the output dict are lowercase)
    NOTE 2: Only vowels that actually occur in s show up
            as keys of the output dict
    '''
    def is_vowel(ch):
        return ch.isalpha() and ch in VOWELS

    return __build_counts(s, is_vowel)
def num_consonants(s):
    '''
    Counts the consonants found in the string s and returns
    a dict mapping consonant -> number of occurrences
    A consonant here is any alphabetic char that is not in VOWELS
    NOTE: Upper and lower case letters are counted together
          (keys of the output dict are lowercase)
    NOTE 2: Only consonants that actually occur in s show up
            as keys of the output dict
    '''
    def is_consonant(ch):
        if not ch.isalpha():
            return False
        return ch not in VOWELS

    return __build_counts(s, is_consonant)
def num_words(s):
    '''
    Given a string, returns a dict mapping each word
    to a count of number of occurrences
    Words are the whitespace-separated tokens of s, lowercased
    and with trailing non-alphabetic characters removed
    NOTE: This function is NOT case-sensitive
    NOTE 2: Tokens with nothing left after cleanup (e.g. "-", "42", "...")
            are skipped instead of being counted under the '' key
    '''
    counts = {}
    for token in s.split():
        word = __remove_extra(token.lower())
        if not word:
            # the token had no letters before its trailing non-letters,
            # so it is not a word
            continue
        counts[word] = counts.get(word, 0) + 1
    return counts
def get_mood(s):
    '''
    Classifies the text in s as 'positive', 'negative' or 'neutral'
    Every whitespace-separated token of s is lowercased, has its trailing
    non-alphabetic characters removed and is looked up in the dataset
    returned by read_dataset() (records are indexed by 'word' and 'polarity')
    The polarity with the most matching words wins; the result is 'neutral'
    when nothing matched, when positive and negative are tied, or when
    the winning polarity is 'both'
    '''
    # word -> polarity lookup table
    lexicon = {}
    for entry in read_dataset():
        lexicon[entry['word']] = entry['polarity']

    # number of matched words per polarity
    tally = {'positive' : 0, 'negative' : 0, 'neutral' : 0}
    for token in s.split():
        cleaned = __remove_extra(token.lower())
        try:
            label = lexicon[cleaned]
        except KeyError:
            # word is not part of the dataset
            continue
        tally[label] = tally.get(label, 0) + 1

    # ties go to whichever polarity was inserted first
    winner = max(tally, key = tally.get)
    if tally[winner] == 0:
        return 'neutral'
    if tally['positive'] == tally['negative']:
        return 'neutral'
    if winner == 'both':
        return 'neutral'
    return winner
def report_summary(s):
    '''
    Analyzes the input string s and returns a 4-tuple:
    (vowel counts, consonant counts, word counts, mood)
    '''
    return (
        num_vowels(s),
        num_consonants(s),
        num_words(s),
        get_mood(s),
    )
def report_json_summary(s):
    '''
    Analyzes the input string s and returns the
    summary stats as a python dictionary
    The dictionary has the following keys:
    - vowel_count : dict mapping vowels to number of occurrences
    - consonant_count : dict mapping consonants to number of occurrences
    - word_count : dict mapping words to number of occurrences
    - mood : string with the "mood" of the entire text {positive, neutral, negative}
    - content : the input string itself
    '''
    return {
        'vowel_count' : num_vowels(s),
        'consonant_count' : num_consonants(s),
        'word_count' : num_words(s),
        'mood' : get_mood(s),
        'content' : s,
    }