-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfrequency.py
23 lines (19 loc) · 856 Bytes
/
frequency.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import sys
import json
from collections import Counter
# Module-level accumulator: every word seen in the tweets processed so far.
all_words = []


def getTextFromTweet():
    """Print the relative frequency of every word found in a file of tweets.

    The path of the tweet file is taken from ``sys.argv[1]``.  The 23rd line
    of that file (index 22) is parsed as JSON, and the ``text`` of each entry
    under its ``statuses`` key is split on whitespace.  The resulting words
    are appended to the module-level ``all_words`` list, and one line is
    printed per distinct word: the word, a space, and its frequency rounded
    to five decimal places.

    A word's frequency is its number of occurrences divided by the total
    number of word occurrences in ``all_words``.

    Raises:
        IndexError: if no file argument was given or the file has fewer
            than 23 lines.
        ValueError: if line 23 is not valid JSON.
        KeyError: if the document has no ``statuses`` key or an entry has
            no ``text`` key.
    """
    # Read everything inside a context manager so the handle is closed even
    # if parsing fails afterwards.
    with open(sys.argv[1]) as tweet_file:
        lines = tweet_file.readlines()

    # The first 22 lines of the captured Twitter response are skipped; the
    # JSON payload is expected on line 23.
    tweets = json.loads(lines[22])

    for tweet in tweets['statuses']:
        all_words.extend(tweet['text'].split())

    counts = Counter(all_words)  # word -> number of occurrences
    # The denominator is the number of word occurrences, not the number of
    # distinct words; dividing by len(counts) would inflate every frequency.
    total = len(all_words)

    for word, occurrences in counts.items():
        freq = float(occurrences) / total
        # %-formatting with a single print argument produces the same output
        # as the Python 2 statement ``print word, value`` and also runs
        # unchanged on Python 3.
        print("%s %s" % (word, round(freq, 5)))
def main():
    """Script entry point: delegate to ``getTextFromTweet``."""
    getTextFromTweet()
# Invoke main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()