-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathread Facebook
57 lines (43 loc) · 1.45 KB
/
read Facebook
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import nltk
import io
import unicodedata
import numpy as np
import re
import string
from numpy import linalg
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.tokenize import PunktSentenceTokenizer
from nltk.tokenize import PunktSentenceTokenizer
from nltk.corpus import webtext
from nltk.stem.porter import PorterStemmer
from nltk.stem.wordnet import WordNetLemmatizer
# Load the whole review corpus into memory as a single string.
with open('kindle.txt', encoding ='ISO-8859-2') as corpus_file:
    text = corpus_file.read()

# Build a Punkt sentence tokenizer from the corpus text itself and split the
# corpus with it (presumably the text serves as Punkt training data -- confirm
# against the NLTK docs).  `sents` is not printed here.
sent_tokenizer = PunktSentenceTokenizer(text)
sents = sent_tokenizer.tokenize(text)

# Show the corpus split into words, then into sentences, using NLTK's
# module-level tokenizers.
corpus_words = word_tokenize(text)
print(corpus_words)
corpus_sentences = sent_tokenize(text)
print(corpus_sentences)
# Print every token of the corpus next to its Porter stem.
porter_stemmer = PorterStemmer()
nltk_tokens = nltk.word_tokenize(text)
for token in nltk_tokens:
    stemmed = porter_stemmer.stem(token)
    print("Actual: % s Stem: % s" % (token, stemmed))
# Print every token of the corpus next to its WordNet lemma.
wordnet_lemmatizer = WordNetLemmatizer()
nltk_tokens = nltk.word_tokenize(text)
for token in nltk_tokens:
    lemma = wordnet_lemmatizer.lemmatize(token)
    print("Actual: % s Lemma: % s" % (token, lemma))
# Part-of-speech tagging.  Note that `text` is rebound here from the raw
# corpus string to the output of the word tokenizer.
text = nltk.word_tokenize(text)
tagged_tokens = nltk.pos_tag(text)
print(tagged_tokens)
# Sentiment pass: score each non-blank line of the review file with VADER and
# print the line followed by every score `polarity_scores` returns for it.
sid = SentimentIntensityAnalyzer()

# NLTK resource names are '/'-separated with no embedded spaces; the spaced
# form 'tokenizers / punkt / english.pickle' cannot be resolved by
# nltk.data.load.
# NOTE(review): `tokenizer` is not referenced anywhere below -- confirm whether
# this load is still needed.
tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')

with open('kindle.txt', encoding ='ISO-8859-2') as f:
    # Iterating the file yields one line at a time, so a trailing newline at
    # the end of the file does not produce a spurious empty final entry.
    for line in f:
        review = line.rstrip('\n')
        if not review.strip():
            # Nothing to score on a blank line.
            continue
        print(review)
        scores = sid.polarity_scores(review)
        # Keys are sorted so the scores always print in the same order; the
        # whole score line is emitted at once and ends with a newline so the
        # next review starts on its own line.
        print(''.join('{0}: {1}, '.format(key, scores[key])
                      for key in sorted(scores)))