-
Notifications
You must be signed in to change notification settings - Fork 483
/
tf_sentiment.py
77 lines (56 loc) · 3.1 KB
/
tf_sentiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import tensorflow_datasets as tfds
import tensorflow as tf
# --- Data -------------------------------------------------------------------
# Load the 'subwords8k' configuration of the IMDB reviews dataset.
# with_info=True additionally returns the dataset metadata, which is where
# the text encoder used below comes from.
dataset, info = tfds.load(
    'imdb_reviews/subwords8k',
    as_supervised=True,
    with_info=True,
)
train_dataset = dataset['train']
test_dataset = dataset['test']

# Encoder attached to the 'text' feature; reused later to tokenize raw
# sentences and to size the embedding table (encoder.vocab_size).
encoder = info.features['text'].encoder

BUFFER_SIZE = 10000
BATCH_SIZE = 64

# Per-component padding: the first component is padded along its single
# (variable-length) axis, the second is a scalar and needs no padding.
padded_shapes = ([None], ())

# Only the training split is shuffled; both splits are batched with padding.
train_dataset = train_dataset.shuffle(BUFFER_SIZE)
train_dataset = train_dataset.padded_batch(
    BATCH_SIZE, padded_shapes=padded_shapes)
test_dataset = test_dataset.padded_batch(
    BATCH_SIZE, padded_shapes=padded_shapes)

# --- Model 1: single bidirectional LSTM --------------------------------------
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(encoder.vocab_size, 64))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))
model.add(tf.keras.layers.Dense(64, activation='relu'))
# Single sigmoid unit: the output is read as P(review is positive).
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(1e-4),
    metrics=['accuracy'],
)

# validation_steps=30 caps each validation pass at 30 batches of the test
# split instead of running over all of it.
history = model.fit(
    train_dataset,
    epochs=5,
    validation_data=test_dataset,
    validation_steps=30,
)
def pad_to_size(vec, size):
    """Return a copy of ``vec`` right-padded with zeros to length ``size``.

    The input is never modified; a new list is always returned. This avoids
    surprising the caller by growing a list they still hold a reference to.

    Args:
        vec: Sequence of values (e.g. token ids) to pad. Any iterable that
            ``list()`` accepts works, including tuples.
        size: Desired minimum length of the result.

    Returns:
        A new list containing the items of ``vec`` followed by as many zeros
        as needed to reach ``size`` elements. If ``vec`` already has ``size``
        or more elements, the copy is returned unpadded (nothing is
        truncated).
    """
    padded = list(vec)
    # A non-positive multiplier yields an empty list, so over-long inputs
    # pass through unchanged.
    padded.extend([0] * (size - len(padded)))
    return padded
def sample_predict(sentence, pad, model_):
    """Run ``model_`` on a single raw sentence and return its prediction.

    The sentence is tokenized with the module-level ``encoder``. When ``pad``
    is truthy the token list is zero-padded to 64 entries via
    ``pad_to_size``. The tokens are then cast to float32, given a leading
    batch axis, and passed to ``model_.predict``.

    Args:
        sentence: Text to score.
        pad: Whether to zero-pad the encoded sentence to 64 tokens.
        model_: Object exposing ``predict``; called with a batch of one.

    Returns:
        Whatever ``model_.predict`` returns for the one-element batch.
    """
    token_ids = encoder.encode(sentence)
    if pad:
        token_ids = pad_to_size(token_ids, 64)

    # Add the batch dimension expected by predict(): (tokens,) -> (1, tokens).
    batch = tf.expand_dims(tf.cast(token_ids, tf.float32), 0)
    return model_.predict(batch)
# Hand-written reviews used to sanity-check each trained model.
# NOTE: 'medicore' is misspelled in the second sample; it is kept verbatim
# because the text is model input and changing it would change the output.
SAMPLE_TEXTS = (
    'This movie was awesome. The acting was incredible. Highly recommend',
    'This movie was so so. The acting was medicore. Kind of recommend',
)

# --- Model 1: score the sample reviews ---------------------------------------
for sample_text in SAMPLE_TEXTS:
    # Scale the sigmoid output to a percentage.
    predictions = sample_predict(sample_text, pad=True, model_=model) * 100
    # predictions is a one-element array (batch of one, single output unit).
    # Pull the scalar out explicitly with .item(): formatting a multi-
    # dimensional array with %f relies on implicit array-to-scalar
    # conversion, which newer NumPy releases deprecate.
    print('probability this is a positive review %.2f' % predictions.item())

# --- Model 2: two stacked bidirectional LSTMs plus dropout -------------------
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(encoder.vocab_size, 64),
    # return_sequences=True so the next recurrent layer receives the full
    # sequence of outputs rather than only the last one.
    tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(64, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy',
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=['accuracy'])

# Same training schedule as the first model so the two are comparable.
history = model.fit(train_dataset, epochs=5, validation_data=test_dataset,
                    validation_steps=30)

# --- Model 2: score the same sample reviews ----------------------------------
for sample_text in SAMPLE_TEXTS:
    predictions = sample_predict(sample_text, pad=True, model_=model) * 100
    print('probability this is a positive review %.2f' % predictions.item())