-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
185 lines (154 loc) · 6.19 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 7 13:09:56 2019
@author: Amir.Khan
"""
from flask import Flask, render_template,request,url_for
from flask_bootstrap import Bootstrap
import numpy as np
import time
# Flask application object; the route decorators further down register on it.
app = Flask(__name__)
# Attach the Flask-Bootstrap extension to the application.
Bootstrap(app)
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import load_model
import os
from model_final import *
from googletrans import Translator
# Shared googletrans client; final_predictions calls translator.translate()
# to obtain a reference translation for free-text input.
translator = Translator()
# Module-level placeholders. NOTE(review): final_predictions assigns local
# variables with these same names and never declares them ``global``, so
# these two module-level values are not modified by it.
value = '';
actual = '';
# Sample sentences that are answered from the loaded corpus instead of being
# tokenized on the fly.  The position of a sentence in this tuple is the row
# used in ``x`` and in ``french_sentences``.
_KNOWN_SENTENCES = (
    "new jersey is sometimes quiet during autumn , and it is snowy in april.",
    "the united states is usually chilly during july , and it is usually freezing in november.",
    "california is usually quiet during march , and it is usually hot in june .",
    "the united states is sometimes mild during june , and it is cold in september .",
    "your least liked fruit is the grape , but my least liked is the apple .",
    "his favorite fruit is the orange , but my favorite is the grape .",
)
_KNOWN_SENTENCE_ROWS = {text: row for row, text in enumerate(_KNOWN_SENTENCES)}


def _decode_prediction(prediction, id_to_word):
    """Join the arg-max word of every row of ``prediction`` with spaces."""
    return ' '.join(id_to_word[np.argmax(step)] for step in prediction)


def final_predictions(x, y, x_tk, y_tk, s):
    """
    Gets predictions using the final model
    :param x: Preprocessed English data
    :param y: Preprocessed French data
    :param x_tk: English tokenizer
    :param y_tk: French tokenizer
    :param s: English sentence to translate
    :return: Tuple of (predicted French sentence, reference French sentence).
        The prediction still contains '<PAD>' tokens. The reference comes
        from ``french_sentences`` for the known sample sentences and from
        the googletrans ``translator`` for any other input.
    """
    print(len(x_tk.word_index))
    print(len(y_tk.word_index))

    # NOTE(review): the network is rebuilt and its weights re-read from disk
    # on every call; kept as in the original.
    model = model_final(x.shape,
                        y.shape[1],
                        len(x_tk.word_index) + 1,
                        len(y_tk.word_index) + 1)
    model.load_weights('models/machine_translation/best_combine.hdf5')

    # Invert the French vocabulary; id 0 is what post-padding fills with.
    y_id_to_word = {word_id: word for word, word_id in y_tk.word_index.items()}
    y_id_to_word[0] = '<PAD>'

    row = _KNOWN_SENTENCE_ROWS.get(s)
    if row is not None:
        sentences = np.array([x[row]])
    else:
        # Encode with the tokenizer itself so the input gets the same
        # normalisation as the training text and unknown words are skipped
        # instead of raising KeyError from a raw word_index lookup.
        encoded = x_tk.texts_to_sequences([s])
        padded = pad_sequences(encoded, maxlen=x.shape[-1], padding='post')
        # As in the original, the first corpus row is sent along as a second
        # batch row; only the first output row is decoded.
        sentences = np.array([padded[0], x[0]])

    predictions = model.predict(sentences, len(sentences))
    value = _decode_prediction(predictions[0], y_id_to_word)

    if row is not None:
        actual = french_sentences[row]
    else:
        actual = translator.translate(s, dest='fr').text
    return value, actual
def load_data(path):
    """
    Load dataset
    :param path: Path of the text file to read
    :return: List of the file's lines, split on '\\n'. A file that ends with
        a newline therefore yields a trailing empty string.
    """
    # Decode explicitly instead of relying on the platform default encoding,
    # which would garble accented characters on some systems.
    # NOTE(review): assumes the corpus files are UTF-8 - confirm.
    with open(path, "r", encoding="utf-8") as f:
        data = f.read()
    return data.split('\n')
# Load both corpora once at import time. french_sentences is also read by
# final_predictions to return the reference translation of a sample sentence.
french_sentences = load_data("data/small_vocab_fr")
english_sentences = load_data("data/small_vocab_en")
def tokenize(x):
    """
    Tokenize x
    :param x: List of sentences/strings to be tokenized
    :return: Tuple of (tokenized x data, tokenizer used to tokenize x)
    :raises Exception: Any error raised by the Keras tokenizer is propagated.
    """
    # Errors are deliberately not caught here: swallowing them made this
    # function return None, which the caller then failed to unpack with an
    # unrelated TypeError that hid the real cause.
    tokenizer = Tokenizer(num_words=None, char_level=False)
    tokenizer.fit_on_texts(x)
    sequences = tokenizer.texts_to_sequences(x)
    return sequences, tokenizer
def pad(x, length=None):
    """
    Pad x
    :param x: List of sequences.
    :param length: Length to pad the sequence to. If None, use length of longest sequence in x.
    :return: Padded numpy array of sequences
    :raises Exception: Any error raised by pad_sequences is propagated.
    """
    # Errors are deliberately not caught here: swallowing them made this
    # function return None, which only failed later (on ``.reshape``) with a
    # message that hid the real cause.
    return pad_sequences(x, maxlen=length,
                         padding='post', truncating='post')
def preprocess(x, y):
    """
    Tokenize and pad a pair of parallel sentence lists.
    :param x: Feature List of sentences
    :param y: Label List of sentences
    :return: Tuple of (Preprocessed x, Preprocessed y, x tokenizer, y tokenizer)
    """
    x_sequences, x_tk = tokenize(x)
    y_sequences, y_tk = tokenize(y)

    x_padded = pad(x_sequences)
    y_padded = pad(y_sequences)

    # Keras's sparse_categorical_crossentropy function requires the labels
    # to be in 3 dimensions, so append a trailing axis of size 1.
    y_padded = y_padded.reshape(y_padded.shape + (1,))

    return x_padded, y_padded, x_tk, y_tk
# Tokenize and pad both corpora once at import time; the /analyse route passes
# these four objects to final_predictions on every request.
preproc_english_sentences, preproc_french_sentences, english_tokenizer, french_tokenizer =\
    preprocess(english_sentences, french_sentences)
@app.route('/')
def index():
    """Render the index.html template for the root URL."""
    page = render_template('index.html')
    return page
@app.route('/analyse', methods=['POST'])
def analyse():
    """
    Translate the submitted text and render the result page.

    Reads the ``rawtext`` form field, runs it through final_predictions and
    renders ``success_machinetranslation.html`` with the submitted text, the
    model output (padding tokens removed), the reference translation and the
    elapsed wall-clock time in seconds.
    """
    start = time.time()

    # The route only accepts POST, so no request.method check is needed; the
    # former check left ``rawtext`` unbound on its (unreachable) false path.
    rawtext = request.form['rawtext']
    print(rawtext)

    cv, av = final_predictions(preproc_english_sentences,
                               preproc_french_sentences,
                               english_tokenizer,
                               french_tokenizer,
                               rawtext)
    print(cv)

    # Drop the '<PAD>' tokens themselves rather than blanking them out, so
    # the result carries no runs of leftover spaces.
    rv = ' '.join(token for token in cv.split() if token != '<PAD>')

    final_time = time.time() - start
    return render_template('success_machinetranslation.html',
                           received_text=rawtext,
                           result=rv,
                           actualv=av,
                           final_time=final_time)
# Start Flask's built-in server only when this file is executed directly.
# NOTE(review): debug=True turns on Flask's debug mode; per the Flask docs it
# must not be enabled where untrusted clients can reach the server - confirm
# this entry point is used for local development only.
if __name__ == '__main__':
    app.run(debug=True)