Skip to content

Commit

Permalink
Merge pull request #9 from guangxush/imdb
Browse files Browse the repository at this point in the history
imdb update
  • Loading branch information
guangxush authored Dec 16, 2018
2 parents 11605ac + bee5802 commit d5c1196
Show file tree
Hide file tree
Showing 72 changed files with 7,597 additions and 32,099 deletions.
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

### DataSet

- news
- IMDB data set

### Use

Expand All @@ -26,7 +26,8 @@

#### dataset

- dataset A (40000/10): train model 1 and using 0.2 part validate the model
- dataset B (1000): using model 1 generate model2 data and train model 2 and using 0.2 part validate the model
- dataset C (1000): test the model1&model2
- dataset A (3000/10): train model 1, using a 0.2 split to validate the model
- dataset B (100): use model 1 to generate model 2's training data, then train model 2, using a 0.2 split to validate the model
- dataset C (100): test model 1 & model 2
- the count of positive examples equals the count of negative examples

40 changes: 15 additions & 25 deletions adaptive_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,43 +6,41 @@
from util.data_load import make_err_dataset
from util import data_process
from model.model2 import mlp2
from util.data_load import generate_imdb_model2_data2
from util.util import cal_err_ratio
from util.data_load import generate_imdb_model2_data
from util.util import cal_err_ratio, cal_err_ratio_only
import numpy as np
from model_use import model_use
from model.model1 import lstm_attention_model, lstm_mul_model
from model.model1 import lstm_mul_model


# train model1
def model1(i):
def model1(index):
results_flag = True
if i > 10:
i = i % 10
i = index
if index >= 10:
i = index % 10
model2_file = './modfile/model2file/imdb.mlp.best_model.h5'
result_file = './data/err_data/imdb_'+str(i)+'.data'
data2_path = './data/model2_data/imdb_'+str(i)+'_data.csv'
# pos_file = "./data/part_data/train_pos_" + str(i) + ".txt"
# neg_file = "./data/part_data/train_neg_" + str(i) + ".txt"
train_file = "./data/part_data_all/train_" + str(i) + ".txt"
# train model1
monitor = 'val_acc'
filepath = "./modfile/model1file/lstm.best_model_"+str(i)+".h5"
check_pointer = ModelCheckpoint(filepath=filepath, monitor=monitor, verbose=1,
save_best_only=True, save_weights_only=True)
early_stopping = EarlyStopping(patience=5)
early_stopping = EarlyStopping(patience=3)
csv_logger = CSVLogger('logs/imdb_model2_mlp_' + str(i) + '.log')
Xtrain, Xtest, ytrain, ytest = data_process.get_imdb_part_data2(raw_file=train_file)
model = lstm_mul_model()
Xtrain, Xtest, ytrain, ytest = data_process.get_imdb_part_data(raw_file=train_file)
vocab_size = data_process.get_imdb_vocab_size(train_file)
model = lstm_mul_model(vocab_size=vocab_size)
model.fit(Xtrain, ytrain, batch_size=32, epochs=50, validation_data=(Xtest, ytest), verbose=1, shuffle=True,
callbacks=[check_pointer, early_stopping, csv_logger])
if results_flag:
print('Generate model2 dataset ...')
result_path = './data/model2_data/imdb_' + str(i) + '_data.csv'
model_file = './modfile/model1file/lstm.best_model_'
# test_pos_file = './data/part_data/test_pos_0.txt'
# test_neg_file = './data/part_data/test_neg_0.txt'
test_file = './data/part_data_all/test_0.txt'
generate_imdb_model2_data2(model_file=model_file, result_path=result_path, test_file=test_file, count=10)
generate_imdb_model2_data(model_file=model_file, result_path=result_path, test_file=test_file, count=10)
print('Load result ...')

X_test, Y_test = load_data3(data_path=data2_path)
Expand All @@ -51,10 +49,6 @@ def model1(i):
results = mlp2_model.predict(X_test)
label = np.argmax(results, axis=1)
y_label = Y_test
print("pred:", end='')
print(label)
print("true:", end='')
print(y_label)
make_err_dataset(result_path=result_file, label=label, x_test=X_test, y_test=y_label)
cal_err_ratio(file_name='train', label=label, y_test=y_label)
print('***** End Model1 Train *****')
Expand All @@ -79,22 +73,18 @@ def model2(i):
mlp_model2.fit(x_train, y_train, batch_size=128, epochs=100, verbose=1, shuffle=True, validation_data=(x_test, y_test),
callbacks=[check_pointer, early_stopping, csv_logger])
if results_flag:
print('Generate submission ...')
print('Test Model2 ...')
mlp_model2.load_weights(filepath=filepath)
results = mlp_model2.predict(x_test)
label = np.argmax(results, axis=1)
y_test = np.argmax(y_test, axis=1)
print("pred:", end='')
print(label)
print("true:", end='')
print(y_test)
# make_err_dataset(result_path='./err_data/iris_1_error_data.csv', label=label, x_test=x_test, y_test=y_test)
cal_err_ratio_only(label=label, y_test=y_test)
print('***** End Model2 Train *****')


if __name__ == '__main__':
model2(0)
for i in range(1, 10):
for i in range(1, 11):
print('***** ' + str(i) + ' START! *****')
model1(i)
model2(i)
Expand Down
Loading

0 comments on commit d5c1196

Please sign in to comment.