link: https://colab.research.google.com/drive/1sZEyFqWfXtA8_TtctdNcJ8sZKeBI7Qtf?usp=sharing
Import libraries
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import MaxPooling2D, Dense, Dropout,Flatten, Conv2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import TensorBoard,EarlyStopping
This dataset consists of brain scan images, both from patients diagnosed with a brain tumour and from healthy subjects.
!pip install -q kaggle
from google.colab import files
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! kaggle datasets download -d preetviradiya/brian-tumor-dataset
Downloading brian-tumor-dataset.zip to /content
90% 97.0M/107M [00:01<00:00, 72.5MB/s]
100% 107M/107M [00:01<00:00, 82.7MB/s]
# Extract the downloaded dataset archive into the current directory.
!unzip brian-tumor-dataset
import os
import pandas as pd
# Build a DataFrame with one row per image: its file path and its class label.
tumor_dir = r'Brain Tumor Data Set/Brain Tumor Data Set/Brain Tumor'
healthy_dir = r'Brain Tumor Data Set/Brain Tumor Data Set/Healthy'
dir_list = [tumor_dir, healthy_dir]

# Label attached to every image found in the corresponding directory.
# An explicit mapping avoids relying on the position of a directory in
# dir_list to decide which class it holds.
dir_labels = {tumor_dir: 'Cancer', healthy_dir: 'Not Cancer'}

image_path = []
label = []
for directory in dir_list:
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and therefore the seeded train/val/test split) reproducible.
    for f in sorted(os.listdir(directory)):
        image_path.append(directory + "/" + f)
        # Exactly one label per path, so both columns have the same length.
        label.append(dir_labels[directory])

data = pd.DataFrame({'image_path': image_path, 'label': label})
image_path label
0 Brain Tumor Data Set/Brain Tumor Data Set/Brai... Cancer
1 Brain Tumor Data Set/Brain Tumor Data Set/Brai... Cancer
2 Brain Tumor Data Set/Brain Tumor Data Set/Brai... Cancer
3 Brain Tumor Data Set/Brain Tumor Data Set/Brai... Cancer
4 Brain Tumor Data Set/Brain Tumor Data Set/Brai... Cancer
(4600, 2)
Cancer 2513
Not Cancer 2087
Name: label, dtype: int64
Split the data into train, validation and test sets with percentages of 80%, 10% and 10% respectively.
from sklearn.model_selection import train_test_split
seed = 123

# First cut: hold out 20% of the rows; the other 80% become the training set.
holdout_fraction = 0.2
train_set, remain_set = train_test_split(
    data,
    test_size=holdout_fraction,
    random_state=seed,
)

# Second cut: halve the held-out rows into validation and test sets
# (10% of the full data each).
val_set, test_set = train_test_split(
    remain_set,
    test_size=0.5,
    random_state=seed,
)
(3680, 2)
(460, 2)
(460, 2)
# Build batch iterators that load the images listed in each split from disk.
# No augmentation is configured here: the only per-image transformation is
# MobileNetV2's preprocess_input.
image_generator = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input
)


def _make_flow(dataframe, shuffle):
    """Return a batch iterator over the images listed in *dataframe*.

    Args:
        dataframe: DataFrame with an "image_path" and a "label" column.
        shuffle: Whether the iterator reshuffles the rows between epochs.
    """
    return image_generator.flow_from_dataframe(
        dataframe=dataframe,
        x_col="image_path",
        y_col="label",
        target_size=(244, 244),
        color_mode='rgb',
        class_mode="categorical",
        batch_size=32,
        shuffle=shuffle,
        seed=seed,
    )


# Training batches are reshuffled every epoch so the model does not see the
# same batch composition in the same order each time.
train = _make_flow(train_set, shuffle=True)
# Validation and test keep the DataFrame order so that predictions made on
# the iterator line up with its `.labels`.
val = _make_flow(val_set, shuffle=False)
test = _make_flow(test_set, shuffle=False)
Found 3680 validated image filenames belonging to 2 classes.
Found 460 validated image filenames belonging to 2 classes.
Found 460 validated image filenames belonging to 2 classes.
import matplotlib.pyplot as plt
def show_images(image_generator):
    """Plot up to 15 images from the next batch, each titled with its class.

    Args:
        image_generator: Batch iterator yielding ``(images, one_hot_labels)``
            and exposing ``class_indices`` (class name -> index), such as the
            iterators returned by ``flow_from_dataframe``.
    """
    img, label = next(image_generator)
    # Invert the iterator's own name -> index mapping instead of hard-coding
    # which index stands for which class.
    index_to_name = {
        index: name for name, index in image_generator.class_indices.items()
    }
    # A batch may hold fewer than 15 images (e.g. the last batch of a split).
    for i in range(min(15, len(img))):
        plt.subplot(5, 5, i+1)
        plt.imshow((img[i]+1)/2) #scale images between 0 and 1
        idx = int(np.argmax(label[i]))
        plt.title(index_to_name[idx])
# Set up the Convolutional Neural Network (CNN):
import tensorflow as tf
# Baseline CNN: two convolution layers, one max-pooling layer and a dense
# classifier head with one output unit per class.
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(244, 244, 3)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    # Collapse the feature maps into a vector so the Dense layers emit one
    # prediction per image rather than one per spatial location.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    # softmax produces a probability distribution over the two one-hot
    # encoded classes, matching the categorical_crossentropy loss below.
    tf.keras.layers.Dense(2, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(train, epochs=10, verbose=1, validation_data=val)
Epoch 1/10
115/115 [==============================] - 1311s 11s/step - loss: 3.1010 - accuracy: 0.6742 - val_loss: 0.3472 - val_accuracy: 0.8783
Epoch 2/10
115/115 [==============================] - 1284s 11s/step - loss: 0.2459 - accuracy: 0.9087 - val_loss: 0.1274 - val_accuracy: 0.9630
Epoch 3/10
115/115 [==============================] - 1277s 11s/step - loss: 0.0912 - accuracy: 0.9685 - val_loss: 0.0988 - val_accuracy: 0.9652
Epoch 4/10
115/115 [==============================] - 1332s 12s/step - loss: 0.0552 - accuracy: 0.9812 - val_loss: 0.0973 - val_accuracy: 0.9739
Epoch 5/10
115/115 [==============================] - 1292s 11s/step - loss: 0.0323 - accuracy: 0.9875 - val_loss: 0.1113 - val_accuracy: 0.9717
Epoch 6/10
115/115 [==============================] - 1311s 11s/step - loss: 0.0266 - accuracy: 0.9905 - val_loss: 0.1038 - val_accuracy: 0.9739
Epoch 7/10
115/115 [==============================] - 1287s 11s/step - loss: 0.0211 - accuracy: 0.9908 - val_loss: 0.1308 - val_accuracy: 0.9674
Epoch 8/10
115/115 [==============================] - 1283s 11s/step - loss: 0.0373 - accuracy: 0.9910 - val_loss: 0.1418 - val_accuracy: 0.9565
Epoch 9/10
115/115 [==============================] - 1355s 12s/step - loss: 0.0316 - accuracy: 0.9872 - val_loss: 0.1020 - val_accuracy: 0.9761
Epoch 10/10
115/115 [==============================] - 1304s 11s/step - loss: 0.0191 - accuracy: 0.9921 - val_loss: 0.1541 - val_accuracy: 0.9696
# Plot the training curves of `history`: accuracy in the top panel, loss in
# the bottom panel.
plt.figure(figsize=(8, 8))

# Accuracy
acc = history.history["accuracy"]  # per-epoch training accuracy
val_acc = history.history["val_accuracy"]  # per-epoch validation accuracy
plt.subplot(2, 1, 1)  # 2 rows, 1 column, first panel
plt.plot(acc, label="Training Accuracy")
plt.plot(val_acc, label="Validation Accuracy")
plt.ylabel("Accuracy", fontsize=12)
plt.title("Training and Validation Accuracy", fontsize=12)
plt.legend()  # without a legend the label= arguments are never displayed

# Loss
loss = history.history["loss"]  # per-epoch training loss
val_loss = history.history["val_loss"]  # per-epoch validation loss
plt.subplot(2, 1, 2)  # second panel, so the loss curves get their own axes
plt.plot(loss, label="Training Loss")
plt.plot(val_loss, label="Validation Loss")
plt.xlabel("Epoch", fontsize=12)
plt.ylabel("Loss", fontsize=12)
plt.title("Training and Validation Losses", fontsize=12)
plt.legend()

plt.tight_layout()  # keep the panel titles and axis labels from overlapping
plt.show()
# Loss and accuracy of the baseline model on the held-out test set.
model.evaluate(test, verbose=1)
15/15 [==============================] - 38s 3s/step - loss: 0.1230 - accuracy: 0.9717
[0.1230454370379448, 0.9717391133308411]
# Score every test image, then take the index of the highest-scoring output
# unit as the predicted class.
pred = model.predict(test)
y_pred = pred.argmax(axis=1)
15/15 [==============================] - 41s 3s/step
# Ground-truth class indices of the test images, as a NumPy array.
y_test = np.array(test.labels)
from sklearn.metrics import accuracy_score
# Share of test images whose predicted class matches the true class.
print("Accuracy of the Model:",accuracy_score(y_test, y_pred)*100,"%")
Accuracy of the Model: 97.17391304347827 %
from sklearn.metrics import confusion_matrix, accuracy_score
# Confusion matrix of the test predictions, drawn as an annotated heatmap.
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(10, 5))
sns.heatmap(cm, annot=True, fmt='g', cmap='crest')
<Axes: >
To save training time, we can stop training the CNN when the accuracy on the validation data has not improved for a certain number of epochs. For example, here the monitored criterion is the validation accuracy, and training stops after 5 epochs without improvement.
from tensorflow.keras.callbacks import EarlyStopping
# Same architecture as the baseline model, trained with early stopping on the
# validation accuracy.
model_2 = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(244, 244, 3)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    # Collapse the feature maps into a vector so the Dense layers emit one
    # prediction per image rather than one per spatial location.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    # softmax produces a probability distribution over the two one-hot
    # encoded classes, matching the categorical_crossentropy loss below.
    tf.keras.layers.Dense(2, activation='softmax'),
])

# Stop once val_accuracy has not improved for 5 consecutive epochs.
# min_delta is the smallest change that counts as an improvement. Accuracy
# lies in [0, 1], so a min_delta of 1 can never be reached and training would
# always halt `patience` epochs after the first one; 0 counts any increase.
early_stop = EarlyStopping(
    monitor='val_accuracy',
    mode='max',  # higher accuracy is better
    patience=5,
    min_delta=0,
    # When training stops early, keep the weights of the best epoch rather
    # than those of the last one.
    restore_best_weights=True,
)
model_2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
history_2 = model_2.fit(train, epochs=10, verbose=1, validation_data=val, callbacks=[early_stop])
Epoch 1/10
115/115 [==============================] - 1307s 11s/step - loss: 2.7671 - accuracy: 0.7293 - val_loss: 0.2682 - val_accuracy: 0.8891
Epoch 2/10
115/115 [==============================] - 1222s 11s/step - loss: 0.1555 - accuracy: 0.9438 - val_loss: 0.1146 - val_accuracy: 0.9565
Epoch 3/10
115/115 [==============================] - 1259s 11s/step - loss: 0.0406 - accuracy: 0.9875 - val_loss: 0.0893 - val_accuracy: 0.9630
Epoch 4/10
115/115 [==============================] - 1248s 11s/step - loss: 0.0246 - accuracy: 0.9929 - val_loss: 0.0933 - val_accuracy: 0.9696
Epoch 5/10
115/115 [==============================] - 1245s 11s/step - loss: 0.0118 - accuracy: 0.9962 - val_loss: 0.1103 - val_accuracy: 0.9652
Epoch 6/10
115/115 [==============================] - 1270s 11s/step - loss: 0.0141 - accuracy: 0.9962 - val_loss: 0.0959 - val_accuracy: 0.9739
# evaluate() runs on the test iterator and returns [loss, accuracy], so the
# message names the test set and both values.
print('Test [loss, accuracy] with Early Stopping:', model_2.evaluate(test, verbose=1))
15/15 [==============================] - 36s 2s/step - loss: 0.0965 - accuracy: 0.9717
Train Accuracy with Early Stopping: [0.09646687656641006, 0.9717391133308411]
# Plot the training curves of `history_2` (early-stopping run): accuracy in
# the top panel, loss in the bottom panel.
plt.figure(figsize=(8, 8))

# Accuracy
acc = history_2.history["accuracy"]  # per-epoch training accuracy
val_acc = history_2.history["val_accuracy"]  # per-epoch validation accuracy
plt.subplot(2, 1, 1)  # 2 rows, 1 column, first panel
plt.plot(acc, label="Training Accuracy")
plt.plot(val_acc, label="Validation Accuracy")
plt.ylabel("Accuracy", fontsize=12)
plt.title("Training and Validation Accuracy", fontsize=12)
plt.legend()  # without a legend the label= arguments are never displayed

# Loss
loss = history_2.history["loss"]  # per-epoch training loss
val_loss = history_2.history["val_loss"]  # per-epoch validation loss
plt.subplot(2, 1, 2)  # second panel, so the loss curves get their own axes
plt.plot(loss, label="Training Loss")
plt.plot(val_loss, label="Validation Loss")
plt.xlabel("Epoch", fontsize=12)
plt.ylabel("Loss", fontsize=12)
plt.title("Training and Validation Losses", fontsize=12)
plt.legend()

plt.tight_layout()  # keep the panel titles and axis labels from overlapping
plt.show()