# quantizationawaretraining.py

# -*- coding: utf-8 -*-
"""QuantizationAwareTraining.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1KHMqT3RMkVKvOJMIZ545AIF7MB6DsFd6
"""

# Notebook-only shell commands, disabled so this file parses as plain Python.
# Run them manually in a shell (or restore the leading "!" inside a notebook).
# !nvidia-smi
# !pip install -q tensorflow-model-optimization

# Markdown cell from the notebook, kept as a comment: a bare string here would
# be a statement ahead of the __future__ import below, which Python rejects.
#
# You can use `future` to help port your code from Python 2 to Python 3 today
# and still have it run on Python 2.
# If you already have Python 3 code, you can instead use `future` to offer
# Python 2 compatibility with almost no extra work.

from __future__ import absolute_import,division,print_function,unicode_literals

# Commented out IPython magic to ensure Python compatibility.
# The notebook wrapped the magic in try/except; once the magic itself was
# commented out the `try:` had no body (a syntax error), so the wrapper is
# disabled together with it.
# try:
#   %tensorflow_version 2.x
# except Exception:
#   pass
import tensorflow as tf
import os 
import datetime
import tensorflow_datasets as tfds

# %load_ext tensorboard

# Show which devices TensorFlow can see. Printed explicitly because a bare
# expression is only echoed inside a notebook, not when run as a script.
print(tf.config.experimental.list_physical_devices())

import tensorflow_datasets as tfds
# as_supervised=True yields (image, label) tuples; passing a list of splits
# returns the datasets in the same order, i.e. [train, test].
datasets,info= tfds.load(name='fashion_mnist',with_info=True,as_supervised=True,try_gcs=True,split=['train','test'])

print(info)

print(info.features)

print(info.features["label"].num_classes)
print(info.features["label"].names)

fm_train,fm_test=datasets[0],datasets[1]
# Carve the first 3000 test examples off as a validation set; the remaining
# test examples stay as the held-out test set.
fm_val =fm_test.take(3000)
fm_test=fm_test.skip(3000)

print(len(datasets))

print(fm_train)

# Read the example count from the dataset metadata instead of decoding and
# materialising every training example just to call len() on the list.
print(info.splits['train'].num_examples)

import matplotlib.pyplot as plt
import numpy as np

# Preview the first five training examples: draw each image in grayscale and
# print its integer label together with the human-readable class name.
gray_cmap = plt.get_cmap("gray")
class_names = info.features["label"].names
for image, label in fm_train.take(5):
  label_id = label.numpy()
  # Use only channel 0 of the image so imshow receives a 2-D array.
  pixels = image.numpy()[:, :, 0].astype(np.float32)
  plt.figure()
  plt.imshow(pixels, cmap=gray_cmap)
  plt.show()
  print("label: %d" % label_id)
  print("Category: %s" % class_names[label_id])

def scale(image,label):
  """Convert an image to float32 and divide it by 255.

  Args:
    image: Image tensor to rescale.
    label: Label belonging to ``image``; passed through untouched.

  Returns:
    A ``(scaled_image, label)`` tuple, suitable for ``Dataset.map``.
  """
  as_float = tf.cast(image, tf.float32)
  return as_float / 255.0, label

def get_dataset(batch_size=256):
  """Build the scaled, batched train/test/validation input pipelines.

  Reads the module-level ``fm_train``, ``fm_test`` and ``fm_val`` datasets and
  maps ``scale`` over each of them.

  Args:
    batch_size: Number of examples per batch, used for all three pipelines.

  Returns:
    A ``(train, test, val)`` tuple of datasets -- note that test comes before
    validation. Only the training pipeline is shuffled (buffer of 6000).
  """
  def _scaled_batches(split, shuffle_buffer=None):
    # Scale first, optionally shuffle individual examples, then batch.
    pipeline = split.map(scale)
    if shuffle_buffer is not None:
      pipeline = pipeline.shuffle(shuffle_buffer)
    return pipeline.batch(batch_size)

  return (
      _scaled_batches(fm_train, shuffle_buffer=6000),
      _scaled_batches(fm_test),
      _scaled_batches(fm_val),
  )

def create_model():
  """Build the (uncompiled) CNN classifier for 28x28x1 images.

  Architecture: two Conv2D -> MaxPooling2D -> Dropout(0.3) stages with 64 and
  128 filters (kernel size 2, 'same' padding, ReLU), followed by Flatten, a
  256-unit Dense layer with ReLU and a 10-unit Dense layer with softmax.
  The ReLU and softmax after the Dense layers are separate Activation layers.

  Returns:
    A ``tf.keras.models.Sequential`` model; the caller compiles it.
  """
  layers = tf.keras.layers
  return tf.keras.models.Sequential([
      # Convolutional stage 1
      layers.Conv2D(64, 2, padding='same', activation='relu',
                    input_shape=(28, 28, 1)),
      layers.MaxPooling2D(),
      layers.Dropout(0.3),
      # Convolutional stage 2
      layers.Conv2D(128, 2, padding='same', activation='relu'),
      layers.MaxPooling2D(),
      layers.Dropout(0.3),
      # Classifier head
      layers.Flatten(),
      layers.Dense(256),
      layers.Activation('relu'),
      layers.Dense(10),
      layers.Activation('softmax'),
  ])

import tensorflow_model_optimization as tfmot

# Build the float baseline network, then pass it through TFMOT's
# quantize_model to obtain the model that is actually compiled and trained.
model = create_model()
quantize_model = tfmot.quantization.keras.quantize_model
Q_aware_model = quantize_model(model)

# Labels are fed as integer class ids, hence the sparse cross-entropy loss.
Q_aware_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
Q_aware_model.summary()

# One TensorBoard log directory per run, named by a full timestamp. The format
# was "%Y%m%d-%HOURS", which expands to e.g. "20240101-13OURS" and made every
# run started within the same hour write into the same directory.
logdir = os.path.join("/tmp/logs",datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir,histogram_freq=1)

train_dataset,test_dataset,val_dataset = get_dataset()
# Dataset.cache() returns a new dataset instead of modifying the receiver, so
# the result has to be re-bound for the call to have any effect.
val_dataset = val_dataset.cache()
# The training pipeline is deliberately left uncached: it is already shuffled
# and batched, and a cache placed after shuffle() replays the same order every
# epoch. (The previous bare `train_dataset.cache()` call was a no-op anyway.)

Q_aware_model.fit(train_dataset,
                  epochs=5,
                  validation_data=val_dataset,
                  callbacks=[tensorboard_callback])

# Persist the trained model. fit() above was called on Q_aware_model, not on
# `model`, so that is the object worth saving. The path is absolute: it used to
# be the relative 'tmp/fashion.hdf5', which did not match the /tmp location
# inspected right below.
saved_model_path = '/tmp/fashion.hdf5'
Q_aware_model.save(saved_model_path)
# Notebook-style inspection; only Q_aware_model is ever compiled in this file.
Q_aware_model.compiled_metrics

# Python replacement for the notebook's `ls -alrt /tmp/fashion.hdf5`, which is
# a shell command and not valid Python.
print("%s: %d bytes" % (saved_model_path, os.path.getsize(saved_model_path)))

# Commented out IPython magic to ensure Python compatibility.
# %tensorboard --logdir /tmp/logs

# Notebook-only shell command, disabled so the file parses as Python.
# !nvidia-smi

# evaluate() returns the loss followed by the compiled metrics; print it,
# since a bare expression is only echoed inside a notebook.
print(Q_aware_model.evaluate(test_dataset,verbose=0))

converter = tf.lite.TFLiteConverter.from_keras_model(Q_aware_model)

# First conversion: no converter optimizations set yet.
tflite_model = converter.convert()
# `with` closes the file deterministically instead of leaking the handle.
with open("model.tflite", "wb") as tflite_file:
  tflite_file.write(tflite_model)

# Second conversion from the same converter, now with default optimizations.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quantized_model = converter.convert()
with open("quantized_model.tflite", "wb") as tflite_file:
  tflite_file.write(tflite_quantized_model)

# NOTE(review): notebook-style inspection with no effect in a script. `model`
# is never compiled in this file (only Q_aware_model is), so this was
# presumably meant to be Q_aware_model.metrics -- confirm.
model.metrics

quantized_model_size = len(tflite_quantized_model)/1024
# Apply the format with %; passing the value as a second print() argument
# printed the raw "%d" placeholder followed by the number.
print("Quantized model size = %dKBs " % quantized_model_size)

# Run the quantized TFLite model over the held-out test split, one image at a
# time, and report the fraction of correct top-1 predictions.
interpreter = tf.lite.Interpreter(model_content=tflite_quantized_model)
interpreter.allocate_tensors()

input_tensor_index = interpreter.get_input_details()[0]["index"]
# interpreter.tensor() returns a callable; calling it after invoke() gives the
# current contents of the output tensor.
output_index = interpreter.tensor(interpreter.get_output_details()[0]["index"])

interpreter.get_tensor_details()

prediction_output = []
accurate_count = 0

for test_image in fm_test.map(scale):
  # test_image is an (image, label) pair; add a leading batch axis of size 1.
  test_image_p = np.expand_dims(test_image[0],axis=0).astype(np.float32)
  # A stray `quantized_tflite_model` token after this call made the whole
  # file a syntax error; it has been removed.
  interpreter.set_tensor(input_tensor_index,test_image_p)

  interpreter.invoke()
  # Predicted class = index of the largest score for the single batch item.
  out = np.argmax(output_index()[0])
  prediction_output.append(out)

  if out == test_image[1].numpy():
    accurate_count +=1

accuracy = accurate_count/len(prediction_output)

print(accuracy)