diff --git a/musco/tf/compressor/common/utils.py b/musco/tf/compressor/common/utils.py index 6a3609e..2b54e73 100644 --- a/musco/tf/compressor/common/utils.py +++ b/musco/tf/compressor/common/utils.py @@ -13,7 +13,7 @@ def to_tf_kernel_order(tensor): :param tensor: tensor with conv.kernel weights. :return: tensor with the Tensoflow-like exis order. - [] + [out_channels, in_channels, filter_height, filter_width] -> [filter_height, filter_width, in_channels, out_channels] """ return np.transpose(tensor, (2, 3, 1, 0)) @@ -24,7 +24,7 @@ def to_pytorch_kernel_order(tensor): :param tensor: tensor with conv.kernel weights. :return: tensor with the Pytorch-like exis order. - [] + [filter_height, filter_width, in_channels, out_channels] -> [out_channels, in_channels, filter_height, filter_width] """ return np.transpose(tensor, (3, 2, 0, 1)) diff --git a/musco/tf/compressor/compress.py b/musco/tf/compressor/compress.py index 65dd1f4..d3cbe42 100644 --- a/musco/tf/compressor/compress.py +++ b/musco/tf/compressor/compress.py @@ -8,12 +8,20 @@ from musco.tf.compressor.decompositions.cp3 import get_cp3_seq from musco.tf.compressor.decompositions.cp4 import get_cp4_seq from musco.tf.compressor.decompositions.svd import get_svd_seq +from musco.tf.compressor.decompositions.svd_1x1 import get_svd_1x1_seq from musco.tf.compressor.decompositions.tucker2 import get_tucker2_seq from musco.tf.compressor.exceptions.compression_error import CompressionError from tqdm import tqdm -def compress_seq(model, decompose_info, optimize_rank=False, vbmf=True, vbmf_weaken_factor=0.8): +def compress_seq(model, + decompose_info, + optimize_rank=False, + vbmf=True, + vbmf_weaken_factor=0.8, + rank_selection="manual", + param_reduction_rate=None + ): """Compresses source model using decompositions from decompose_info dict. 
For example if decompose_info = { @@ -36,6 +44,9 @@ def compress_seq(model, decompose_info, optimize_rank=False, vbmf=True, vbmf_wea :param optimize_rank: rank to be optimized :param vbmf: use auto rank selection :param vbmf_weaken_factor: vbmf weaken factor + :param rank_selection: type of rank selection for svd_1x1. Available values are 'manual', 'vbmf', 'param_reduction'. + For the last one reduction rate is defined by parameter param_reduction_rate + :param param_reduction_rate: reduction rate for rank_selection='param_reduction' :return: new tf.keras.Model with compressed layers """ @@ -70,6 +81,15 @@ def compress_seq(model, decompose_info, optimize_rank=False, vbmf=True, vbmf_wea optimize_rank=optimize_rank, vbmf=vbmf, vbmf_weaken_factor=vbmf_weaken_factor) + elif decompose.lower() == "svd_1x1": + logging.info("SVD 1x1 layer {}".format(layer.name)) + new_layer = get_svd_1x1_seq(layer, + rank=decomp_rank, + optimize_rank=optimize_rank, + vbmf_weaken_factor=vbmf_weaken_factor, + rank_selection=rank_selection, + param_reduction_rate=param_reduction_rate + ) else: logging.info("Incorrect decompositions type for the layer {}".format(layer.name)) raise NameError( @@ -113,6 +133,7 @@ def insert_layer_noseq(model, layer_regexs): layers_order = [model.layers[0].name] for layer in tqdm(model.layers[1:], desc="{Insert layers}"): + print("Woring on layer " + layer.name) added_layer = None # Determine input tensors. @@ -121,6 +142,7 @@ def insert_layer_noseq(model, layer_regexs): if len(layer_input) == 1: layer_input = layer_input[0] + print("Layer input: " + str(layer_input)) # Insert layer if name matches the regular expression. 
changed = False @@ -175,7 +197,13 @@ def insert_layer_noseq(model, layer_regexs): return new_model -def compress_noseq(model, decompose_info, optimize_rank=False, vbmf=True, vbmf_weaken_factor=0.8): +def compress_noseq(model, + decompose_info, + optimize_rank=False, + vbmf=True, + vbmf_weaken_factor=0.8, + rank_selection="manual", + param_reduction_rate=None): new_model = model layer_regexs = dict() @@ -202,6 +230,14 @@ def compress_noseq(model, decompose_info, optimize_rank=False, vbmf=True, vbmf_w optimize_rank=optimize_rank, vbmf=vbmf, vbmf_weaken_factor=vbmf_weaken_factor) + elif decompose.lower() == "svd_1x1": + layer_regexs[layer.name] = get_svd_1x1_seq(layer, + rank=decomp_rank, + optimize_rank=optimize_rank, + vbmf_weaken_factor=vbmf_weaken_factor, + rank_selection=rank_selection, + param_reduction_rate=param_reduction_rate + ) except ValueError: continue diff --git a/musco/tf/compressor/decompositions/svd.py b/musco/tf/compressor/decompositions/svd.py index cd8bce0..9602916 100644 --- a/musco/tf/compressor/decompositions/svd.py +++ b/musco/tf/compressor/decompositions/svd.py @@ -59,16 +59,15 @@ def get_config(layer, copy_conf): redundant_keys = {"units", "kernel_initializer", "bias_initializer", "name"} if isinstance(layer, keras.Sequential): - for l in layer.layers: - confs.append(del_keys(l.get_config(), redundant_keys)) + confs = [l.get_config() for l in layer.layers] elif isinstance(layer, keras.layers.Dense): # Get conf of the source layer. - conf = {} if not copy_conf else del_keys(layer.get_config(), redundant_keys) + conf = {} if not copy_conf else layer.get_config() # Source layer is decomposed into 3, that's why we need 3 confs here. 
confs = [conf] * 3 - return confs + return [del_keys(conf, redundant_keys)for conf in confs] get_svd_seq = construct_compressor(get_params, None, get_svd_factors, get_layers_params_for_factors, get_config, diff --git a/musco/tf/compressor/decompositions/svd_1x1.py b/musco/tf/compressor/decompositions/svd_1x1.py new file mode 100644 index 0000000..e200eda --- /dev/null +++ b/musco/tf/compressor/decompositions/svd_1x1.py @@ -0,0 +1,152 @@ +# Decompose 1x1 conv2d using SVD + +import numpy as np + +from tensorflow import keras + +from musco.tf.compressor.decompositions.constructor import construct_compressor +from musco.tf.compressor.common.utils import del_keys +from musco.tf.compressor.rank_selection.estimator import estimate_rank_for_compression_rate, estimate_vbmf_ranks +from musco.tf.compressor.common.utils import to_tf_kernel_order, to_pytorch_kernel_order +from musco.tf.compressor.decompositions.svd import get_truncated_svd +from musco.tf.compressor.exceptions.compression_error import CompressionError + + +def get_params(layer): + cin = None + cout = None + kernel_size = None + padding = None + strides = None + activation = None + batch_input_shape = None + + if isinstance(layer, keras.Sequential): + # If the layer has been decomposed at least once, then it is a sequence of exactly two layers: + # the first layer in the sequence receives in_channels (and may carry batch_input_shape), + # the second layer contains information about kernel_size, padding, strides and activation, + # and the output of the second layer gives out_channels. 
+ layer_1, layer_2 = layer.layers + conf_1, conf_2 = layer_1.get_config(), layer_2.get_config() + + if "batch_input_shape" in conf_1: + batch_input_shape = conf_1["batch_input_shape"] + + cin = layer.input_shape[-1] if layer_1.data_format == "channels_last" else layer.input_shape[0] + cout = layer.output_shape[-1] if layer_2.data_format == "channels_last" else layer.output_shape[0] + kernel_size = conf_2["kernel_size"] + padding = conf_2["padding"] + strides = conf_2["strides"] + activation = conf_2["activation"] + elif isinstance(layer, keras.layers.Conv2D): + cin = layer.input_shape[-1] if layer.data_format == "channels_last" else layer.input_shape[0] + cout = layer.output_shape[-1] if layer.data_format == "channels_last" else layer.output_shape[0] + layer_conf = layer.get_config() + kernel_size = layer_conf["kernel_size"] + padding = layer_conf["padding"] + strides = layer_conf["strides"] + activation = layer_conf["activation"] + + if "batch_input_shape" in layer_conf: + batch_input_shape = layer_conf["batch_input_shape"] + + if cin is None or cout is None or kernel_size is None or padding is None or strides is None or \ + activation is None: + raise CompressionError() + + return dict(cin=cin, cout=cout, kernel_size=kernel_size, padding=padding, strides=strides, + batch_input_shape=batch_input_shape, activation=activation) + + +def get_rank(layer, rank, cin, cout, + rank_selection="manual", vbmf_weaken_factor=1.0, + param_reduction_rate=None, **kwargs): + if rank_selection == 'vbmf': + if isinstance(layer, keras.Sequential): + return estimate_vbmf_ranks(to_pytorch_kernel_order(layer.get_weights()[1]), vbmf_weaken_factor) + else: + return estimate_vbmf_ranks(to_pytorch_kernel_order(layer.get_weights()[0]), vbmf_weaken_factor) + elif rank_selection == 'manual': + return int(rank) + elif rank_selection == 'param_reduction': + if isinstance(layer, keras.Sequential): + return layer.layers[0].output_shape[-1] // param_reduction_rate + else: + return 
estimate_rank_for_compression_rate((cout, cin), + rate=param_reduction_rate, + key='svd') + + +def get_weights_and_bias(layer): + """Returns weights and biases. + + :param layer: a source layer + :return: If layer is tf.keras.layers.Conv2D its own weights are returned as weights, + Otherwise (keras.Sequential) the weights of the last layer in the sequence are returned. + The second element that is returned is a bias tensor. + Note that the weights are returned in PyTorch dimension order, reshaped to 2D: + [out_channels, in_channels] + """ + + weights = None + bias = None + + if isinstance(layer, keras.Sequential): + weights, bias = layer.layers[-1].get_weights() + elif isinstance(layer, keras.layers.Conv2D): + weights, bias = layer.get_weights() + + weights = to_pytorch_kernel_order(weights) + weights = weights.reshape(weights.shape[:2]) + + if weights is None or bias is None: + raise CompressionError() + + return weights, bias + + +def get_svd_factors(layer, rank, **kwargs): + weights, bias = get_weights_and_bias(layer=layer) + u, s, v_adj = get_truncated_svd(weights, rank) + + w0 = np.dot(np.sqrt(s), v_adj) + w1 = np.dot(u, np.sqrt(s)) + if isinstance(layer, keras.Sequential): + w0_old = layer.layers[0].get_weights()[0] + w0 = np.dot(w0, w0_old) + + w0, w1 = [to_tf_kernel_order(w.reshape((*w.shape, 1, 1))) for w in [w0, w1]] + + return [w0, w1], [None, bias] + + +def get_layers_params_for_factors(cout, rank, kernel_size, padding, strides, batch_input_shape, activation, **kwargs): + new_layers = [keras.layers.Conv2D, keras.layers.Conv2D] + params = [ + dict(kernel_size=(1, 1), filters=rank, padding="same", use_bias=False), + dict(kernel_size=kernel_size, filters=cout, padding=padding, strides=strides, activation=activation), + ] + + if batch_input_shape is not None: + params[0]["batch_input_shape"] = batch_input_shape + + return new_layers, params + + +def get_config(layer, copy_conf): + confs = None + if isinstance(layer, keras.Sequential): + confs = 
[l.get_config() for l in layer.layers] + elif isinstance(layer, keras.layers.Conv2D): + if copy_conf: + confs = [layer.get_config()] * 2 + else: + confs = [{}] * 2 + + redundant_keys = {"kernel_initializer", "bias_initializer", "name", "kernel_size", "padding", "strides", "filters", + "activation"} + return [del_keys(conf, redundant_keys)for conf in confs] + + +get_svd_1x1_seq = construct_compressor(get_params, get_rank, get_svd_factors, get_layers_params_for_factors, get_config, + (keras.layers.Conv2D, keras.Sequential)) diff --git a/musco/tf/compressor/decompositions/test/__init__.py b/musco/tf/compressor/decompositions/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/musco/tf/compressor/decompositions/test/svd_1x1_example.py b/musco/tf/compressor/decompositions/test/svd_1x1_example.py new file mode 100644 index 0000000..04e8114 --- /dev/null +++ b/musco/tf/compressor/decompositions/test/svd_1x1_example.py @@ -0,0 +1,169 @@ +import numpy as np +import tensorflow as tf + +from tensorflow import keras +from musco.tf.compressor.compress import compress_seq, compress_noseq + + +def test_tucker2(take_first=None): + fashion_mnist = keras.datasets.fashion_mnist + (train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data() + + train_images = train_images / 255.0 + test_images = test_images / 255.0 + + if take_first is not None: + train_images = train_images[:take_first, ...] + train_labels = train_labels[:take_first, ...] + + test_images = test_images[:take_first, ...] + test_labels = test_labels[:take_first, ...] 
+ + train_images = np.expand_dims(train_images, -1) + test_images = np.expand_dims(test_images, -1) + print (train_images[0].shape) + def createModel() : + input = tf.keras.layers.Input(shape=(28,28,1)) + x = tf.keras.layers.Conv2D(filters=64, + kernel_size=(3, 3), + padding='valid', + activation='relu', + input_shape=(28, 28, 1))(input) + x = tf.keras.layers.Conv2D(filters=64, + kernel_size=(3, 3), + padding='valid', + activation='relu')(x) + x = tf.keras.layers.Conv2D(filters=64, + kernel_size=(1, 1), + padding='valid', + activation='relu', + name='test_1')(x) + x = tf.keras.layers.Conv2D(filters=64, + kernel_size=(1, 1), + padding='valid', + activation='relu', + name='test_2')(x) + x = tf.keras.layers.Flatten()(x) + x = tf.keras.layers.Dense(10, activation='softmax')(x) + return tf.keras.Model(input, x) + model = createModel() + model.compile(optimizer='adam', + loss='sparse_categorical_crossentropy', + metrics=['accuracy']) + + model.fit(train_images, + train_labels, + epochs=2) + + model.summary() + + print('Evaluate source model') + test_loss, test_acc = model.evaluate(test_images, + test_labels, + verbose=0) + print('Test accuracy:', test_acc) + + compressed_model = compress_noseq(model, { + 'test_1': ('svd_1x1', 64), + 'test_2': ('svd_1x1', 50) + }) + + compressed_model.compile(optimizer='adam', + loss='sparse_categorical_crossentropy', + metrics=['accuracy']) + + print('Evaluate compressed model') + test_loss, test_acc = compressed_model.evaluate(test_images, + test_labels, + verbose=0) + + compressed_model.summary() + print('Test accuracy:', test_acc) + + # for layer in compressed_model.layers: + # print(layer.name) + + +def test_tucker2_n_stages(take_first=None): + fashion_mnist = keras.datasets.fashion_mnist + (train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data() + + train_images = train_images / 255.0 + test_images = test_images / 255.0 + + if take_first is not None: + train_images = train_images[:take_first, ...] 
+ train_labels = train_labels[:take_first, ...] + + test_images = test_images[:take_first, ...] + test_labels = test_labels[:take_first, ...] + + train_images = np.expand_dims(train_images, -1) + test_images = np.expand_dims(test_images, -1) + def createModel(): + inputs = tf.keras.layers.Input(shape=(28, 28, 1)) + x = tf.keras.layers.Conv2D(filters=64, + kernel_size=(3, 3), + padding='valid', + activation='relu', + input_shape=(28, 28, 1))(inputs) + x = tf.keras.layers.Conv2D(filters=64, + kernel_size=(3, 3), + padding='valid', + activation='relu')(x) + x = tf.keras.layers.Conv2D(filters=64, + kernel_size=(1, 1), + padding='valid', + activation='relu', + name='test_1')(x) + x = tf.keras.layers.Conv2D(filters=64, + kernel_size=(1, 1), + padding='valid', + activation='relu', + name='test_2')(x) + x = tf.keras.layers.Flatten()(x) + x = tf.keras.layers.Dense(10, activation='softmax')(x) + return tf.keras.Model(inputs, x) + model = createModel() + model.summary() + model.compile(optimizer='adam', + loss='sparse_categorical_crossentropy', + metrics=['accuracy']) + + model.fit(train_images, + train_labels, + epochs=2) + + print('Evaluate source model') + test_loss, test_acc = model.evaluate(test_images, + test_labels, + verbose=0) + print('Test accuracy:', test_acc) + + compressed_model = model + for idx in range(2): + print("BEFORE COMPRESS") + compressed_model.summary() + compressed_model = compress_noseq(compressed_model, { + 'test_1': ('svd_1x1', 10), + 'test_2': ('svd_1x1', 10) + }) + + compressed_model.compile(optimizer='adam', + loss='sparse_categorical_crossentropy', + metrics=['accuracy']) + + print('Evaluate compressed model', idx + 1) + test_loss, test_acc = compressed_model.evaluate(test_images, + test_labels, + verbose=0) + + # compressed_model.summary() + print('Test accuracy:', test_acc) + + +# TODO: write regular tests +if __name__ == "__main__": + print("!!!!", tf.__version__) + #test_tucker2(10) + test_tucker2_n_stages(1000)