From fa2b3b7d94a6813d15b8cf3542eaf89f3306be5c Mon Sep 17 00:00:00 2001 From: Reuven <44209964+reuvenperetz@users.noreply.github.com> Date: Mon, 14 Oct 2024 19:33:45 +0300 Subject: [PATCH] Move Keras qat activation quantizers to trainable infrastructure (#1240) Move STE and LSQ activation quantizers in Keras from QAT to the new trainable infrastructure module. Add flag 'freeze_quantization_params' to align them with pytorch quantizers (even though this flag is meaningless in Keras). Rename Trainable QAT quantizer to be Weight Trainable quantizer. --------- Co-authored-by: reuvenp --- ....py => base_keras_qat_weight_quantizer.py} | 16 +- .../qat/keras/quantizer/lsq/symmetric_lsq.py | 131 +--------------- .../qat/keras/quantizer/lsq/uniform_lsq.py | 125 +-------------- .../keras/quantizer/quantization_builder.py | 13 +- .../quantizer/ste_rounding/symmetric_ste.py | 123 +-------------- .../quantizer/ste_rounding/uniform_ste.py | 100 +----------- .../keras/activation_quantizers/__init__.py | 20 +++ .../base_activation_quantizer.py | 22 +++ .../activation_quantizers/lsq/__init__.py | 14 ++ .../lsq/symmetric_lsq.py | 127 +++++++++++++++ .../activation_quantizers/lsq/uniform_lsq.py | 129 +++++++++++++++ .../activation_quantizers/ste/__init__.py | 14 ++ .../ste/symmetric_ste.py | 148 ++++++++++++++++++ .../activation_quantizers/ste/uniform_ste.py | 122 +++++++++++++++ .../keras/base_keras_quantizer.py | 22 +-- .../keras/quantizer_utils.py | 60 +++++++ .../feature_networks/qat/qat_test.py | 11 +- .../test_keras_trainable_infra_runner.py | 24 +-- 18 files changed, 717 insertions(+), 504 deletions(-) rename model_compression_toolkit/qat/keras/quantizer/{base_keras_qat_quantizer.py => base_keras_qat_weight_quantizer.py} (73%) create mode 100644 model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/__init__.py create mode 100644 model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/base_activation_quantizer.py create mode 100644 
model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/__init__.py create mode 100644 model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/symmetric_lsq.py create mode 100644 model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/uniform_lsq.py create mode 100644 model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/__init__.py create mode 100644 model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/symmetric_ste.py create mode 100644 model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/uniform_ste.py diff --git a/model_compression_toolkit/qat/keras/quantizer/base_keras_qat_quantizer.py b/model_compression_toolkit/qat/keras/quantizer/base_keras_qat_weight_quantizer.py similarity index 73% rename from model_compression_toolkit/qat/keras/quantizer/base_keras_qat_quantizer.py rename to model_compression_toolkit/qat/keras/quantizer/base_keras_qat_weight_quantizer.py index 34fc36d0b..86587b12f 100644 --- a/model_compression_toolkit/qat/keras/quantizer/base_keras_qat_quantizer.py +++ b/model_compression_toolkit/qat/keras/quantizer/base_keras_qat_weight_quantizer.py @@ -22,24 +22,14 @@ if FOUND_TF: - class BaseKerasQATTrainableQuantizer(BaseKerasTrainableQuantizer): + class BaseKerasQATWeightTrainableQuantizer(BaseKerasTrainableQuantizer): """ A base class for trainable Keras quantizer for QAT. """ - - def __init__(self, - quantization_config: Union[TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig]): - """ - Initializes BaseKerasQATTrainableQuantizer object. - - Args: - quantization_config: quantizer config class contains all the information about a quantizer configuration. 
- """ - - super().__init__(quantization_config) + pass else: # pragma: no cover - class BaseKerasQATTrainableQuantizer(BaseKerasTrainableQuantizer): + class BaseKerasQATWeightTrainableQuantizer(BaseKerasTrainableQuantizer): def __init__(self, quantization_config: Union[TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig]): diff --git a/model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py b/model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py index 0ed1d0c84..632a34c85 100644 --- a/model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py +++ b/model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py @@ -28,47 +28,18 @@ from model_compression_toolkit.qat.common import THRESHOLD_TENSOR from model_compression_toolkit import constants as C -from model_compression_toolkit.qat.keras.quantizer.base_keras_qat_quantizer import BaseKerasQATTrainableQuantizer -from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerWeightsConfig, \ - TrainableQuantizerActivationConfig -from mct_quantizers.keras.quantizers import WeightsPOTInferableQuantizer, WeightsSymmetricInferableQuantizer, \ - ActivationPOTInferableQuantizer, ActivationSymmetricInferableQuantizer +from model_compression_toolkit.qat.keras.quantizer.base_keras_qat_weight_quantizer import BaseKerasQATWeightTrainableQuantizer +from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig +from mct_quantizers.keras.quantizers import WeightsPOTInferableQuantizer, WeightsSymmetricInferableQuantizer, ActivationPOTInferableQuantizer, ActivationSymmetricInferableQuantizer from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup from model_compression_toolkit.qat.keras.quantizer.quant_utils import ste_round, grad_scale - - -def symmetric_lsq_quantizer(x: tf.Tensor, - thresholds: tf.Tensor, - num_bits: int, - sign: bool, - 
min_int: int, - max_int:int, - scale_factor: float) -> tf.Tensor: - """ - Symmetric quantizer according to LSQ algorithm: https://arxiv.org/pdf/1902.08153.pdf - Args: - x: input to quantize - thresholds: thresholds of quantization levels - num_bits: number of bits for quantization - sign: whether x is signed or not - min_int: min clipping integer value - max_int: max clipping integer value - scale_factor: grad scale of LSQ algorithm - Returns: - A quantized tensor - """ - delta = thresholds / (2 ** (num_bits - int(sign))) - delta_scaled = grad_scale(delta, scale_factor) - rounded = ste_round(x / delta_scaled) - clipped = tf.math.minimum(tf.math.maximum(rounded, min_int), max_int) - quantized = delta_scaled * clipped - return quantized +from model_compression_toolkit.trainable_infrastructure.keras.quantizer_utils import symmetric_lsq_quantizer @mark_quantizer(quantization_target=QuantizationTarget.Weights, quantization_method=[QuantizationMethod.POWER_OF_TWO, QuantizationMethod.SYMMETRIC], identifier=TrainingMethod.LSQ) -class LSQWeightQATQuantizer(BaseKerasQATTrainableQuantizer): +class LSQWeightQATQuantizer(BaseKerasQATWeightTrainableQuantizer): """ Trainable constrained quantizer to quantize layer's weights. """ @@ -159,95 +130,3 @@ def convert2inferable(self) -> Union[WeightsPOTInferableQuantizer, WeightsSymmet input_rank=len(self.threshold_shape)) -@mark_quantizer(quantization_target=QuantizationTarget.Activation, - quantization_method=[QuantizationMethod.POWER_OF_TWO, QuantizationMethod.SYMMETRIC], - identifier=TrainingMethod.LSQ) -class LSQActivationQATQuantizer(BaseKerasQATTrainableQuantizer): - """ - Trainable constrained quantizer to quantize layer activations. - """ - - def __init__(self, quantization_config: TrainableQuantizerActivationConfig): - """ - Initialize a LSQActivationQATQuantizer object with parameters to use - for the quantization. 
- - Args: - quantization_config: trainable quantizer config class - """ - super().__init__(quantization_config) - self.power_of_two = quantization_config.activation_quantization_method == QuantizationMethod.POWER_OF_TWO - self.threshold_values = float(quantization_config.activation_quantization_params[C.THRESHOLD]) - self.threshold_shape = np.asarray(self.threshold_values).shape - self.sign = quantization_config.activation_quantization_params[SIGNED] - self.num_bits = quantization_config.activation_n_bits - n_pos_bits = self.num_bits - int(self.sign) - self.min_int = -int(self.sign) * (2 ** n_pos_bits) - self.max_int = (2 ** n_pos_bits) - 1 - if self.power_of_two: - self.threshold_values = np.power(2.0, np.ceil(np.log2(np.maximum(self.threshold_values, C.MIN_THRESHOLD)))) - - - def initialize_quantization(self, - tensor_shape: TensorShape, - name: str, - layer: KerasTrainableQuantizationWrapper): - """ - Add quantizer parameters to the quantizer parameters dictionary - - Args: - tensor_shape: tensor shape of the quantized tensor. - name: Tensor name. - layer: Layer to quantize. - """ - ptq_threshold_tensor = layer.add_weight( - name + THRESHOLD_TENSOR, - shape=(), - initializer=tf.keras.initializers.Constant(1.0), - trainable=True) - ptq_threshold_tensor.assign(self.threshold_values) - - # save the quantizer added parameters for later calculations - self.add_quantizer_variable(THRESHOLD_TENSOR, ptq_threshold_tensor, VariableGroup.QPARAMS) - - def __call__(self, - inputs: tf.Tensor, - training: bool): - """ - Quantize a tensor. - Args: - inputs: Input tensor to quantize. - training: Whether the graph is in training mode. - - Returns: - The quantized tensor. 
- """ - - thresholds = self.get_quantizer_variable(THRESHOLD_TENSOR) - n_channels = inputs.shape[-1] - scale_factor = 1.0 / np.sqrt(self.max_int * n_channels) - q_tensor = symmetric_lsq_quantizer(inputs, thresholds, self.num_bits, self.sign, self.min_int, self.max_int, scale_factor) - return q_tensor - - def convert2inferable(self) -> Union[ActivationPOTInferableQuantizer, ActivationSymmetricInferableQuantizer]: - """ - Convert quantizer to inferable quantizer. - - Returns: - BaseKerasInferableQuantizer object. - """ - - if self.power_of_two: - thresholds = 2 ** np.ceil(np.log2(self.get_quantizer_variable(THRESHOLD_TENSOR).numpy())) - return ActivationPOTInferableQuantizer(num_bits=self.num_bits, - # In activation quantization is per-tensor only - thus we pass - # the threshold as a list with a len of 1 - threshold=[thresholds], - signed=self.sign) - else: - thresholds = self.get_quantizer_variable(THRESHOLD_TENSOR).numpy() - return ActivationSymmetricInferableQuantizer(num_bits=self.num_bits, - # In activation quantization is per-tensor only - thus we - # pass the threshold as a list with a len of 1 - threshold=[thresholds], - signed=self.sign) diff --git a/model_compression_toolkit/qat/keras/quantizer/lsq/uniform_lsq.py b/model_compression_toolkit/qat/keras/quantizer/lsq/uniform_lsq.py index 227a3bb03..9efa4198f 100644 --- a/model_compression_toolkit/qat/keras/quantizer/lsq/uniform_lsq.py +++ b/model_compression_toolkit/qat/keras/quantizer/lsq/uniform_lsq.py @@ -16,6 +16,8 @@ import tensorflow as tf from tensorflow.python.framework.tensor_shape import TensorShape from model_compression_toolkit.constants import RANGE_MIN, RANGE_MAX +from model_compression_toolkit.qat.keras.quantizer.base_keras_qat_weight_quantizer import \ + BaseKerasQATWeightTrainableQuantizer from model_compression_toolkit.trainable_infrastructure.common.constants import FQ_MIN, FQ_MAX from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper from 
model_compression_toolkit.trainable_infrastructure import TrainingMethod @@ -26,47 +28,18 @@ from model_compression_toolkit import constants as C -from model_compression_toolkit.qat.keras.quantizer.base_keras_qat_quantizer import BaseKerasQATTrainableQuantizer from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerWeightsConfig, \ TrainableQuantizerActivationConfig from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import fix_range_to_include_zero from model_compression_toolkit.qat.keras.quantizer.quant_utils import ste_round, grad_scale, adjust_range_to_include_zero - - -def uniform_lsq_quantizer(x: tf.Tensor, - min_range: tf.Tensor, - max_range: tf.Tensor, - num_bits: int, - min_int: int, - max_int:int, - scale_factor: float) -> tf.Tensor: - """ - Uniform quantizer according to LSQ algorithm: https://arxiv.org/pdf/1902.08153.pdf - Args: - x: input to quantize - min_range: min range of quantization values - max_range: min range of quantization values - num_bits: number of bits for quantization - min_int: min clipping integer value - max_int: max clipping integer value - scale_factor: grad scale of LSQ algorithm - Returns: - A quantized tensor - """ - min_range, max_range = adjust_range_to_include_zero(min_range, max_range, num_bits) - delta = (max_range - min_range) / (2 ** num_bits - 1) - delta_scaled = grad_scale(delta, scale_factor) - rounded = ste_round((x-min_range) / delta_scaled) - clipped = tf.math.minimum(tf.math.maximum(rounded, min_int), max_int) - quantized = delta_scaled * clipped + min_range - return quantized +from model_compression_toolkit.trainable_infrastructure.keras.quantizer_utils import uniform_lsq_quantizer @mark_quantizer(quantization_target=QuantizationTarget.Weights, quantization_method=[QuantizationMethod.UNIFORM], identifier=TrainingMethod.LSQ) -class 
LSQUniformWeightQATQuantizer(BaseKerasQATTrainableQuantizer): +class LSQUniformWeightQATQuantizer(BaseKerasQATWeightTrainableQuantizer): """ Trainable constrained quantizer to quantize layer's weights. """ @@ -158,93 +131,3 @@ def convert2inferable(self) -> BaseKerasInferableQuantizer: channel_axis=self.channel_axis, input_rank=len(self.min_max_shape)) - -@mark_quantizer(quantization_target=QuantizationTarget.Activation, - quantization_method=[QuantizationMethod.UNIFORM], - identifier=TrainingMethod.LSQ) -class LSQUniformActivationQATQuantizer(BaseKerasQATTrainableQuantizer): - """ - Trainable constrained quantizer to quantize layer activations. - """ - - def __init__(self, quantization_config: TrainableQuantizerActivationConfig): - """ - Initialize a LSQUniformActivationQATQuantizer object with parameters to use - for the quantization. - - Args: - quantization_config: trainable quantizer config class - """ - super().__init__(quantization_config) - - self.num_bits = quantization_config.activation_n_bits - self.min_range = np.array(quantization_config.activation_quantization_params[C.RANGE_MIN]) - self.max_range = np.array(quantization_config.activation_quantization_params[C.RANGE_MAX]) - self.min_int = 0 - self.max_int = 2**self.num_bits - 1 - - def initialize_quantization(self, - tensor_shape: TensorShape, - name: str, - layer: KerasTrainableQuantizationWrapper): - """ - Add quantizer parameters to the quantizer parameters dictionary - - Args: - tensor_shape: tensor shape of the quantized tensor. - name: Tensor name. - layer: Layer to quantize. 
- """ - fq_min = layer.add_weight( - name + FQ_MIN, - shape=(), - initializer=tf.keras.initializers.Constant(-1.0), - trainable=True) - fq_min.assign(self.min_range) - - fq_max = layer.add_weight( - name + FQ_MAX, - shape=(), - initializer=tf.keras.initializers.Constant(1.0), - trainable=True) - fq_max.assign(self.max_range) - - # save the quantizer added parameters for later calculations - self.add_quantizer_variable(FQ_MIN, fq_min, VariableGroup.QPARAMS) - self.add_quantizer_variable(FQ_MAX, fq_max, VariableGroup.QPARAMS) - - def __call__(self, - inputs: tf.Tensor, - training: bool): - """ - Quantize a tensor. - Args: - inputs: Input tensor to quantize. - training: Whether the graph is in training mode. - - Returns: - The quantized tensor. - """ - - min_range = self.get_quantizer_variable(FQ_MIN) - max_range = self.get_quantizer_variable(FQ_MAX) - n_channels = inputs.shape[-1] - scale_factor = 1.0 / np.sqrt(self.max_int * n_channels) - q_tensor = uniform_lsq_quantizer(inputs, min_range, max_range, self.num_bits, self.min_int, self.max_int, scale_factor) - return q_tensor - - def convert2inferable(self) -> BaseKerasInferableQuantizer: - """ - Convert quantizer to inferable quantizer. - - Returns: - BaseKerasInferableQuantizer object. 
- """ - min_range, max_range = fix_range_to_include_zero(self.get_quantizer_variable(FQ_MIN).numpy(), - self.get_quantizer_variable(FQ_MAX).numpy(), - self.num_bits) - return ActivationUniformInferableQuantizer(num_bits=self.num_bits, - # In activation quantization is per-tensor only - thus we pass - # the min/max as lists with a len of 1 - min_range=[min_range], - max_range=[max_range]) diff --git a/model_compression_toolkit/qat/keras/quantizer/quantization_builder.py b/model_compression_toolkit/qat/keras/quantizer/quantization_builder.py index e0ee6471b..147e42526 100644 --- a/model_compression_toolkit/qat/keras/quantizer/quantization_builder.py +++ b/model_compression_toolkit/qat/keras/quantizer/quantization_builder.py @@ -15,17 +15,18 @@ from typing import Tuple, Dict, List, Callable from model_compression_toolkit.core import common -from model_compression_toolkit.core.common.framework_info import FrameworkInfo -from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO from model_compression_toolkit.logger import Logger from model_compression_toolkit.qat.common.qat_config import QATConfig -from model_compression_toolkit.qat.keras.quantizer.base_keras_qat_quantizer import BaseKerasQATTrainableQuantizer from mct_quantizers import QuantizationTarget, KerasActivationQuantizationHolder +from model_compression_toolkit.qat.keras.quantizer.base_keras_qat_weight_quantizer import \ + BaseKerasQATWeightTrainableQuantizer from model_compression_toolkit.trainable_infrastructure.common.get_quantizer_config import \ get_trainable_quantizer_weights_config, get_trainable_quantizer_activation_config, \ get_trainable_quantizer_quantization_candidates from model_compression_toolkit.trainable_infrastructure.common.get_quantizers import \ get_trainable_quantizer_class +from model_compression_toolkit.trainable_infrastructure.keras.activation_quantizers import \ + BaseKerasActivationTrainableQuantizer def get_activation_quantizer_holder(n: 
common.BaseNode, @@ -55,7 +56,7 @@ def get_activation_quantizer_holder(n: common.BaseNode, def quantization_builder(n: common.BaseNode, qat_config: QATConfig, kernel_attr: str = None, - ) -> Tuple[Dict[str, BaseKerasQATTrainableQuantizer], List[BaseKerasQATTrainableQuantizer]]: + ) -> Tuple[Dict[str, BaseKerasQATWeightTrainableQuantizer], List[BaseKerasActivationTrainableQuantizer]]: """ Build quantizers for a node according to its quantization configuration. @@ -82,7 +83,7 @@ def quantization_builder(n: common.BaseNode, quantizer_class = get_trainable_quantizer_class(QuantizationTarget.Weights, qat_config.weight_training_method, quant_method, - BaseKerasQATTrainableQuantizer) + BaseKerasQATWeightTrainableQuantizer) weight_quantizers.update({kernel_attr: quantizer_class(get_trainable_quantizer_weights_config(n, attr_name=kernel_attr, @@ -98,7 +99,7 @@ def quantization_builder(n: common.BaseNode, quantizer_class = get_trainable_quantizer_class(QuantizationTarget.Activation, qat_config.activation_training_method, quant_method, - BaseKerasQATTrainableQuantizer) + BaseKerasActivationTrainableQuantizer) activation_quantizers = [quantizer_class(get_trainable_quantizer_activation_config(n, aq_cand), **qat_config.activation_quantizer_params_override)] * len(output_shapes) diff --git a/model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py b/model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py index 987dfb133..d41f04c19 100644 --- a/model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py +++ b/model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py @@ -18,7 +18,6 @@ import numpy as np import tensorflow as tf from tensorflow.python.framework.tensor_shape import TensorShape -from model_compression_toolkit.constants import SIGNED from model_compression_toolkit.trainable_infrastructure.common.constants import FQ_MIN, FQ_MAX from model_compression_toolkit.trainable_infrastructure import 
TrainingMethod @@ -29,18 +28,16 @@ from model_compression_toolkit.qat.common import THRESHOLD_TENSOR from model_compression_toolkit import constants as C -from model_compression_toolkit.qat.keras.quantizer.base_keras_qat_quantizer import BaseKerasQATTrainableQuantizer -from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerWeightsConfig, \ - TrainableQuantizerActivationConfig -from mct_quantizers.keras.quantizers import WeightsPOTInferableQuantizer, WeightsSymmetricInferableQuantizer, \ - ActivationPOTInferableQuantizer, ActivationSymmetricInferableQuantizer +from model_compression_toolkit.qat.keras.quantizer.base_keras_qat_weight_quantizer import BaseKerasQATWeightTrainableQuantizer +from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerWeightsConfig +from mct_quantizers.keras.quantizers import WeightsPOTInferableQuantizer, WeightsSymmetricInferableQuantizer from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup @mark_quantizer(quantization_target=QuantizationTarget.Weights, quantization_method=[QuantizationMethod.POWER_OF_TWO, QuantizationMethod.SYMMETRIC], identifier=TrainingMethod.STE) -class STEWeightQATQuantizer(BaseKerasQATTrainableQuantizer): +class STEWeightQATQuantizer(BaseKerasQATWeightTrainableQuantizer): """ Trainable constrained quantizer to quantize a layer inputs. """ @@ -171,115 +168,3 @@ def convert2inferable(self) -> Union[WeightsPOTInferableQuantizer, WeightsSymmet input_rank=len(self.threshold_shape)) -@mark_quantizer(quantization_target=QuantizationTarget.Activation, - quantization_method=[QuantizationMethod.POWER_OF_TWO, QuantizationMethod.SYMMETRIC], - identifier=TrainingMethod.STE) -class STEActivationQATQuantizer(BaseKerasQATTrainableQuantizer): - """ - Trainable constrained quantizer to quantize a layer outputs. 
- """ - - def __init__(self, quantization_config: TrainableQuantizerActivationConfig): - """ - Initialize a STEActivationQATQuantizer object with parameters to use - for the quantization. - - Args: - quantization_config: trainable quantizer config class - """ - super().__init__(quantization_config) - self.power_of_two = quantization_config.activation_quantization_method == QuantizationMethod.POWER_OF_TWO - self.threshold_values = quantization_config.activation_quantization_params[C.THRESHOLD] - self.threshold_shape = np.asarray(self.threshold_values).shape - self.np_threshold_values = float(self.threshold_values) - self.signed = quantization_config.activation_quantization_params[SIGNED] - if self.power_of_two: - self.np_threshold_values = np.power(2.0, - np.ceil(np.log2(np.maximum(self.np_threshold_values, C.MIN_THRESHOLD)))) - self.num_bits = quantization_config.activation_n_bits - delta = self.np_threshold_values / np.power(2.0, self.num_bits - int(self.signed)) - min_int = -int(self.signed) * (2 ** (self.num_bits - int(self.signed))) - max_int = (2 ** (self.num_bits - int(self.signed))) - 1 - self.min = delta * min_int - self.max = delta * max_int - - def initialize_quantization(self, - tensor_shape: TensorShape, - name: str, - layer: KerasTrainableQuantizationWrapper): - """ - Add quantizer parameters to the quantizer parameters dictionary - - Args: - tensor_shape: tensor shape of the quantized tensor. - name: Tensor name. - layer: Layer to quantize. 
- """ - ptq_threshold_tensor = layer.add_weight( - name + THRESHOLD_TENSOR, - shape=(), - initializer=tf.keras.initializers.Constant(1.0), - trainable=False) - ptq_threshold_tensor.assign(self.np_threshold_values) - - fq_min = layer.add_weight( - name + FQ_MIN, - shape=(), - initializer=tf.keras.initializers.Constant(-1.0), - trainable=False) - fq_min.assign(self.min) - - fq_max = layer.add_weight( - name + FQ_MAX, - shape=(), - initializer=tf.keras.initializers.Constant(1.0), - trainable=False) - fq_max.assign(self.max) - - # save the quantizer added parameters for later calculations - self.add_quantizer_variable(THRESHOLD_TENSOR, ptq_threshold_tensor, VariableGroup.QPARAMS) - self.add_quantizer_variable(FQ_MIN, fq_min, VariableGroup.QPARAMS) - self.add_quantizer_variable(FQ_MAX, fq_max, VariableGroup.QPARAMS) - - - def __call__(self, - inputs: tf.Tensor, - training: bool): - """ - Quantize a tensor. - Args: - inputs: Input tensor to quantize. - training: Whether the graph is in training mode. - - Returns: - The quantized tensor. - """ - - _min = self.get_quantizer_variable(FQ_MIN) - _max = self.get_quantizer_variable(FQ_MAX) - q_tensor = tf.quantization.fake_quant_with_min_max_vars(inputs, _min, _max, - num_bits=self.num_bits) - - return q_tensor - - def convert2inferable(self) -> Union[ActivationPOTInferableQuantizer, ActivationSymmetricInferableQuantizer]: - """ - Convert quantizer to inferable quantizer. - - Returns: - BaseKerasInferableQuantizer object. 
- """ - - if self.power_of_two: - pot_threshold = 2 ** np.ceil(np.log2(self.get_quantizer_variable(THRESHOLD_TENSOR))) - return ActivationPOTInferableQuantizer(num_bits=self.num_bits, - # In activation quantization is per-tensor only - thus we pass - # the threshold as a list with a len of 1 - threshold=[pot_threshold], - signed=self.signed) - else: - return ActivationSymmetricInferableQuantizer(num_bits=self.num_bits, - # In activation quantization is per-tensor only - thus we - # pass the threshold as a list with a len of 1 - threshold=[self.get_quantizer_variable(THRESHOLD_TENSOR).numpy()], - signed=self.signed) diff --git a/model_compression_toolkit/qat/keras/quantizer/ste_rounding/uniform_ste.py b/model_compression_toolkit/qat/keras/quantizer/ste_rounding/uniform_ste.py index d8c64cb06..642952599 100644 --- a/model_compression_toolkit/qat/keras/quantizer/ste_rounding/uniform_ste.py +++ b/model_compression_toolkit/qat/keras/quantizer/ste_rounding/uniform_ste.py @@ -16,28 +16,26 @@ import tensorflow as tf from tensorflow.python.framework.tensor_shape import TensorShape from model_compression_toolkit.constants import RANGE_MIN, RANGE_MAX +from model_compression_toolkit.qat.keras.quantizer.base_keras_qat_weight_quantizer import \ + BaseKerasQATWeightTrainableQuantizer from model_compression_toolkit.trainable_infrastructure.common.constants import FQ_MIN, FQ_MAX from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper from model_compression_toolkit.trainable_infrastructure import TrainingMethod from mct_quantizers import mark_quantizer, QuantizationMethod, QuantizationTarget -from mct_quantizers.keras.quantizers import \ - BaseKerasInferableQuantizer, WeightsUniformInferableQuantizer, ActivationUniformInferableQuantizer +from mct_quantizers.keras.quantizers import BaseKerasInferableQuantizer, WeightsUniformInferableQuantizer from model_compression_toolkit.qat.keras.quantizer.quant_utils import adjust_range_to_include_zero 
from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import fix_range_to_include_zero -from model_compression_toolkit import constants as C -from model_compression_toolkit.qat.keras.quantizer.base_keras_qat_quantizer import BaseKerasQATTrainableQuantizer -from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerWeightsConfig, \ - TrainableQuantizerActivationConfig +from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerWeightsConfig from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup @mark_quantizer(quantization_target=QuantizationTarget.Weights, quantization_method=[QuantizationMethod.UNIFORM], identifier=TrainingMethod.STE) -class STEUniformWeightQATQuantizer(BaseKerasQATTrainableQuantizer): +class STEUniformWeightQATQuantizer(BaseKerasQATWeightTrainableQuantizer): """ Trainable constrained quantizer to quantize a layer inputs. """ @@ -148,91 +146,3 @@ def convert2inferable(self) -> BaseKerasInferableQuantizer: input_rank=len(self.min_max_shape)) -@mark_quantizer(quantization_target=QuantizationTarget.Activation, - quantization_method=[QuantizationMethod.UNIFORM], - identifier=TrainingMethod.STE) -class STEUniformActivationQATQuantizer(BaseKerasQATTrainableQuantizer): - """ - Trainable constrained quantizer to quantize a layer outputs. - """ - - def __init__(self, quantization_config: TrainableQuantizerActivationConfig): - """ - Initialize a STEUniformActivationQATQuantizer object with parameters to use - for the quantization. 
- - Args: - quantization_config: trainable quantizer config class - """ - super().__init__(quantization_config) - - self.num_bits = quantization_config.activation_n_bits - self.min_range = quantization_config.activation_quantization_params[C.RANGE_MIN] - self.max_range = quantization_config.activation_quantization_params[C.RANGE_MAX] - - def initialize_quantization(self, - tensor_shape: TensorShape, - name: str, - layer: KerasTrainableQuantizationWrapper): - """ - Add quantizer parameters to the quantizer parameters dictionary - - Args: - tensor_shape: tensor shape of the quantized tensor. - name: Tensor name. - layer: Layer to quantize. - """ - fq_min = layer.add_weight( - name + FQ_MIN, - shape=(), - initializer=tf.keras.initializers.Constant(-1.0), - trainable=False) - fq_min.assign(self.min_range) - - fq_max = layer.add_weight( - name + FQ_MAX, - shape=(), - initializer=tf.keras.initializers.Constant(1.0), - trainable=False) - fq_max.assign(self.max_range) - - # save the quantizer added parameters for later calculations - self.add_quantizer_variable(FQ_MIN, fq_min, VariableGroup.QPARAMS) - self.add_quantizer_variable(FQ_MAX, fq_max, VariableGroup.QPARAMS) - - def __call__(self, - inputs: tf.Tensor, - training: bool): - """ - Quantize a tensor. - Args: - inputs: Input tensor to quantize. - training: Whether the graph is in training mode. - - Returns: - The quantized tensor. - """ - - _min = self.get_quantizer_variable(FQ_MIN) - _max = self.get_quantizer_variable(FQ_MAX) - _min, _max = adjust_range_to_include_zero(_min, _max, self.num_bits) - q_tensor = tf.quantization.fake_quant_with_min_max_vars(inputs, _min, _max, - num_bits=self.num_bits) - - return q_tensor - - def convert2inferable(self) -> BaseKerasInferableQuantizer: - """ - Convert quantizer to inferable quantizer. - - Returns: - BaseKerasInferableQuantizer object. 
- """ - min_range, max_range = fix_range_to_include_zero(self.get_quantizer_variable(FQ_MIN).numpy(), - self.get_quantizer_variable(FQ_MAX).numpy(), - self.num_bits) - return ActivationUniformInferableQuantizer(num_bits=self.num_bits, - # In activation quantization is per-tensor only - thus we pass - # the min/max as lists with a len of 1 - min_range=[min_range], - max_range=[max_range]) diff --git a/model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/__init__.py b/model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/__init__.py new file mode 100644 index 000000000..d59fed947 --- /dev/null +++ b/model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/__init__.py @@ -0,0 +1,20 @@ +# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +from .base_activation_quantizer import BaseKerasActivationTrainableQuantizer +from .ste.symmetric_ste import STESymmetricActivationTrainableQuantizer +from .ste.uniform_ste import STEUniformActivationTrainableQuantizer +from .lsq.symmetric_lsq import LSQSymmetricActivationTrainableQuantizer +from .lsq.uniform_lsq import LSQUniformActivationTrainableQuantizer diff --git a/model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/base_activation_quantizer.py b/model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/base_activation_quantizer.py new file mode 100644 index 000000000..7ccebe481 --- /dev/null +++ b/model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/base_activation_quantizer.py @@ -0,0 +1,22 @@ +# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +from abc import ABC + +from model_compression_toolkit.trainable_infrastructure import BaseKerasTrainableQuantizer + + +class BaseKerasActivationTrainableQuantizer(BaseKerasTrainableQuantizer, ABC): + """ Base class for Keras trainable activation quantizers """ + pass diff --git a/model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/__init__.py b/model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/__init__.py new file mode 100644 index 000000000..e11a7cc60 --- /dev/null +++ b/model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== diff --git a/model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/symmetric_lsq.py b/model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/symmetric_lsq.py new file mode 100644 index 000000000..1be5f2a94 --- /dev/null +++ b/model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/symmetric_lsq.py @@ -0,0 +1,127 @@ +# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +from model_compression_toolkit.trainable_infrastructure.keras.quantizer_utils import symmetric_lsq_quantizer +from model_compression_toolkit.trainable_infrastructure.keras.activation_quantizers import BaseKerasActivationTrainableQuantizer +from typing import Union + +import numpy as np +import tensorflow as tf +from tensorflow.python.framework.tensor_shape import TensorShape +from model_compression_toolkit.constants import SIGNED + +from model_compression_toolkit.trainable_infrastructure import TrainingMethod + +from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod +from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper +from mct_quantizers import QuantizationTarget, mark_quantizer +from model_compression_toolkit.qat.common import THRESHOLD_TENSOR +from model_compression_toolkit import constants as C + +from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerActivationConfig +from mct_quantizers.keras.quantizers import ActivationPOTInferableQuantizer, ActivationSymmetricInferableQuantizer +from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup + +@mark_quantizer(quantization_target=QuantizationTarget.Activation, + quantization_method=[QuantizationMethod.POWER_OF_TWO, 
QuantizationMethod.SYMMETRIC], + identifier=TrainingMethod.LSQ) +class LSQSymmetricActivationTrainableQuantizer(BaseKerasActivationTrainableQuantizer): + """ + Trainable constrained quantizer to quantize layer activations. + """ + + def __init__(self, quantization_config: TrainableQuantizerActivationConfig): + """ + Initialize a LSQActivationQATQuantizer object with parameters to use + for the quantization. + + Args: + quantization_config: trainable quantizer config class + """ + super().__init__(quantization_config) + self.power_of_two = quantization_config.activation_quantization_method == QuantizationMethod.POWER_OF_TWO + self.threshold_values = float(quantization_config.activation_quantization_params[C.THRESHOLD]) + self.threshold_shape = np.asarray(self.threshold_values).shape + self.sign = quantization_config.activation_quantization_params[SIGNED] + self.num_bits = quantization_config.activation_n_bits + n_pos_bits = self.num_bits - int(self.sign) + self.min_int = -int(self.sign) * (2 ** n_pos_bits) + self.max_int = (2 ** n_pos_bits) - 1 + if self.power_of_two: + self.threshold_values = np.power(2.0, np.ceil(np.log2(np.maximum(self.threshold_values, C.MIN_THRESHOLD)))) + + + def initialize_quantization(self, + tensor_shape: TensorShape, + name: str, + layer: KerasTrainableQuantizationWrapper): + """ + Add quantizer parameters to the quantizer parameters dictionary + + Args: + tensor_shape: tensor shape of the quantized tensor. + name: Tensor name. + layer: Layer to quantize. + """ + ptq_threshold_tensor = layer.add_weight( + name + THRESHOLD_TENSOR, + shape=(), + initializer=tf.keras.initializers.Constant(1.0), + trainable=True) + ptq_threshold_tensor.assign(self.threshold_values) + + # save the quantizer added parameters for later calculations + self.add_quantizer_variable(THRESHOLD_TENSOR, ptq_threshold_tensor, VariableGroup.QPARAMS) + + def __call__(self, + inputs: tf.Tensor, + training: bool): + """ + Quantize a tensor. 
+ Args: + inputs: Input tensor to quantize. + training: Whether the graph is in training mode. + + Returns: + The quantized tensor. + """ + + thresholds = self.get_quantizer_variable(THRESHOLD_TENSOR) + n_channels = inputs.shape[-1] + scale_factor = 1.0 / np.sqrt(self.max_int * n_channels) + q_tensor = symmetric_lsq_quantizer(inputs, thresholds, self.num_bits, self.sign, self.min_int, self.max_int, scale_factor) + return q_tensor + + def convert2inferable(self) -> Union[ActivationPOTInferableQuantizer, ActivationSymmetricInferableQuantizer]: + """ + Convert quantizer to inferable quantizer. + + Returns: + BaseKerasInferableQuantizer object. + """ + + if self.power_of_two: + thresholds = 2 ** np.ceil(np.log2(self.get_quantizer_variable(THRESHOLD_TENSOR).numpy())) + return ActivationPOTInferableQuantizer(num_bits=self.num_bits, + # In activation quantization is per-tensor only - thus we pass + # the threshold as a list with a len of 1 + threshold=[thresholds], + signed=self.sign) + else: + thresholds = self.get_quantizer_variable(THRESHOLD_TENSOR).numpy() + return ActivationSymmetricInferableQuantizer(num_bits=self.num_bits, + # In activation quantization is per-tensor only - thus we + # pass the threshold as a list with a len of 1 + threshold=[thresholds], + signed=self.sign) \ No newline at end of file diff --git a/model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/uniform_lsq.py b/model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/uniform_lsq.py new file mode 100644 index 000000000..60ddd9a46 --- /dev/null +++ b/model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/uniform_lsq.py @@ -0,0 +1,129 @@ +# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import numpy as np +import tensorflow as tf +from tensorflow.python.framework.tensor_shape import TensorShape +from model_compression_toolkit.constants import RANGE_MIN, RANGE_MAX +from model_compression_toolkit.trainable_infrastructure.keras.quantizer_utils import uniform_lsq_quantizer +from model_compression_toolkit.trainable_infrastructure.common.constants import FQ_MIN, FQ_MAX +from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper +from model_compression_toolkit.trainable_infrastructure import TrainingMethod + +from mct_quantizers import mark_quantizer, QuantizationMethod, QuantizationTarget +from mct_quantizers.keras.quantizers import \ + BaseKerasInferableQuantizer, WeightsUniformInferableQuantizer, ActivationUniformInferableQuantizer + +from model_compression_toolkit import constants as C + +from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerWeightsConfig, \ + TrainableQuantizerActivationConfig +from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup +from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import fix_range_to_include_zero +from model_compression_toolkit.trainable_infrastructure.keras.activation_quantizers import \ + BaseKerasActivationTrainableQuantizer + + +@mark_quantizer(quantization_target=QuantizationTarget.Activation, + quantization_method=[QuantizationMethod.UNIFORM], + 
identifier=TrainingMethod.LSQ) +class LSQUniformActivationTrainableQuantizer(BaseKerasActivationTrainableQuantizer): + """ + Trainable constrained quantizer to quantize layer activations. + """ + + def __init__(self, quantization_config: TrainableQuantizerActivationConfig): + """ + Initialize a LSQUniformActivationQATQuantizer object with parameters to use + for the quantization. + + Args: + quantization_config: trainable quantizer config class + """ + super().__init__(quantization_config) + + self.num_bits = quantization_config.activation_n_bits + self.min_range = np.array(quantization_config.activation_quantization_params[C.RANGE_MIN]) + self.max_range = np.array(quantization_config.activation_quantization_params[C.RANGE_MAX]) + self.min_int = 0 + self.max_int = 2**self.num_bits - 1 + + def initialize_quantization(self, + tensor_shape: TensorShape, + name: str, + layer: KerasTrainableQuantizationWrapper): + """ + Add quantizer parameters to the quantizer parameters dictionary + + Args: + tensor_shape: tensor shape of the quantized tensor. + name: Tensor name. + layer: Layer to quantize. + """ + fq_min = layer.add_weight( + name + FQ_MIN, + shape=(), + initializer=tf.keras.initializers.Constant(-1.0), + trainable=True) + fq_min.assign(self.min_range) + + fq_max = layer.add_weight( + name + FQ_MAX, + shape=(), + initializer=tf.keras.initializers.Constant(1.0), + trainable=True) + fq_max.assign(self.max_range) + + # save the quantizer added parameters for later calculations + self.add_quantizer_variable(FQ_MIN, fq_min, VariableGroup.QPARAMS) + self.add_quantizer_variable(FQ_MAX, fq_max, VariableGroup.QPARAMS) + + def __call__(self, + inputs: tf.Tensor, + training: bool): + """ + Quantize a tensor. + Args: + inputs: Input tensor to quantize. + training: Whether the graph is in training mode. + + Returns: + The quantized tensor. 
+ """ + + min_range = self.get_quantizer_variable(FQ_MIN) + max_range = self.get_quantizer_variable(FQ_MAX) + n_channels = inputs.shape[-1] + scale_factor = 1.0 / np.sqrt(self.max_int * n_channels) + q_tensor = uniform_lsq_quantizer(inputs, min_range, max_range, self.num_bits, self.min_int, self.max_int, scale_factor) + return q_tensor + + def convert2inferable(self) -> BaseKerasInferableQuantizer: + """ + Convert quantizer to inferable quantizer. + + Returns: + BaseKerasInferableQuantizer object. + """ + min_range, max_range = fix_range_to_include_zero(self.get_quantizer_variable(FQ_MIN).numpy(), + self.get_quantizer_variable(FQ_MAX).numpy(), + self.num_bits) + return ActivationUniformInferableQuantizer(num_bits=self.num_bits, + # In activation quantization is per-tensor only - thus we pass + # the min/max as lists with a len of 1 + min_range=[min_range], + max_range=[max_range]) + + diff --git a/model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/__init__.py b/model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/__init__.py new file mode 100644 index 000000000..e11a7cc60 --- /dev/null +++ b/model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== diff --git a/model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/symmetric_ste.py b/model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/symmetric_ste.py new file mode 100644 index 000000000..691f326a4 --- /dev/null +++ b/model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/symmetric_ste.py @@ -0,0 +1,148 @@ +# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +from typing import Union + +import numpy as np +import tensorflow as tf + +from mct_quantizers import mark_quantizer, QuantizationTarget, QuantizationMethod +from mct_quantizers.keras.quantizers import ActivationPOTInferableQuantizer, ActivationSymmetricInferableQuantizer +from model_compression_toolkit import constants as C +from model_compression_toolkit.trainable_infrastructure import TrainingMethod, TrainableQuantizerActivationConfig +from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup +from model_compression_toolkit.trainable_infrastructure.common.constants import THRESHOLD_TENSOR +from model_compression_toolkit.trainable_infrastructure.keras.activation_quantizers import BaseKerasActivationTrainableQuantizer +from model_compression_toolkit.constants import SIGNED +from tensorflow.python.framework.tensor_shape import TensorShape +from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper +from model_compression_toolkit.trainable_infrastructure.common.constants import FQ_MIN, FQ_MAX + + +# moved (and renamed) from model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py +@mark_quantizer(quantization_target=QuantizationTarget.Activation, + quantization_method=[QuantizationMethod.POWER_OF_TWO, QuantizationMethod.SYMMETRIC], + identifier=TrainingMethod.STE) +class STESymmetricActivationTrainableQuantizer(BaseKerasActivationTrainableQuantizer): + + """ + Trainable constrained quantizer to quantize a layer outputs. + """ + + def __init__(self, quantization_config: TrainableQuantizerActivationConfig, freeze_quant_params: bool = False): + """ + Initialize a STESymmetricActivationTrainableQuantizer object with parameters to use + for the quantization. 
+ + Args: + quantization_config: trainable quantizer config class + freeze_quant_params: whether to freeze learnable quantization parameters. This is unused here, since there are no quantization params that are learned. + """ + super().__init__(quantization_config, freeze_quant_params) + self.power_of_two = quantization_config.activation_quantization_method == QuantizationMethod.POWER_OF_TWO + self.threshold_values = quantization_config.activation_quantization_params[C.THRESHOLD] + self.threshold_shape = np.asarray(self.threshold_values).shape + self.np_threshold_values = float(self.threshold_values) + self.signed = quantization_config.activation_quantization_params[SIGNED] + if self.power_of_two: + self.np_threshold_values = np.power(2.0, + np.ceil( + np.log2(np.maximum(self.np_threshold_values, C.MIN_THRESHOLD)))) + self.num_bits = quantization_config.activation_n_bits + delta = self.np_threshold_values / np.power(2.0, self.num_bits - int(self.signed)) + min_int = -int(self.signed) * (2 ** (self.num_bits - int(self.signed))) + max_int = (2 ** (self.num_bits - int(self.signed))) - 1 + self.min = delta * min_int + self.max = delta * max_int + + def initialize_quantization(self, + tensor_shape: TensorShape, + name: str, + layer: KerasTrainableQuantizationWrapper): + """ + Add quantizer parameters to the quantizer parameters dictionary + + Args: + tensor_shape: tensor shape of the quantized tensor. + name: Tensor name. + layer: Layer to quantize.
+ """ + ptq_threshold_tensor = layer.add_weight( + name + THRESHOLD_TENSOR, + shape=(), + initializer=tf.keras.initializers.Constant(1.0), + trainable=False) + ptq_threshold_tensor.assign(self.np_threshold_values) + + fq_min = layer.add_weight( + name + FQ_MIN, + shape=(), + initializer=tf.keras.initializers.Constant(-1.0), + trainable=False) + fq_min.assign(self.min) + + fq_max = layer.add_weight( + name + FQ_MAX, + shape=(), + initializer=tf.keras.initializers.Constant(1.0), + trainable=False) + fq_max.assign(self.max) + + # save the quantizer added parameters for later calculations + self.add_quantizer_variable(THRESHOLD_TENSOR, ptq_threshold_tensor, VariableGroup.QPARAMS) + self.add_quantizer_variable(FQ_MIN, fq_min, VariableGroup.QPARAMS) + self.add_quantizer_variable(FQ_MAX, fq_max, VariableGroup.QPARAMS) + + def __call__(self, + inputs: tf.Tensor, + training: bool): + """ + Quantize a tensor. + Args: + inputs: Input tensor to quantize. + training: Whether the graph is in training mode. + + Returns: + The quantized tensor. + """ + + _min = self.get_quantizer_variable(FQ_MIN) + _max = self.get_quantizer_variable(FQ_MAX) + q_tensor = tf.quantization.fake_quant_with_min_max_vars(inputs, _min, _max, + num_bits=self.num_bits) + + return q_tensor + + def convert2inferable(self) -> Union[ActivationPOTInferableQuantizer, ActivationSymmetricInferableQuantizer]: + """ + Convert quantizer to inferable quantizer. + + Returns: + BaseKerasInferableQuantizer object. 
+ """ + + if self.power_of_two: + pot_threshold = 2 ** np.ceil(np.log2(self.get_quantizer_variable(THRESHOLD_TENSOR))) + return ActivationPOTInferableQuantizer(num_bits=self.num_bits, + # In activation quantization is per-tensor only - thus we pass + # the threshold as a list with a len of 1 + threshold=[pot_threshold], + signed=self.signed) + else: + return ActivationSymmetricInferableQuantizer(num_bits=self.num_bits, + # In activation quantization is per-tensor only - thus we + # pass the threshold as a list with a len of 1 + threshold=[ + self.get_quantizer_variable(THRESHOLD_TENSOR).numpy()], + signed=self.signed) diff --git a/model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/uniform_ste.py b/model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/uniform_ste.py new file mode 100644 index 000000000..93e368694 --- /dev/null +++ b/model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/uniform_ste.py @@ -0,0 +1,122 @@ +# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +import tensorflow as tf +from tensorflow.python.framework.tensor_shape import TensorShape +from model_compression_toolkit.trainable_infrastructure.common.constants import FQ_MIN, FQ_MAX +from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper +from model_compression_toolkit.trainable_infrastructure import TrainingMethod + +from mct_quantizers import mark_quantizer, QuantizationMethod, QuantizationTarget +from mct_quantizers.keras.quantizers import BaseKerasInferableQuantizer, ActivationUniformInferableQuantizer + +from model_compression_toolkit.qat.keras.quantizer.quant_utils import adjust_range_to_include_zero +from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import fix_range_to_include_zero +from model_compression_toolkit import constants as C + +from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerActivationConfig +from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup +from model_compression_toolkit.trainable_infrastructure.keras.activation_quantizers import BaseKerasActivationTrainableQuantizer + + +@mark_quantizer(quantization_target=QuantizationTarget.Activation, + quantization_method=[QuantizationMethod.UNIFORM], + identifier=TrainingMethod.STE) +class STEUniformActivationTrainableQuantizer(BaseKerasActivationTrainableQuantizer): + """ + Trainable constrained quantizer to quantize a layer outputs. + """ + + def __init__(self, quantization_config: TrainableQuantizerActivationConfig, freeze_quant_params: bool = False): + """ + Initialize a STEUniformActivationTrainableQuantizer object with parameters to use + for the quantization. + + Args: + quantization_config: trainable quantizer config class + freeze_quant_params: whether to freeze learnable quantization parameters. 
This is unused here, since there are no quantization params that are learned. + + """ + super().__init__(quantization_config, freeze_quant_params) + + self.num_bits = quantization_config.activation_n_bits + self.min_range = quantization_config.activation_quantization_params[C.RANGE_MIN] + self.max_range = quantization_config.activation_quantization_params[C.RANGE_MAX] + + def initialize_quantization(self, + tensor_shape: TensorShape, + name: str, + layer: KerasTrainableQuantizationWrapper): + """ + Add quantizer parameters to the quantizer parameters dictionary + + Args: + tensor_shape: tensor shape of the quantized tensor. + name: Tensor name. + layer: Layer to quantize. + """ + fq_min = layer.add_weight( + name + FQ_MIN, + shape=(), + initializer=tf.keras.initializers.Constant(-1.0), + trainable=False) + fq_min.assign(self.min_range) + + fq_max = layer.add_weight( + name + FQ_MAX, + shape=(), + initializer=tf.keras.initializers.Constant(1.0), + trainable=False) + fq_max.assign(self.max_range) + + # save the quantizer added parameters for later calculations + self.add_quantizer_variable(FQ_MIN, fq_min, VariableGroup.QPARAMS) + self.add_quantizer_variable(FQ_MAX, fq_max, VariableGroup.QPARAMS) + + def __call__(self, + inputs: tf.Tensor, + training: bool): + """ + Quantize a tensor. + Args: + inputs: Input tensor to quantize. + training: Whether the graph is in training mode. + + Returns: + The quantized tensor. + """ + + _min = self.get_quantizer_variable(FQ_MIN) + _max = self.get_quantizer_variable(FQ_MAX) + _min, _max = adjust_range_to_include_zero(_min, _max, self.num_bits) + q_tensor = tf.quantization.fake_quant_with_min_max_vars(inputs, _min, _max, + num_bits=self.num_bits) + + return q_tensor + + def convert2inferable(self) -> BaseKerasInferableQuantizer: + """ + Convert quantizer to inferable quantizer. + + Returns: + BaseKerasInferableQuantizer object.
+ """ + min_range, max_range = fix_range_to_include_zero(self.get_quantizer_variable(FQ_MIN).numpy(), + self.get_quantizer_variable(FQ_MAX).numpy(), + self.num_bits) + return ActivationUniformInferableQuantizer(num_bits=self.num_bits, + # In activation quantization is per-tensor only - thus we pass + # the min/max as lists with a len of 1 + min_range=[min_range], + max_range=[max_range]) diff --git a/model_compression_toolkit/trainable_infrastructure/keras/base_keras_quantizer.py b/model_compression_toolkit/trainable_infrastructure/keras/base_keras_quantizer.py index 2be7d15ff..33d5ab01f 100644 --- a/model_compression_toolkit/trainable_infrastructure/keras/base_keras_quantizer.py +++ b/model_compression_toolkit/trainable_infrastructure/keras/base_keras_quantizer.py @@ -28,16 +28,10 @@ import tensorflow as tf class BaseKerasTrainableQuantizer(BaseTrainableQuantizer): - def __init__(self, - quantization_config: Union[TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig]): - """ - This class is a base quantizer which validates provided quantization config and defines an abstract function which any quantizer needs to implement. - This class adds to the base quantizer a get_config and from_config functions to enable loading and saving the keras model. - - Args: - quantization_config: quantizer config class contains all the information about a quantizer configuration. - """ - super().__init__(quantization_config) + """ + This class is a base quantizer which validates provided quantization config and defines an abstract function which any quantizer needs to implement. + This class adds to the base quantizer a get_config and from_config functions to enable loading and saving the keras model. 
+ """ def get_config(self) -> Dict[str, Any]: """ @@ -77,6 +71,14 @@ def get_trainable_variables(self, group: VariableGroup) -> List[tf.Tensor]: quantizer_parameter, parameter_group = parameter_dict[VAR], parameter_dict[GROUP] if quantizer_parameter.trainable and parameter_group == group: quantizer_trainable.append(quantizer_parameter) + + # sanity check to catch inconsistent initialization + if self.freeze_quant_params and group == VariableGroup.QPARAMS and quantizer_trainable: + Logger.critical( + 'Found trainable quantization params despite self.freeze_quant_params=True. ' + 'Quantization parameters were probably not initialized correctly in the Quantizer.' + ) # pragma: no cover + return quantizer_trainable diff --git a/model_compression_toolkit/trainable_infrastructure/keras/quantizer_utils.py b/model_compression_toolkit/trainable_infrastructure/keras/quantizer_utils.py index 2c1a35ae9..012c2d52a 100644 --- a/model_compression_toolkit/trainable_infrastructure/keras/quantizer_utils.py +++ b/model_compression_toolkit/trainable_infrastructure/keras/quantizer_utils.py @@ -16,6 +16,9 @@ import numpy as np import tensorflow as tf +from model_compression_toolkit.qat.keras.quantizer.quant_utils import grad_scale, ste_round, \ + adjust_range_to_include_zero + def int_quantization_with_threshold(data: tf.Tensor, n_bits: int, @@ -46,3 +49,60 @@ def int_quantization_with_threshold(data: tf.Tensor, return tf.clip_by_value((data / (threshold + eps)) * (2 ** (n_bits - int(signed))), clip_value_max=clip_max, clip_value_min=clip_min) + + +def symmetric_lsq_quantizer(x: tf.Tensor, + thresholds: tf.Tensor, + num_bits: int, + sign: bool, + min_int: int, + max_int:int, + scale_factor: float) -> tf.Tensor: + """ + Symmetric quantizer according to LSQ algorithm: https://arxiv.org/pdf/1902.08153.pdf + Args: + x: input to quantize + thresholds: thresholds of quantization levels + num_bits: number of bits for quantization + sign: whether x is signed or not + min_int: min clipping 
integer value + max_int: max clipping integer value + scale_factor: grad scale of LSQ algorithm + Returns: + A quantized tensor + """ + delta = thresholds / (2 ** (num_bits - int(sign))) + delta_scaled = grad_scale(delta, scale_factor) + rounded = ste_round(x / delta_scaled) + clipped = tf.math.minimum(tf.math.maximum(rounded, min_int), max_int) + quantized = delta_scaled * clipped + return quantized + + + def uniform_lsq_quantizer(x: tf.Tensor, + min_range: tf.Tensor, + max_range: tf.Tensor, + num_bits: int, + min_int: int, + max_int:int, + scale_factor: float) -> tf.Tensor: + """ + Uniform quantizer according to LSQ algorithm: https://arxiv.org/pdf/1902.08153.pdf + Args: + x: input to quantize + min_range: min range of quantization values + max_range: max range of quantization values + num_bits: number of bits for quantization + min_int: min clipping integer value + max_int: max clipping integer value + scale_factor: grad scale of LSQ algorithm + Returns: + A quantized tensor + """ + min_range, max_range = adjust_range_to_include_zero(min_range, max_range, num_bits) + delta = (max_range - min_range) / (2 ** num_bits - 1) + delta_scaled = grad_scale(delta, scale_factor) + rounded = ste_round((x-min_range) / delta_scaled) + clipped = tf.math.minimum(tf.math.maximum(rounded, min_int), max_int) + quantized = delta_scaled * clipped + min_range + return quantized diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/qat/qat_test.py b/tests/keras_tests/feature_networks_tests/feature_networks/qat/qat_test.py index 275a12917..bd73659aa 100644 --- a/tests/keras_tests/feature_networks_tests/feature_networks/qat/qat_test.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/qat/qat_test.py @@ -23,10 +23,13 @@ from mct_quantizers.common.base_inferable_quantizer import QuantizerID from mct_quantizers.common.get_all_subclasses import get_all_subclasses from mct_quantizers.keras.quantizers import BaseKerasInferableQuantizer -from
model_compression_toolkit.qat.keras.quantizer.base_keras_qat_quantizer import BaseKerasQATTrainableQuantizer +from model_compression_toolkit.qat.keras.quantizer.base_keras_qat_weight_quantizer import \ + BaseKerasQATWeightTrainableQuantizer from model_compression_toolkit.trainable_infrastructure import TrainingMethod, KerasTrainableQuantizationWrapper, \ BaseKerasTrainableQuantizer from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import BaseTrainableQuantizer +from model_compression_toolkit.trainable_infrastructure.keras.activation_quantizers import \ + BaseKerasActivationTrainableQuantizer from model_compression_toolkit.trainable_infrastructure.keras.load_model import \ keras_load_quantized_model from tests.keras_tests.feature_networks_tests.base_keras_feature_test import BaseKerasFeatureNetworkTest @@ -147,7 +150,7 @@ def compare(self, quantized_model, float_model, loaded_model, input_x=None, quan holder_layers = get_layers_from_model_by_type(quantized_model, KerasActivationQuantizationHolder) self.unit_test.assertTrue(len(holder_layers)==2) for holder_layer in holder_layers: - self.unit_test.assertTrue(holder_layer.get_config()['activation_holder_quantizer']['class_name'] == 'STEActivationQATQuantizer') + self.unit_test.assertTrue(holder_layer.get_config()['activation_holder_quantizer']['class_name'] == 'STESymmetricActivationTrainableQuantizer') self.unit_test.assertTrue(holder_layer.get_config()['activation_holder_quantizer']['config']['quantization_config']['activation_n_bits'] == 4) # Assert weights of quantizer were added to the quantization holder layer @@ -233,7 +236,9 @@ def run_test(self, **kwargs): self.unit_test.assertTrue(np.isclose(np.linalg.norm(out_qat_finalize_model - out_ptq_model) / np.linalg.norm(out_ptq_model), 0, atol=1e-6)) def compare(self, qat_model, finalize=False, input_x=None, quantization_info=None): - all_trainable_quantizers = get_all_subclasses(BaseKerasQATTrainableQuantizer) + 
all_trainable_quantizers = get_all_subclasses(BaseKerasQATWeightTrainableQuantizer) + all_trainable_quantizers = all_trainable_quantizers.union(get_all_subclasses(BaseKerasActivationTrainableQuantizer)) + all_inferable_quantizers = get_all_subclasses(BaseKerasInferableQuantizer) for layer in qat_model.layers: if isinstance(layer, KerasActivationQuantizationHolder): diff --git a/tests/keras_tests/trainable_infrastructure_tests/test_keras_trainable_infra_runner.py b/tests/keras_tests/trainable_infrastructure_tests/test_keras_trainable_infra_runner.py index 19694d92a..35366d02c 100644 --- a/tests/keras_tests/trainable_infrastructure_tests/test_keras_trainable_infra_runner.py +++ b/tests/keras_tests/trainable_infrastructure_tests/test_keras_trainable_infra_runner.py @@ -18,12 +18,14 @@ from model_compression_toolkit.trainable_infrastructure import TrainingMethod from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod from mct_quantizers import QuantizationTarget -from model_compression_toolkit.qat.keras.quantizer.ste_rounding.symmetric_ste import STEWeightQATQuantizer, \ - STEActivationQATQuantizer -from model_compression_toolkit.qat.keras.quantizer.ste_rounding.uniform_ste import STEUniformWeightQATQuantizer, \ - STEUniformActivationQATQuantizer -from model_compression_toolkit.qat.keras.quantizer.lsq.uniform_lsq import LSQUniformActivationQATQuantizer, LSQUniformWeightQATQuantizer -from model_compression_toolkit.qat.keras.quantizer.lsq.symmetric_lsq import LSQActivationQATQuantizer, LSQWeightQATQuantizer +from model_compression_toolkit.qat.keras.quantizer.ste_rounding.symmetric_ste import STEWeightQATQuantizer +from model_compression_toolkit.qat.keras.quantizer.ste_rounding.uniform_ste import STEUniformWeightQATQuantizer +from model_compression_toolkit.trainable_infrastructure.keras.activation_quantizers import \ + STESymmetricActivationTrainableQuantizer, STEUniformActivationTrainableQuantizer, \ + 
LSQUniformActivationTrainableQuantizer, LSQSymmetricActivationTrainableQuantizer + +from model_compression_toolkit.qat.keras.quantizer.lsq.uniform_lsq import LSQUniformWeightQATQuantizer +from model_compression_toolkit.qat.keras.quantizer.lsq.symmetric_lsq import LSQWeightQATQuantizer from model_compression_toolkit.trainable_infrastructure import BaseKerasTrainableQuantizer from tests.keras_tests.trainable_infrastructure_tests.trainable_keras.test_get_quantizers import \ TestGetTrainableQuantizer @@ -60,17 +62,17 @@ def test_get_quantizers(self): quant_method=QuantizationMethod.POWER_OF_TWO, quantizer_base_class=BaseKerasTrainableQuantizer, quantizer_id=TrainingMethod.STE, - expected_quantizer_class=STEActivationQATQuantizer).run_test() + expected_quantizer_class=STESymmetricActivationTrainableQuantizer).run_test() TestGetTrainableQuantizer(self, quant_target=QuantizationTarget.Activation, quant_method=QuantizationMethod.SYMMETRIC, quantizer_base_class=BaseKerasTrainableQuantizer, quantizer_id=TrainingMethod.STE, - expected_quantizer_class=STEActivationQATQuantizer).run_test() + expected_quantizer_class=STESymmetricActivationTrainableQuantizer).run_test() TestGetTrainableQuantizer(self, quant_target=QuantizationTarget.Activation, quant_method=QuantizationMethod.UNIFORM, quantizer_base_class=BaseKerasTrainableQuantizer, quantizer_id=TrainingMethod.STE, - expected_quantizer_class=STEUniformActivationQATQuantizer).run_test() + expected_quantizer_class=STEUniformActivationTrainableQuantizer).run_test() TestGetTrainableQuantizer(self, quant_target=QuantizationTarget.Weights, quant_method=QuantizationMethod.SYMMETRIC, quantizer_base_class=BaseKerasTrainableQuantizer, @@ -85,12 +87,12 @@ def test_get_quantizers(self): quant_method=QuantizationMethod.SYMMETRIC, quantizer_base_class=BaseKerasTrainableQuantizer, quantizer_id=TrainingMethod.LSQ, - expected_quantizer_class=LSQActivationQATQuantizer).run_test() + 
expected_quantizer_class=LSQSymmetricActivationTrainableQuantizer).run_test() TestGetTrainableQuantizer(self, quant_target=QuantizationTarget.Activation, quant_method=QuantizationMethod.UNIFORM, quantizer_base_class=BaseKerasTrainableQuantizer, quantizer_id=TrainingMethod.LSQ, - expected_quantizer_class=LSQUniformActivationQATQuantizer).run_test() + expected_quantizer_class=LSQUniformActivationTrainableQuantizer).run_test()