From 2f794ad79a69ee23825fe4fc2e106d671629a492 Mon Sep 17 00:00:00 2001 From: liord Date: Sun, 24 Nov 2024 17:14:10 +0200 Subject: [PATCH] Refactor Target Platform Capabilities Design - Create a new `schema` package to house all target platform modeling classes - Introduce a new versioning system with minor and patch versions Additional Changes: - Update existing target platform models to adhere to the new versioning convention - Add necessary metadata - Correct all import statements - Update and enhance tests to reflect the design changes --- model_compression_toolkit/constants.py | 1 + .../core/common/graph/base_node.py | 5 +- .../resource_utilization_data.py | 4 +- .../candidate_node_quantization_config.py | 4 +- .../quantization/node_quantization_config.py | 4 +- .../qparams_activations_computation.py | 3 +- .../set_node_quantization_config.py | 2 +- ...ply_activation_bias_correction_to_graph.py | 2 +- .../apply_bias_correction_to_graph.py | 2 +- .../substitutions/batchnorm_reconstruction.py | 4 +- .../shift_negative_activation.py | 4 +- model_compression_toolkit/metadata.py | 11 +- .../schema/__init__.py | 14 + .../target_platform_capabilities/schema/v1.py | 727 ++++++++++++++++++ .../target_platform/__init__.py | 8 +- .../target_platform/fusing.py | 69 -- .../target_platform/op_quantization_config.py | 362 --------- .../target_platform/operators.py | 71 -- .../target_platform/target_platform_model.py | 204 +---- .../operations_to_layers.py | 3 +- .../target_platform_capabilities.py | 15 +- .../tpc_models/imx500_tpc/v1/tp_model.py | 64 +- .../tpc_models/imx500_tpc/v1/tpc_keras.py | 5 +- .../tpc_models/imx500_tpc/v1/tpc_pytorch.py | 7 +- .../tpc_models/imx500_tpc/v1_lut/tp_model.py | 64 +- .../tpc_models/imx500_tpc/v1_lut/tpc_keras.py | 5 +- .../imx500_tpc/v1_lut/tpc_pytorch.py | 7 +- .../tpc_models/imx500_tpc/v1_pot/tp_model.py | 65 +- .../tpc_models/imx500_tpc/v1_pot/tpc_keras.py | 5 +- .../imx500_tpc/v1_pot/tpc_pytorch.py | 7 +- .../tpc_models/imx500_tpc/v2/tp_model.py | 64 +- .../tpc_models/imx500_tpc/v2/tpc_keras.py | 5 +- .../tpc_models/imx500_tpc/v2/tpc_pytorch.py | 7 +- .../tpc_models/imx500_tpc/v2_lut/tp_model.py | 64 +- .../tpc_models/imx500_tpc/v2_lut/tpc_keras.py | 5 +- .../imx500_tpc/v2_lut/tpc_pytorch.py | 7 +- .../tpc_models/imx500_tpc/v3/tp_model.py | 74 +- .../tpc_models/imx500_tpc/v3/tpc_keras.py | 5 +- .../tpc_models/imx500_tpc/v3/tpc_pytorch.py | 7 +- .../tpc_models/imx500_tpc/v3_lut/tp_model.py | 66 +- .../tpc_models/imx500_tpc/v3_lut/tpc_keras.py | 5 +- .../imx500_tpc/v3_lut/tpc_pytorch.py | 7 +- .../tpc_models/imx500_tpc/v4/tp_model.py | 115 +-- .../tpc_models/imx500_tpc/v4/tpc_keras.py | 5 +- .../tpc_models/imx500_tpc/v4/tpc_pytorch.py | 7 +- .../tpc_models/qnnpack_tpc/v1/tp_model.py | 36 +- .../tpc_models/qnnpack_tpc/v1/tpc_keras.py | 7 +- .../tpc_models/qnnpack_tpc/v1/tpc_pytorch.py | 7 +- .../tpc_models/tflite_tpc/v1/tp_model.py | 82 +- .../tpc_models/tflite_tpc/v1/tpc_keras.py | 7 +- .../tpc_models/tflite_tpc/v1/tpc_pytorch.py | 7 +- .../helpers/generate_test_tp_model.py | 65 +- tests/common_tests/test_tp_model.py | 117 ++- .../tflite_int8/imx500_int8_tp_model.py | 69 +- .../bn_attributes_quantization_test.py | 71 +- .../const_quantization_test.py | 5 +- .../feature_networks/mixed_precision_tests.py | 19 +- .../weights_mixed_precision_tests.py | 36 +- .../function_tests/test_custom_layer.py | 40 +- .../function_tests/test_hmse_error_method.py | 14 +- .../function_tests/test_layer_fusing.py | 68 +- .../test_node_quantization_configurations.py | 2 +- 
.../non_parallel_tests/test_keras_tp_model.py | 120 +-- .../function_tests/layer_fusing_test.py | 58 +- .../function_tests/test_pytorch_tp_model.py | 101 ++- .../bn_attributes_quantization_test.py | 71 +- .../feature_models/const_quantization_test.py | 39 +- .../mixed_precision_activation_test.py | 10 +- .../mixed_precision_weights_test.py | 25 +- 69 files changed, 1750 insertions(+), 1517 deletions(-) create mode 100644 model_compression_toolkit/target_platform_capabilities/schema/__init__.py create mode 100644 model_compression_toolkit/target_platform_capabilities/schema/v1.py diff --git a/model_compression_toolkit/constants.py b/model_compression_toolkit/constants.py index 7d5817036..53000c3e1 100644 --- a/model_compression_toolkit/constants.py +++ b/model_compression_toolkit/constants.py @@ -21,6 +21,7 @@ # Metadata fields MCT_VERSION = 'mct_version' TPC_VERSION = 'tpc_version' +TPC_SCHEMA = 'tpc_schema' WEIGHTS_SIGNED = True # Minimal threshold to use for quantization ranges: diff --git a/model_compression_toolkit/core/common/graph/base_node.py b/model_compression_toolkit/core/common/graph/base_node.py index b90bc6a87..de60617e6 100644 --- a/model_compression_toolkit/core/common/graph/base_node.py +++ b/model_compression_toolkit/core/common/graph/base_node.py @@ -22,8 +22,9 @@ ACTIVATION_N_BITS_ATTRIBUTE, FP32_BYTES_PER_PARAMETER from model_compression_toolkit.core.common.quantization.node_quantization_config import WeightsAttrQuantizationConfig from model_compression_toolkit.logger import Logger -from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationConfigOptions, \ - TargetPlatformCapabilities, LayerFilterParams, OpQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities, LayerFilterParams +from model_compression_toolkit.target_platform_capabilities.schema.v1 import OpQuantizationConfig, \ + QuantizationConfigOptions class BaseNode: diff --git a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py index ff3a27c87..67ec4df40 100644 --- a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +++ b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py @@ -23,8 +23,8 @@ from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX from model_compression_toolkit.core.graph_prep_runner import graph_preparation_runner -from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities, \ - QuantizationConfigOptions +from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities +from model_compression_toolkit.target_platform_capabilities.schema.v1 import QuantizationConfigOptions def compute_resource_utilization_data(in_model: Any, diff --git a/model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py b/model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py index 41c993e6a..33b1b47e7 100644 --- a/model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +++ 
b/model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py @@ -17,8 +17,8 @@ from model_compression_toolkit.core import QuantizationConfig from model_compression_toolkit.core.common.quantization.node_quantization_config import BaseNodeQuantizationConfig, \ NodeWeightsQuantizationConfig, NodeActivationQuantizationConfig -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \ - AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.schema.v1 import AttributeQuantizationConfig, \ + OpQuantizationConfig from model_compression_toolkit.logger import Logger diff --git a/model_compression_toolkit/core/common/quantization/node_quantization_config.py b/model_compression_toolkit/core/common/quantization/node_quantization_config.py index a790cbc77..069848248 100644 --- a/model_compression_toolkit/core/common/quantization/node_quantization_config.py +++ b/model_compression_toolkit/core/common/quantization/node_quantization_config.py @@ -25,8 +25,8 @@ from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationConfig, \ QuantizationErrorMethod -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \ - AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.schema.v1 import AttributeQuantizationConfig, \ + OpQuantizationConfig ########################################## diff --git a/model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py b/model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py index 8c9c759d2..324b8f603 100644 --- a/model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +++ b/model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py @@ -15,7 +15,8 @@ import numpy as np from typing import Dict, Union -from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod, Signedness +from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod +from model_compression_toolkit.target_platform_capabilities.schema.v1 import Signedness from model_compression_toolkit.core.common.collectors.statistics_collector import BaseStatsCollector from model_compression_toolkit.core.common.quantization import quantization_params_generation from model_compression_toolkit.core.common.node_prior_info import NodePriorInfo diff --git a/model_compression_toolkit/core/common/quantization/set_node_quantization_config.py b/model_compression_toolkit/core/common/quantization/set_node_quantization_config.py index e005f1a2e..6e489aaaa 100644 --- a/model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +++ b/model_compression_toolkit/core/common/quantization/set_node_quantization_config.py @@ -33,7 +33,7 @@ from model_compression_toolkit.core.common.quantization.quantization_fn_selection import \ get_weights_quantization_fn from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import TargetPlatformCapabilities -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import OpQuantizationConfig, \ +from 
model_compression_toolkit.target_platform_capabilities.schema.v1 import OpQuantizationConfig, \ QuantizationConfigOptions diff --git a/model_compression_toolkit/core/common/statistics_correction/apply_activation_bias_correction_to_graph.py b/model_compression_toolkit/core/common/statistics_correction/apply_activation_bias_correction_to_graph.py index 293e3dcce..cc473e48f 100644 --- a/model_compression_toolkit/core/common/statistics_correction/apply_activation_bias_correction_to_graph.py +++ b/model_compression_toolkit/core/common/statistics_correction/apply_activation_bias_correction_to_graph.py @@ -17,7 +17,7 @@ from model_compression_toolkit.core.common import BaseNode, Graph from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation from model_compression_toolkit.core.common.quantization.node_quantization_config import WeightsAttrQuantizationConfig -from model_compression_toolkit.target_platform_capabilities.target_platform import AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.schema.v1 import AttributeQuantizationConfig def apply_activation_bias_correction_to_graph(graph: Graph, diff --git a/model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py b/model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py index 3a37a97d9..f22128b09 100644 --- a/model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py +++ b/model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py @@ -19,7 +19,7 @@ from model_compression_toolkit.core.common import Graph, BaseNode from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation from model_compression_toolkit.core.common.quantization.node_quantization_config import WeightsAttrQuantizationConfig -from model_compression_toolkit.target_platform_capabilities.target_platform import AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.schema.v1 import AttributeQuantizationConfig def apply_bias_correction_to_graph(graph_to_apply_bias_correction: Graph, diff --git a/model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py b/model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py index 482fa4fce..b995b9335 100644 --- a/model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py +++ b/model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py @@ -26,8 +26,8 @@ from model_compression_toolkit.core.common.graph.base_graph import Graph from model_compression_toolkit.core.common.graph.base_node import BaseNode from model_compression_toolkit.core.common.graph.graph_matchers import NodeOperationMatcher -from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod, \ - AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod +from model_compression_toolkit.target_platform_capabilities.schema.v1 import AttributeQuantizationConfig class BatchNormalizationReconstruction(common.BaseSubstitution): diff --git a/model_compression_toolkit/core/common/substitutions/shift_negative_activation.py b/model_compression_toolkit/core/common/substitutions/shift_negative_activation.py index f5f643295..da4af3368 100644 --- 
a/model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +++ b/model_compression_toolkit/core/common/substitutions/shift_negative_activation.py @@ -22,8 +22,8 @@ from model_compression_toolkit.core.common import FrameworkInfo, Graph, BaseNode from model_compression_toolkit.constants import THRESHOLD, SIGNED, SHIFT_NEGATIVE_NON_LINEAR_NUM_BITS from model_compression_toolkit.core.common.graph.graph_matchers import NodeOperationMatcher -from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod, \ - AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod +from model_compression_toolkit.target_platform_capabilities.schema.v1 import AttributeQuantizationConfig from model_compression_toolkit.core.common.quantization.set_node_quantization_config import create_node_activation_qc, \ set_quantization_configs_to_node from model_compression_toolkit.core.common.quantization.core_config import CoreConfig diff --git a/model_compression_toolkit/metadata.py b/model_compression_toolkit/metadata.py index bcbd73e76..d26be8e67 100644 --- a/model_compression_toolkit/metadata.py +++ b/model_compression_toolkit/metadata.py @@ -15,7 +15,7 @@ from typing import Dict, Any from model_compression_toolkit.constants import MCT_VERSION, TPC_VERSION, OPERATORS_SCHEDULING, FUSED_NODES_MAPPING, \ - CUTS, MAX_CUT, OP_ORDER, OP_RECORD, SHAPE, NODE_OUTPUT_INDEX, NODE_NAME, TOTAL_SIZE, MEM_ELEMENTS + CUTS, MAX_CUT, OP_ORDER, OP_RECORD, SHAPE, NODE_OUTPUT_INDEX, NODE_NAME, TOTAL_SIZE, MEM_ELEMENTS, TPC_SCHEMA from model_compression_toolkit.core.common.graph.memory_graph.compute_graph_max_cut import SchedulerInfo from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities @@ -43,13 +43,16 @@ def create_model_metadata(tpc: TargetPlatformCapabilities, def get_versions_dict(tpc) -> Dict: """ - Returns: A dictionary with TPC and MCT versions. + Returns: A dictionary with TPC, MCT and TPC-Schema versions. """ # imported inside to avoid circular import error from model_compression_toolkit import __version__ as mct_version - tpc_version = f'{tpc.name}.{tpc.version}' - return {MCT_VERSION: mct_version, TPC_VERSION: tpc_version} + tpc_version = f'{tpc.tp_model.tpc_minor_version}.{tpc.tp_model.tpc_patch_version}' + tpc_schema = f'{tpc.tp_model.SCHEMA_VERSION}' + return {MCT_VERSION: mct_version, + TPC_VERSION: tpc_version, + TPC_SCHEMA: tpc_schema} def get_scheduler_metadata(scheduler_info: SchedulerInfo) -> Dict[str, Any]: diff --git a/model_compression_toolkit/target_platform_capabilities/schema/__init__.py b/model_compression_toolkit/target_platform_capabilities/schema/__init__.py new file mode 100644 index 000000000..ea3047f32 --- /dev/null +++ b/model_compression_toolkit/target_platform_capabilities/schema/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== \ No newline at end of file diff --git a/model_compression_toolkit/target_platform_capabilities/schema/v1.py b/model_compression_toolkit/target_platform_capabilities/schema/v1.py new file mode 100644 index 000000000..cdb6b5230 --- /dev/null +++ b/model_compression_toolkit/target_platform_capabilities/schema/v1.py @@ -0,0 +1,727 @@ +# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +import copy + +from enum import Enum + +import pprint + +from typing import Dict, Any, Union, Tuple, List, Optional + +from mct_quantizers import QuantizationMethod +from model_compression_toolkit.constants import FLOAT_BITWIDTH + +from model_compression_toolkit.logger import Logger +from model_compression_toolkit.target_platform_capabilities.constants import OPS_SET_LIST +from model_compression_toolkit.target_platform_capabilities.immutable import ImmutableClass +from model_compression_toolkit.target_platform_capabilities.target_platform.current_tp_model import \ + get_current_tp_model, _current_tp_model +from model_compression_toolkit.target_platform_capabilities.target_platform.target_platform_model_component import \ + TargetPlatformModelComponent + + +class Signedness(Enum): + """ + An enum for choosing the signedness of the quantization method: + + AUTO - Signedness decided automatically by quantization. + SIGNED - Force signed quantization. + UNSIGNED - Force unsigned quantization. + """ + AUTO = 0 + SIGNED = 1 + UNSIGNED = 2 + + +def clone_and_edit_object_params(obj: Any, **kwargs: Dict) -> Any: + """ + Clones the given object and edits some of its parameters. + + Args: + obj: An object to clone. + **kwargs: Keyword arguments to edit in the cloned object. + + Returns: + Edited copy of the given object. + """ + + obj_copy = copy.deepcopy(obj) + for k, v in kwargs.items(): + assert hasattr(obj_copy, + k), f'Editing a parameter is possible only for existing parameters in the given object, ' \ + f'but {k} is not a parameter of {obj_copy}.' + setattr(obj_copy, k, v) + return obj_copy + + +class AttributeQuantizationConfig: + """ + Holds the quantization configuration of a weight attribute of a layer. + """ + def __init__(self, + weights_quantization_method: QuantizationMethod = QuantizationMethod.POWER_OF_TWO, + weights_n_bits: int = FLOAT_BITWIDTH, + weights_per_channel_threshold: bool = False, + enable_weights_quantization: bool = False, + lut_values_bitwidth: Union[int, None] = None, # If None, 8 bits are used by default; otherwise, this value is used. + ): + """ + Initializes an attribute quantization config. + + Args: + weights_quantization_method (QuantizationMethod): Which method to use from QuantizationMethod for weights quantization. + weights_n_bits (int): Number of bits to quantize the coefficients.
+ weights_per_channel_threshold (bool): Whether to quantize the weights per-channel or not (per-tensor). + enable_weights_quantization (bool): Whether to quantize the model weights or not. + lut_values_bitwidth (int): Number of bits to use when quantizing with a look-up table. + + """ + + self.weights_quantization_method = weights_quantization_method + self.weights_n_bits = weights_n_bits + self.weights_per_channel_threshold = weights_per_channel_threshold + self.enable_weights_quantization = enable_weights_quantization + self.lut_values_bitwidth = lut_values_bitwidth + + def clone_and_edit(self, **kwargs): + """ + Clone the quantization config and edit some of its attributes. + + Args: + **kwargs: Keyword arguments to edit the configuration to clone. + + Returns: + Edited quantization configuration. + """ + + return clone_and_edit_object_params(self, **kwargs) + + def __eq__(self, other): + """ + Is this configuration equal to another object. + + Args: + other: Object to compare. + + Returns: + + Whether this configuration is equal to another object or not. + """ + if not isinstance(other, AttributeQuantizationConfig): + return False # pragma: no cover + return self.weights_quantization_method == other.weights_quantization_method and \ + self.weights_n_bits == other.weights_n_bits and \ + self.weights_per_channel_threshold == other.weights_per_channel_threshold and \ + self.enable_weights_quantization == other.enable_weights_quantization and \ + self.lut_values_bitwidth == other.lut_values_bitwidth + + +class OpQuantizationConfig: + """ + OpQuantizationConfig is a class to configure the quantization parameters of an operator. + """ + + def __init__(self, + default_weight_attr_config: AttributeQuantizationConfig, + attr_weights_configs_mapping: Dict[str, AttributeQuantizationConfig], + activation_quantization_method: QuantizationMethod, + activation_n_bits: int, + supported_input_activation_n_bits: Union[int, Tuple[int]], + enable_activation_quantization: bool, + quantization_preserving: bool, + fixed_scale: float, + fixed_zero_point: int, + simd_size: int, + signedness: Signedness + ): + """ + + Args: + default_weight_attr_config (AttributeQuantizationConfig): A default attribute quantization configuration for the operation. + attr_weights_configs_mapping (Dict[str, AttributeQuantizationConfig]): A mapping between an op attribute name and its quantization configuration. + activation_quantization_method (QuantizationMethod): Which method to use from QuantizationMethod for activation quantization. + activation_n_bits (int): Number of bits to quantize the activations. + supported_input_activation_n_bits (int or Tuple[int]): Number of bits that the operator accepts as input. + enable_activation_quantization (bool): Whether to quantize the model activations or not. + quantization_preserving (bool): Whether quantization parameters should be the same for an operator's input and output. + fixed_scale (float): Scale to use for the operator's quantization parameters. + fixed_zero_point (int): Zero-point to use for the operator's quantization parameters. + simd_size (int): Per-op integer representing the Single Instruction, Multiple Data (SIMD) width of an operator. It indicates the number of data elements that can be fetched and processed simultaneously in a single instruction. + signedness (Signedness): Set activation quantization signedness.
+ + """ + + self.default_weight_attr_config = default_weight_attr_config + self.attr_weights_configs_mapping = attr_weights_configs_mapping + + self.activation_quantization_method = activation_quantization_method + self.activation_n_bits = activation_n_bits + if isinstance(supported_input_activation_n_bits, tuple): + self.supported_input_activation_n_bits = supported_input_activation_n_bits + elif isinstance(supported_input_activation_n_bits, int): + self.supported_input_activation_n_bits = (supported_input_activation_n_bits,) + else: + Logger.critical(f"Supported_input_activation_n_bits only accepts int or tuple of ints, but got {type(supported_input_activation_n_bits)}") # pragma: no cover + self.enable_activation_quantization = enable_activation_quantization + self.quantization_preserving = quantization_preserving + self.fixed_scale = fixed_scale + self.fixed_zero_point = fixed_zero_point + self.signedness = signedness + self.simd_size = simd_size + + def get_info(self): + """ + + Returns: Info about the quantization configuration as a dictionary. + + """ + return self.__dict__ # pragma: no cover + + def clone_and_edit(self, attr_to_edit: Dict[str, Dict[str, Any]] = {}, **kwargs): + """ + Clone the quantization config and edit some of its attributes. + Args: + attr_to_edit: A mapping between attributes names to edit and their parameters that + should be edited to a new value. + **kwargs: Keyword arguments to edit the configuration to clone. + + Returns: + Edited quantization configuration. + """ + + qc = clone_and_edit_object_params(self, **kwargs) + + # optionally: editing specific parameters in the config of specified attributes + edited_attrs = copy.deepcopy(qc.attr_weights_configs_mapping) + for attr_name, attr_cfg in qc.attr_weights_configs_mapping.items(): + if attr_name in attr_to_edit: + edited_attrs[attr_name] = attr_cfg.clone_and_edit(**attr_to_edit[attr_name]) + + qc.attr_weights_configs_mapping = edited_attrs + + return qc + + def __eq__(self, other): + """ + Is this configuration equal to another object. + Args: + other: Object to compare. + + Returns: + Whether this configuration is equal to another object or not. + """ + if not isinstance(other, OpQuantizationConfig): + return False # pragma: no cover + return self.default_weight_attr_config == other.default_weight_attr_config and \ + self.attr_weights_configs_mapping == other.attr_weights_configs_mapping and \ + self.activation_quantization_method == other.activation_quantization_method and \ + self.activation_n_bits == other.activation_n_bits and \ + self.supported_input_activation_n_bits == other.supported_input_activation_n_bits and \ + self.enable_activation_quantization == other.enable_activation_quantization and \ + self.signedness == other.signedness and \ + self.simd_size == other.simd_size + + @property + def max_input_activation_n_bits(self) -> int: + """ + Get maximum supported input bit-width. + + Returns: Maximum supported input bit-width. + + """ + return max(self.supported_input_activation_n_bits) + + +class QuantizationConfigOptions: + """ + + Wrap a set of quantization configurations to consider during the quantization + of an operator. + + """ + def __init__(self, + quantization_config_list: List[OpQuantizationConfig], + base_config: OpQuantizationConfig = None): + """ + + Args: + quantization_config_list (List[OpQuantizationConfig]): List of possible OpQuantizationConfig to gather. 
base_config (OpQuantizationConfig): Fallback OpQuantizationConfig to use when optimizing the model in a non-mixed-precision manner. + """ + + assert isinstance(quantization_config_list, + list), f"'QuantizationConfigOptions' options list must be a list, but received: {type(quantization_config_list)}." + for cfg in quantization_config_list: + assert isinstance(cfg, OpQuantizationConfig),\ + f"Each option must be an instance of 'OpQuantizationConfig', but found an object of type: {type(cfg)}." + self.quantization_config_list = quantization_config_list + if len(quantization_config_list) > 1: + assert base_config is not None, \ + f"For multiple configurations, a 'base_config' is required for non-mixed-precision optimization." + assert any([base_config is cfg for cfg in quantization_config_list]), \ + f"'base_config' must be included in the quantization config options list." + # Enforce base_config to be a reference to an instance in quantization_config_list. + self.base_config = base_config + elif len(quantization_config_list) == 1: + assert base_config is None or base_config == quantization_config_list[0], "'base_config' should be included in 'quantization_config_list'" + # Set base_config to be a reference to the first instance in quantization_config_list. + self.base_config = quantization_config_list[0] + else: + raise AssertionError("'QuantizationConfigOptions' requires at least one 'OpQuantizationConfig'. The provided list is empty.") + + def __eq__(self, other): + """ + Is this QCOptions equal to another object. + Args: + other: Object to compare. + + Returns: + Whether this QCOptions is equal to another object or not. + """ + + if not isinstance(other, QuantizationConfigOptions): + return False + if len(self.quantization_config_list) != len(other.quantization_config_list): + return False + for qc, other_qc in zip(self.quantization_config_list, other.quantization_config_list): + if qc != other_qc: + return False + return True + + def clone_and_edit(self, **kwargs): + """ + Clones the quantization config options and edits attributes in each configuration. + + Args: + **kwargs: Keyword arguments to edit in each configuration. + + Returns: + Edited QuantizationConfigOptions. + """ + qc_options = copy.deepcopy(self) + for qc in qc_options.quantization_config_list: + self.__edit_quantization_configuration(qc, kwargs) + return qc_options + + def clone_and_edit_weight_attribute(self, attrs: List[str] = None, **kwargs): + """ + Clones the quantization configurations and edits some of their attributes' parameters. + + Args: + attrs: Names of the attributes whose configurations should be edited. If None is provided, the configurations + of all attributes in the operation attributes config mapping are updated. + **kwargs: Keyword arguments to edit in the attributes configuration. + + Returns: + QuantizationConfigOptions with edited attributes configurations.
+ + """ + + qc_options = copy.deepcopy(self) + + for qc in qc_options.quantization_config_list: + if attrs is None: + attrs_to_update = list(qc.attr_weights_configs_mapping.keys()) + else: + if not isinstance(attrs, List): # pragma: no cover + Logger.critical(f"Expected a list of attributes but received {type(attrs)}.") + attrs_to_update = attrs + + for attr in attrs_to_update: + if qc.attr_weights_configs_mapping.get(attr) is None: # pragma: no cover + Logger.critical(f'Editing attributes is only possible for existing attributes in the configuration\'s ' + f'weights config mapping; {attr} does not exist in {qc}.') + self.__edit_quantization_configuration(qc.attr_weights_configs_mapping[attr], kwargs) + return qc_options + + def clone_and_map_weights_attr_keys(self, layer_attrs_mapping: Union[Dict[str, str], None]): + """ + Clones the quantization configuration options and edits the keys in each configuration attributes config mapping, + based on the given attributes names mapping. + + Args: + layer_attrs_mapping: A mapping between attributes names. + + Returns: + QuantizationConfigOptions with edited attributes names. + + """ + qc_options = copy.deepcopy(self) + + # Extract the list of existing quantization configurations from qc_options + + # Check if the base_config is already included in the quantization configuration list + # If not, add base_config to the list of configurations to update + cfgs_to_update = [cfg for cfg in qc_options.quantization_config_list] + if not any(qc_options.base_config is cfg for cfg in cfgs_to_update): + # TODO: add test for this case + cfgs_to_update.append(qc_options.base_config) + + for qc in cfgs_to_update: + if layer_attrs_mapping is None: + qc.attr_weights_configs_mapping = {} + else: + new_attr_mapping = {} + for attr in list(qc.attr_weights_configs_mapping.keys()): + new_key = layer_attrs_mapping.get(attr) + if new_key is None: # pragma: no cover + Logger.critical(f"Attribute \'{attr}\' does not exist in the provided attribute mapping.") + + new_attr_mapping[new_key] = qc.attr_weights_configs_mapping.pop(attr) + + qc.attr_weights_configs_mapping.update(new_attr_mapping) + + return qc_options + + def __edit_quantization_configuration(self, qc, kwargs): + for k, v in kwargs.items(): + assert hasattr(qc, + k), (f'Editing is only possible for existing attributes in the configuration; ' + f'{k} is not an attribute of {qc}.') + setattr(qc, k, v) + + def get_info(self): + return {f'option {i}': cfg.get_info() for i, cfg in enumerate(self.quantization_config_list)} + + +class OperatorsSetBase(TargetPlatformModelComponent): + """ + Base class to represent a set of operators. + """ + def __init__(self, name: str): + """ + + Args: + name: Name of OperatorsSet. + """ + super().__init__(name=name) + + +class OperatorsSet(OperatorsSetBase): + def __init__(self, + name: str, + qc_options: QuantizationConfigOptions = None): + """ + Set of operators that are represented by a unique label. + + Args: + name (str): Set's label (must be unique in a TargetPlatformModel). + qc_options (QuantizationConfigOptions): Configuration options to use for this set of operations. + """ + + super().__init__(name) + self.qc_options = qc_options + is_fusing_set = qc_options is None + self.is_default = _current_tp_model.get().default_qco == self.qc_options or is_fusing_set + + + def get_info(self) -> Dict[str,Any]: + """ + + Returns: Info about the set as a dictionary. 
+ + """ + return {"name": self.name, + "is_default_qc": self.is_default} + + +class OperatorSetConcat(OperatorsSetBase): + """ + Concatenate a list of operator sets to treat them similarly in different places (like fusing). + """ + def __init__(self, *opsets: OperatorsSet): + """ + Group a list of operation sets. + + Args: + *opsets (OperatorsSet): List of operator sets to group. + """ + name = "_".join([a.name for a in opsets]) + super().__init__(name=name) + self.op_set_list = opsets + self.qc_options = None # Concat have no qc options + + def get_info(self) -> Dict[str,Any]: + """ + + Returns: Info about the sets group as a dictionary. + + """ + return {"name": self.name, + OPS_SET_LIST: [s.name for s in self.op_set_list]} + + +class Fusing(TargetPlatformModelComponent): + """ + Fusing defines a list of operators that should be combined and treated as a single operator, + hence no quantization is applied between them. + """ + + def __init__(self, + operator_groups_list: List[Union[OperatorsSet, OperatorSetConcat]], + name: str = None): + """ + Args: + operator_groups_list (List[Union[OperatorsSet, OperatorSetConcat]]): A list of operator groups, each being either an OperatorSetConcat or an OperatorsSet. + name (str): The name for the Fusing instance. If not provided, it's generated from the operator groups' names. + """ + assert isinstance(operator_groups_list, + list), f'List of operator groups should be of type list but is {type(operator_groups_list)}' + assert len(operator_groups_list) >= 2, f'Fusing can not be created for a single operators group' + + # Generate a name from the operator groups if no name is provided + if name is None: + name = '_'.join([x.name for x in operator_groups_list]) + + super().__init__(name) + self.operator_groups_list = operator_groups_list + + def contains(self, other: Any) -> bool: + """ + Determines if the current Fusing instance contains another Fusing instance. + + Args: + other: The other Fusing instance to check against. + + Returns: + A boolean indicating whether the other instance is contained within this one. + """ + if not isinstance(other, Fusing): + return False + + # Check for containment by comparing operator groups + for i in range(len(self.operator_groups_list) - len(other.operator_groups_list) + 1): + for j in range(len(other.operator_groups_list)): + if self.operator_groups_list[i + j] != other.operator_groups_list[j] and not ( + isinstance(self.operator_groups_list[i + j], OperatorSetConcat) and ( + other.operator_groups_list[j] in self.operator_groups_list[i + j].op_set_list)): + break + else: + # If all checks pass, the other Fusing instance is contained + return True + # Other Fusing instance is not contained + return False + + def get_info(self): + """ + Retrieves information about the Fusing instance, including its name and the sequence of operator groups. + + Returns: + A dictionary with the Fusing instance's name as the key and the sequence of operator groups as the value, + or just the sequence of operator groups if no name is set. + """ + if self.name is not None: + return {self.name: ' -> '.join([x.name for x in self.operator_groups_list])} + return ' -> '.join([x.name for x in self.operator_groups_list]) + + +class TargetPlatformModel(ImmutableClass): + """ + Represents the hardware configuration used for quantized model inference. + + This model defines: + - The operators and their associated quantization configurations. 
+ - Fusing patterns, enabling multiple operators to be combined into a single operator + for optimization during inference. + - Versioning support through minor and patch versions for backward compatibility. + + Attributes: + SCHEMA_VERSION (int): The schema version of the target platform model. + """ + SCHEMA_VERSION = 1 + def __init__(self, + default_qco: QuantizationConfigOptions, + tpc_minor_version: Optional[int], + tpc_patch_version: Optional[int], + add_metadata: bool = True, + name="default_tp_model"): + """ + + Args: + default_qco (QuantizationConfigOptions): Default QuantizationConfigOptions to use for operators that their QuantizationConfigOptions are not defined in the model. + tpc_minor_version (Optional[int]): The minor version of the target platform capabilities. + tpc_patch_version (Optional[int]): The patch version of the target platform capabilities. + add_metadata (bool): Whether to add metadata to the model or not. + name (str): Name of the model. + + Raises: + AssertionError: If the provided `default_qco` does not contain exactly one quantization configuration. + """ + + super().__init__() + self.tpc_minor_version = tpc_minor_version + self.tpc_patch_version = tpc_patch_version + self.add_metadata = add_metadata + self.name = name + self.operator_set = [] + assert isinstance(default_qco, QuantizationConfigOptions), \ + "default_qco must be an instance of QuantizationConfigOptions" + assert len(default_qco.quantization_config_list) == 1, \ + "Default QuantizationConfigOptions must contain exactly one option." + + self.default_qco = default_qco + self.fusing_patterns = [] + self.is_simd_padding = False + + def get_config_options_by_operators_set(self, + operators_set_name: str) -> QuantizationConfigOptions: + """ + Get the QuantizationConfigOptions of a OperatorsSet by the OperatorsSet name. + If the name is not in the model, the default QuantizationConfigOptions is returned. + + Args: + operators_set_name: Name of OperatorsSet to get. + + Returns: + QuantizationConfigOptions to use for ops in OperatorsSet named operators_set_name. + """ + for op_set in self.operator_set: + if operators_set_name == op_set.name: + return op_set.qc_options + return self.default_qco + + def get_default_op_quantization_config(self) -> OpQuantizationConfig: + """ + + Returns: The default OpQuantizationConfig of the TargetPlatformModel. + + """ + assert len(self.default_qco.quantization_config_list) == 1, \ + f'Default quantization configuration options must contain only one option,' \ + f' but found {len(get_current_tp_model().default_qco.quantization_config_list)} configurations.' + return self.default_qco.quantization_config_list[0] + + def is_opset_in_model(self, + opset_name: str) -> bool: + """ + Check whether an operators set is defined in the model or not. + + Args: + opset_name: Operators set name to check. + + Returns: + Whether an operators set is defined in the model or not. + """ + return opset_name in [x.name for x in self.operator_set] + + def get_opset_by_name(self, + opset_name: str) -> OperatorsSetBase: + """ + Get an OperatorsSet object from the model by its name. + If name is not in the model - None is returned. + + Args: + opset_name: OperatorsSet name to retrieve. + + Returns: + OperatorsSet object with the name opset_name, or None if opset_name is not in the model. + """ + + opset_list = [x for x in self.operator_set if x.name == opset_name] + assert len(opset_list) <= 1, f'Found more than one OperatorsSet in' \ + f' TargetPlatformModel with the name {opset_name}. 
' \ + f'OperatorsSet name must be unique.' + if len(opset_list) == 0: # opset_name is not in the model. + return None + + return opset_list[0] # There's one opset with that name + + def append_component(self, + tp_model_component: TargetPlatformModelComponent): + """ + Attach a TargetPlatformModel component to the model. Components can be for example: + Fusing, OperatorsSet, etc. + + Args: + tp_model_component: Component to attach to the model. + + """ + if isinstance(tp_model_component, Fusing): + self.fusing_patterns.append(tp_model_component) + elif isinstance(tp_model_component, OperatorsSetBase): + self.operator_set.append(tp_model_component) + else: # pragma: no cover + Logger.critical(f'Attempted to append an unrecognized TargetPlatformModelComponent of type: {type(tp_model_component)}.') + + def __enter__(self): + """ + Start defining the TargetPlatformModel using 'with'. + + Returns: Initialized TargetPlatformModel object. + + """ + _current_tp_model.set(self) + return self + + def __exit__(self, exc_type, exc_value, tb): + """ + Finish defining the TargetPlatformModel at the end of the 'with' clause. + Returns the final and immutable TargetPlatformModel instance. + """ + + if exc_value is not None: + print(exc_value, exc_value.args) + raise exc_value + self.__validate_model() # Assert that model is valid. + _current_tp_model.reset() + self.initialized_done() # Make model immutable. + return self + + def __validate_model(self): + """ + + Assert model is valid. + Model is invalid if, for example, it contains multiple operator sets with the same name, + as their names should be unique. + + """ + opsets_names = [op.name for op in self.operator_set] + if len(set(opsets_names)) != len(opsets_names): + Logger.critical(f'Operator Sets must have unique names.') + + def get_default_config(self) -> OpQuantizationConfig: + """ + + Returns: + + """ + assert len(self.default_qco.quantization_config_list) == 1, \ + f'Default quantization configuration options must contain only one option,' \ + f' but found {len(self.default_qco.quantization_config_list)} configurations.' + return self.default_qco.quantization_config_list[0] + + def get_info(self) -> Dict[str, Any]: + """ + + Returns: Dictionary that summarizes the TargetPlatformModel properties (for display purposes). + + """ + return {"Model name": self.name, + "Default quantization config": self.get_default_config().get_info(), + "Operators sets": [o.get_info() for o in self.operator_set], + "Fusing patterns": [f.get_info() for f in self.fusing_patterns] + } + + def show(self): + """ + + Display the TargetPlatformModel. + + """ + pprint.pprint(self.get_info(), sort_dicts=False) + + def set_simd_padding(self, + is_simd_padding: bool): + """ + Set flag is_simd_padding to indicate whether this TP model defines + that padding due to SIMD constrains occurs. + + Args: + is_simd_padding: Whether this TP model defines that padding due to SIMD constrains occurs. + + """ + self.is_simd_padding = is_simd_padding + diff --git a/model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py b/model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py index 7767a9a11..fd72ac6fd 100644 --- a/model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +++ b/model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py @@ -13,13 +13,11 @@ # limitations under the License. 
# ============================================================================== -from model_compression_toolkit.target_platform_capabilities.target_platform.fusing import Fusing from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.attribute_filter import AttributeFilter from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import TargetPlatformCapabilities, OperationsSetToLayers, Smaller, SmallerEq, NotEq, Eq, GreaterEq, Greater, LayerFilterParams, OperationsToLayers, get_current_tpc -from model_compression_toolkit.target_platform_capabilities.target_platform.target_platform_model import get_default_quantization_config_options, TargetPlatformModel -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \ - OpQuantizationConfig, QuantizationConfigOptions, AttributeQuantizationConfig, Signedness -from model_compression_toolkit.target_platform_capabilities.target_platform.operators import OperatorsSet, OperatorSetConcat +from model_compression_toolkit.target_platform_capabilities.target_platform.target_platform_model import get_default_quantization_config_options +from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel, OperatorsSet, \ + OperatorSetConcat, Signedness, AttributeQuantizationConfig, OpQuantizationConfig, QuantizationConfigOptions, Fusing from mct_quantizers import QuantizationMethod diff --git a/model_compression_toolkit/target_platform_capabilities/target_platform/fusing.py b/model_compression_toolkit/target_platform_capabilities/target_platform/fusing.py index 6ceca6e3a..fa9315adc 100644 --- a/model_compression_toolkit/target_platform_capabilities/target_platform/fusing.py +++ b/model_compression_toolkit/target_platform_capabilities/target_platform/fusing.py @@ -14,72 +14,3 @@ # ============================================================================== -from typing import Any, List, Union - -from model_compression_toolkit.target_platform_capabilities.target_platform.operators import OperatorSetConcat, \ - OperatorsSet -from model_compression_toolkit.target_platform_capabilities.target_platform.target_platform_model_component import TargetPlatformModelComponent - - -class Fusing(TargetPlatformModelComponent): - """ - Fusing defines a list of operators that should be combined and treated as a single operator, - hence no quantization is applied between them. - """ - - def __init__(self, - operator_groups_list: List[Union[OperatorsSet, OperatorSetConcat]], - name: str = None): - """ - Args: - operator_groups_list (List[Union[OperatorsSet, OperatorSetConcat]]): A list of operator groups, each being either an OperatorSetConcat or an OperatorsSet. - name (str): The name for the Fusing instance. If not provided, it's generated from the operator groups' names. - """ - assert isinstance(operator_groups_list, - list), f'List of operator groups should be of type list but is {type(operator_groups_list)}' - assert len(operator_groups_list) >= 2, f'Fusing can not be created for a single operators group' - - # Generate a name from the operator groups if no name is provided - if name is None: - name = '_'.join([x.name for x in operator_groups_list]) - - super().__init__(name) - self.operator_groups_list = operator_groups_list - - def contains(self, other: Any) -> bool: - """ - Determines if the current Fusing instance contains another Fusing instance. 
- - Args: - other: The other Fusing instance to check against. - - Returns: - A boolean indicating whether the other instance is contained within this one. - """ - if not isinstance(other, Fusing): - return False - - # Check for containment by comparing operator groups - for i in range(len(self.operator_groups_list) - len(other.operator_groups_list) + 1): - for j in range(len(other.operator_groups_list)): - if self.operator_groups_list[i + j] != other.operator_groups_list[j] and not ( - isinstance(self.operator_groups_list[i + j], OperatorSetConcat) and ( - other.operator_groups_list[j] in self.operator_groups_list[i + j].op_set_list)): - break - else: - # If all checks pass, the other Fusing instance is contained - return True - # Other Fusing instance is not contained - return False - - def get_info(self): - """ - Retrieves information about the Fusing instance, including its name and the sequence of operator groups. - - Returns: - A dictionary with the Fusing instance's name as the key and the sequence of operator groups as the value, - or just the sequence of operator groups if no name is set. - """ - if self.name is not None: - return {self.name: ' -> '.join([x.name for x in self.operator_groups_list])} - return ' -> '.join([x.name for x in self.operator_groups_list]) \ No newline at end of file diff --git a/model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py b/model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py index 1ee257ec3..fa9315adc 100644 --- a/model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py +++ b/model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py @@ -13,366 +13,4 @@ # limitations under the License. # ============================================================================== -import copy -from typing import List, Dict, Union, Any, Tuple -from enum import Enum - -from mct_quantizers import QuantizationMethod -from model_compression_toolkit.constants import FLOAT_BITWIDTH -from model_compression_toolkit.logger import Logger - - -class Signedness(Enum): - """ - An enum for choosing the signedness of the quantization method: - - AUTO - Signedness decided automatically by quantization. - SIGNED - Force signed quantization. - UNSIGNED - Force unsigned quantization. - """ - AUTO = 0 - SIGNED = 1 - UNSIGNED = 2 - - -def clone_and_edit_object_params(obj: Any, **kwargs: Dict) -> Any: - """ - Clones the given object and edit some of its parameters. - - Args: - obj: An object to clone. - **kwargs: Keyword arguments to edit in the cloned object. - - Returns: - Edited copy of the given object. - """ - - obj_copy = copy.deepcopy(obj) - for k, v in kwargs.items(): - assert hasattr(obj_copy, - k), f'Edit parameter is possible only for existing parameters in the given object, ' \ - f'but {k} is not a parameter of {obj_copy}.' - setattr(obj_copy, k, v) - return obj_copy - - -class AttributeQuantizationConfig: - """ - Hold the quantization configuration of a weight attribute of a layer. - """ - def __init__(self, - weights_quantization_method: QuantizationMethod = QuantizationMethod.POWER_OF_TWO, - weights_n_bits: int = FLOAT_BITWIDTH, - weights_per_channel_threshold: bool = False, - enable_weights_quantization: bool = False, - lut_values_bitwidth: Union[int, None] = None, # If None - set 8 in hptq, o.w use it - ): - """ - Initializes an attribute quantization config. 
- - Args: - weights_quantization_method (QuantizationMethod): Which method to use from QuantizationMethod for weights quantization. - weights_n_bits (int): Number of bits to quantize the coefficients. - weights_per_channel_threshold (bool): Whether to quantize the weights per-channel or not (per-tensor). - enable_weights_quantization (bool): Whether to quantize the model weights or not. - lut_values_bitwidth (int): Number of bits to use when quantizing in look-up-table. - - """ - - self.weights_quantization_method = weights_quantization_method - self.weights_n_bits = weights_n_bits - self.weights_per_channel_threshold = weights_per_channel_threshold - self.enable_weights_quantization = enable_weights_quantization - self.lut_values_bitwidth = lut_values_bitwidth - - def clone_and_edit(self, **kwargs): - """ - Clone the quantization config and edit some of its attributes. - - Args: - **kwargs: Keyword arguments to edit the configuration to clone. - - Returns: - Edited quantization configuration. - """ - - return clone_and_edit_object_params(self, **kwargs) - - def __eq__(self, other): - """ - Is this configuration equal to another object. - - Args: - other: Object to compare. - - Returns: - - Whether this configuration is equal to another object or not. - """ - if not isinstance(other, AttributeQuantizationConfig): - return False # pragma: no cover - return self.weights_quantization_method == other.weights_quantization_method and \ - self.weights_n_bits == other.weights_n_bits and \ - self.weights_per_channel_threshold == other.weights_per_channel_threshold and \ - self.enable_weights_quantization == other.enable_weights_quantization and \ - self.lut_values_bitwidth == other.lut_values_bitwidth - - -class OpQuantizationConfig: - """ - OpQuantizationConfig is a class to configure the quantization parameters of an operator. - """ - - def __init__(self, - default_weight_attr_config: AttributeQuantizationConfig, - attr_weights_configs_mapping: Dict[str, AttributeQuantizationConfig], - activation_quantization_method: QuantizationMethod, - activation_n_bits: int, - supported_input_activation_n_bits: Union[int, Tuple[int]], - enable_activation_quantization: bool, - quantization_preserving: bool, - fixed_scale: float, - fixed_zero_point: int, - simd_size: int, - signedness: Signedness - ): - """ - - Args: - default_weight_attr_config (AttributeQuantizationConfig): A default attribute quantization configuration for the operation. - attr_weights_configs_mapping (Dict[str, AttributeQuantizationConfig]): A mapping between an op attribute name and its quantization configuration. - activation_quantization_method (QuantizationMethod): Which method to use from QuantizationMethod for activation quantization. - activation_n_bits (int): Number of bits to quantize the activations. - supported_input_activation_n_bits (int or Tuple[int]): Number of bits that operator accepts as input. - enable_activation_quantization (bool): Whether to quantize the model activations or not. - quantization_preserving (bool): Whether quantization parameters should be the same for an operator's input and output. - fixed_scale (float): Scale to use for an operator quantization parameters. - fixed_zero_point (int): Zero-point to use for an operator quantization parameters. - simd_size (int): Per op integer representing the Single Instruction, Multiple Data (SIMD) width of an operator. It indicates the number of data elements that can be fetched and processed simultaneously in a single instruction. 
- signedness (bool): Set activation quantization signedness. - - """ - - self.default_weight_attr_config = default_weight_attr_config - self.attr_weights_configs_mapping = attr_weights_configs_mapping - - self.activation_quantization_method = activation_quantization_method - self.activation_n_bits = activation_n_bits - if isinstance(supported_input_activation_n_bits, tuple): - self.supported_input_activation_n_bits = supported_input_activation_n_bits - elif isinstance(supported_input_activation_n_bits, int): - self.supported_input_activation_n_bits = (supported_input_activation_n_bits,) - else: - Logger.critical(f"Supported_input_activation_n_bits only accepts int or tuple of ints, but got {type(supported_input_activation_n_bits)}") # pragma: no cover - self.enable_activation_quantization = enable_activation_quantization - self.quantization_preserving = quantization_preserving - self.fixed_scale = fixed_scale - self.fixed_zero_point = fixed_zero_point - self.signedness = signedness - self.simd_size = simd_size - - def get_info(self): - """ - - Returns: Info about the quantization configuration as a dictionary. - - """ - return self.__dict__ # pragma: no cover - - def clone_and_edit(self, attr_to_edit: Dict[str, Dict[str, Any]] = {}, **kwargs): - """ - Clone the quantization config and edit some of its attributes. - Args: - attr_to_edit: A mapping between attributes names to edit and their parameters that - should be edited to a new value. - **kwargs: Keyword arguments to edit the configuration to clone. - - Returns: - Edited quantization configuration. - """ - - qc = clone_and_edit_object_params(self, **kwargs) - - # optionally: editing specific parameters in the config of specified attributes - edited_attrs = copy.deepcopy(qc.attr_weights_configs_mapping) - for attr_name, attr_cfg in qc.attr_weights_configs_mapping.items(): - if attr_name in attr_to_edit: - edited_attrs[attr_name] = attr_cfg.clone_and_edit(**attr_to_edit[attr_name]) - - qc.attr_weights_configs_mapping = edited_attrs - - return qc - - def __eq__(self, other): - """ - Is this configuration equal to another object. - Args: - other: Object to compare. - - Returns: - Whether this configuration is equal to another object or not. - """ - if not isinstance(other, OpQuantizationConfig): - return False # pragma: no cover - return self.default_weight_attr_config == other.default_weight_attr_config and \ - self.attr_weights_configs_mapping == other.attr_weights_configs_mapping and \ - self.activation_quantization_method == other.activation_quantization_method and \ - self.activation_n_bits == other.activation_n_bits and \ - self.supported_input_activation_n_bits == other.supported_input_activation_n_bits and \ - self.enable_activation_quantization == other.enable_activation_quantization and \ - self.signedness == other.signedness and \ - self.simd_size == other.simd_size - - @property - def max_input_activation_n_bits(self) -> int: - """ - Get maximum supported input bit-width. - - Returns: Maximum supported input bit-width. - - """ - return max(self.supported_input_activation_n_bits) - - -class QuantizationConfigOptions: - """ - - Wrap a set of quantization configurations to consider during the quantization - of an operator. - - """ - def __init__(self, - quantization_config_list: List[OpQuantizationConfig], - base_config: OpQuantizationConfig = None): - """ - - Args: - quantization_config_list (List[OpQuantizationConfig]): List of possible OpQuantizationConfig to gather. 
- base_config (OpQuantizationConfig): Fallback OpQuantizationConfig to use when optimizing the model in a non mixed-precision manner. - """ - - assert isinstance(quantization_config_list, - list), f"'QuantizationConfigOptions' options list must be a list, but received: {type(quantization_config_list)}." - for cfg in quantization_config_list: - assert isinstance(cfg, OpQuantizationConfig),\ - f"Each option must be an instance of 'OpQuantizationConfig', but found an object of type: {type(cfg)}." - self.quantization_config_list = quantization_config_list - if len(quantization_config_list) > 1: - assert base_config is not None, \ - f"For multiple configurations, a 'base_config' is required for non-mixed-precision optimization." - assert any([base_config is cfg for cfg in quantization_config_list]), \ - f"'base_config' must be included in the quantization config options list." - # Enforce base_config to be a reference to an instance in quantization_config_list. - self.base_config = base_config - elif len(quantization_config_list) == 1: - assert base_config is None or base_config == quantization_config_list[0], "'base_config' should be included in 'quantization_config_list'" - # Set base_config to be a reference to the first instance in quantization_config_list. - self.base_config = quantization_config_list[0] - else: - raise AssertionError("'QuantizationConfigOptions' requires at least one 'OpQuantizationConfig'. The provided list is empty.") - - def __eq__(self, other): - """ - Is this QCOptions equal to another object. - Args: - other: Object to compare. - - Returns: - Whether this QCOptions equal to another object or not. - """ - - if not isinstance(other, QuantizationConfigOptions): - return False - if len(self.quantization_config_list) != len(other.quantization_config_list): - return False - for qc, other_qc in zip(self.quantization_config_list, other.quantization_config_list): - if qc != other_qc: - return False - return True - - def clone_and_edit(self, **kwargs): - qc_options = copy.deepcopy(self) - for qc in qc_options.quantization_config_list: - self.__edit_quantization_configuration(qc, kwargs) - return qc_options - - def clone_and_edit_weight_attribute(self, attrs: List[str] = None, **kwargs): - """ - Clones the quantization configurations and edits some of their attributes' parameters. - - Args: - attrs: attributes names to clone their configurations. If None is provided, updating the configurations - of all attributes in the operation attributes config mapping. - **kwargs: Keyword arguments to edit in the attributes configuration. - - Returns: - QuantizationConfigOptions with edited attributes configurations. 
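# A minimal sketch of how the configuration classes above compose, assuming the
# constructor signatures shown in this hunk and the new schema.v1 import path;
# the 8/4/2 bit-widths and the 'kernel' attribute name are illustrative only.
import model_compression_toolkit as mct
from model_compression_toolkit.target_platform_capabilities.schema.v1 import (
    AttributeQuantizationConfig, OpQuantizationConfig, QuantizationConfigOptions, Signedness)

tp = mct.target_platform

kernel_cfg = AttributeQuantizationConfig(
    weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
    weights_n_bits=8,
    weights_per_channel_threshold=True,
    enable_weights_quantization=True,
    lut_values_bitwidth=None)  # assumed optional; only meaningful for LUT quantizers

op8 = OpQuantizationConfig(
    default_weight_attr_config=kernel_cfg,
    attr_weights_configs_mapping={'kernel': kernel_cfg},
    activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
    activation_n_bits=8,
    supported_input_activation_n_bits=8,  # an int is normalized to the tuple (8,)
    enable_activation_quantization=True,
    quantization_preserving=False,
    fixed_scale=None,        # assumed unused (None) outside fixed-quantization ops
    fixed_zero_point=None,
    simd_size=32,
    signedness=Signedness.AUTO)

# clone_and_edit edits op-level fields and, via attr_to_edit, attribute fields:
op4 = op8.clone_and_edit(activation_n_bits=4,
                         attr_to_edit={'kernel': {'weights_n_bits': 4}})

# With more than one candidate, base_config must be a member of the list:
options = QuantizationConfigOptions([op8, op4], base_config=op8)
low_bit_kernel = options.clone_and_edit_weight_attribute(attrs=['kernel'],
                                                         weights_n_bits=2)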
- - """ - - qc_options = copy.deepcopy(self) - - for qc in qc_options.quantization_config_list: - if attrs is None: - attrs_to_update = list(qc.attr_weights_configs_mapping.keys()) - else: - if not isinstance(attrs, List): # pragma: no cover - Logger.critical(f"Expected a list of attributes but received {type(attrs)}.") - attrs_to_update = attrs - - for attr in attrs_to_update: - if qc.attr_weights_configs_mapping.get(attr) is None: # pragma: no cover - Logger.critical(f'Editing attributes is only possible for existing attributes in the configuration\'s ' - f'weights config mapping; {attr} does not exist in {qc}.') - self.__edit_quantization_configuration(qc.attr_weights_configs_mapping[attr], kwargs) - return qc_options - - def clone_and_map_weights_attr_keys(self, layer_attrs_mapping: Union[Dict[str, str], None]): - """ - Clones the quantization configuration options and edits the keys in each configuration attributes config mapping, - based on the given attributes names mapping. - - Args: - layer_attrs_mapping: A mapping between attributes names. - - Returns: - QuantizationConfigOptions with edited attributes names. - - """ - qc_options = copy.deepcopy(self) - - # Extract the list of existing quantization configurations from qc_options - - # Check if the base_config is already included in the quantization configuration list - # If not, add base_config to the list of configurations to update - cfgs_to_update = [cfg for cfg in qc_options.quantization_config_list] - if not any(qc_options.base_config is cfg for cfg in cfgs_to_update): - # TODO: add test for this case - cfgs_to_update.append(qc_options.base_config) - - for qc in cfgs_to_update: - if layer_attrs_mapping is None: - qc.attr_weights_configs_mapping = {} - else: - new_attr_mapping = {} - for attr in list(qc.attr_weights_configs_mapping.keys()): - new_key = layer_attrs_mapping.get(attr) - if new_key is None: # pragma: no cover - Logger.critical(f"Attribute \'{attr}\' does not exist in the provided attribute mapping.") - - new_attr_mapping[new_key] = qc.attr_weights_configs_mapping.pop(attr) - - qc.attr_weights_configs_mapping.update(new_attr_mapping) - - return qc_options - - def __edit_quantization_configuration(self, qc, kwargs): - for k, v in kwargs.items(): - assert hasattr(qc, - k), (f'Editing is only possible for existing attributes in the configuration; ' - f'{k} is not an attribute of {qc}.') - setattr(qc, k, v) - - def get_info(self): - return {f'option {i}': cfg.get_info() for i, cfg in enumerate(self.quantization_config_list)} diff --git a/model_compression_toolkit/target_platform_capabilities/target_platform/operators.py b/model_compression_toolkit/target_platform_capabilities/target_platform/operators.py index 1b737b3bd..fa9315adc 100644 --- a/model_compression_toolkit/target_platform_capabilities/target_platform/operators.py +++ b/model_compression_toolkit/target_platform_capabilities/target_platform/operators.py @@ -12,76 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -from typing import Dict, Any -from model_compression_toolkit.target_platform_capabilities.constants import OPS_SET_LIST -from model_compression_toolkit.target_platform_capabilities.target_platform.target_platform_model_component import TargetPlatformModelComponent -from model_compression_toolkit.target_platform_capabilities.target_platform.current_tp_model import _current_tp_model -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import QuantizationConfigOptions - -class OperatorsSetBase(TargetPlatformModelComponent): - """ - Base class to represent a set of operators. - """ - def __init__(self, name: str): - """ - - Args: - name: Name of OperatorsSet. - """ - super().__init__(name=name) - - -class OperatorsSet(OperatorsSetBase): - def __init__(self, - name: str, - qc_options: QuantizationConfigOptions = None): - """ - Set of operators that are represented by a unique label. - - Args: - name (str): Set's label (must be unique in a TargetPlatformModel). - qc_options (QuantizationConfigOptions): Configuration options to use for this set of operations. - """ - - super().__init__(name) - self.qc_options = qc_options - is_fusing_set = qc_options is None - self.is_default = _current_tp_model.get().default_qco == self.qc_options or is_fusing_set - - - def get_info(self) -> Dict[str,Any]: - """ - - Returns: Info about the set as a dictionary. - - """ - return {"name": self.name, - "is_default_qc": self.is_default} - - -class OperatorSetConcat(OperatorsSetBase): - """ - Concatenate a list of operator sets to treat them similarly in different places (like fusing). - """ - def __init__(self, *opsets: OperatorsSet): - """ - Group a list of operation sets. - - Args: - *opsets (OperatorsSet): List of operator sets to group. - """ - name = "_".join([a.name for a in opsets]) - super().__init__(name=name) - self.op_set_list = opsets - self.qc_options = None # Concat have no qc options - - def get_info(self) -> Dict[str,Any]: - """ - - Returns: Info about the sets group as a dictionary. - - """ - return {"name": self.name, - OPS_SET_LIST: [s.name for s in self.op_set_list]} diff --git a/model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py b/model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py index 4fbf68ea6..212988b80 100644 --- a/model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py +++ b/model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py @@ -13,19 +13,8 @@ # limitations under the License. 
# ============================================================================== -import pprint -from typing import Any, Dict - -from model_compression_toolkit.target_platform_capabilities.target_platform.current_tp_model import _current_tp_model, \ - get_current_tp_model -from model_compression_toolkit.target_platform_capabilities.target_platform.fusing import Fusing -from model_compression_toolkit.target_platform_capabilities.target_platform.target_platform_model_component import \ - TargetPlatformModelComponent -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import OpQuantizationConfig, \ - QuantizationConfigOptions -from model_compression_toolkit.target_platform_capabilities.target_platform.operators import OperatorsSetBase -from model_compression_toolkit.target_platform_capabilities.immutable import ImmutableClass -from model_compression_toolkit.logger import Logger +from model_compression_toolkit.target_platform_capabilities.target_platform.current_tp_model import get_current_tp_model +from model_compression_toolkit.target_platform_capabilities.schema.v1 import QuantizationConfigOptions def get_default_quantization_config_options() -> QuantizationConfigOptions: @@ -51,192 +40,3 @@ def get_default_quantization_config(): return get_current_tp_model().get_default_op_quantization_config() -class TargetPlatformModel(ImmutableClass): - """ - Modeling of the hardware the quantized model will use during inference. - The model contains definition of operators, quantization configurations of them, and - fusing patterns so that multiple operators will be combined into a single operator. - """ - - def __init__(self, - default_qco: QuantizationConfigOptions, - add_metadata: bool = False, - name="default_tp_model"): - """ - - Args: - default_qco (QuantizationConfigOptions): Default QuantizationConfigOptions to use for operators that their QuantizationConfigOptions are not defined in the model. - add_metadata (bool): Whether to add metadata to the model or not. - name (str): Name of the model. - """ - - super().__init__() - self.add_metadata = add_metadata - self.name = name - self.operator_set = [] - assert isinstance(default_qco, QuantizationConfigOptions) - assert len(default_qco.quantization_config_list) == 1, \ - f'Default QuantizationConfigOptions must contain only one option' - self.default_qco = default_qco - self.fusing_patterns = [] - self.is_simd_padding = False - - def get_config_options_by_operators_set(self, - operators_set_name: str) -> QuantizationConfigOptions: - """ - Get the QuantizationConfigOptions of a OperatorsSet by the OperatorsSet name. - If the name is not in the model, the default QuantizationConfigOptions is returned. - - Args: - operators_set_name: Name of OperatorsSet to get. - - Returns: - QuantizationConfigOptions to use for ops in OperatorsSet named operators_set_name. - """ - for op_set in self.operator_set: - if operators_set_name == op_set.name: - return op_set.qc_options - return self.default_qco - - def get_default_op_quantization_config(self) -> OpQuantizationConfig: - """ - - Returns: The default OpQuantizationConfig of the TargetPlatformModel. - - """ - assert len(self.default_qco.quantization_config_list) == 1, \ - f'Default quantization configuration options must contain only one option,' \ - f' but found {len(get_current_tp_model().default_qco.quantization_config_list)} configurations.' 
- return self.default_qco.quantization_config_list[0] - - def is_opset_in_model(self, - opset_name: str) -> bool: - """ - Check whether an operators set is defined in the model or not. - - Args: - opset_name: Operators set name to check. - - Returns: - Whether an operators set is defined in the model or not. - """ - return opset_name in [x.name for x in self.operator_set] - - def get_opset_by_name(self, - opset_name: str) -> OperatorsSetBase: - """ - Get an OperatorsSet object from the model by its name. - If name is not in the model - None is returned. - - Args: - opset_name: OperatorsSet name to retrieve. - - Returns: - OperatorsSet object with the name opset_name, or None if opset_name is not in the model. - """ - - opset_list = [x for x in self.operator_set if x.name == opset_name] - assert len(opset_list) <= 1, f'Found more than one OperatorsSet in' \ - f' TargetPlatformModel with the name {opset_name}. ' \ - f'OperatorsSet name must be unique.' - if len(opset_list) == 0: # opset_name is not in the model. - return None - - return opset_list[0] # There's one opset with that name - - def append_component(self, - tp_model_component: TargetPlatformModelComponent): - """ - Attach a TargetPlatformModel component to the model. Components can be for example: - Fusing, OperatorsSet, etc. - - Args: - tp_model_component: Component to attach to the model. - - """ - if isinstance(tp_model_component, Fusing): - self.fusing_patterns.append(tp_model_component) - elif isinstance(tp_model_component, OperatorsSetBase): - self.operator_set.append(tp_model_component) - else: # pragma: no cover - Logger.critical(f'Attempted to append an unrecognized TargetPlatformModelComponent of type: {type(tp_model_component)}.') - - def __enter__(self): - """ - Start defining the TargetPlatformModel using 'with'. - - Returns: Initialized TargetPlatformModel object. - - """ - _current_tp_model.set(self) - return self - - def __exit__(self, exc_type, exc_value, tb): - """ - Finish defining the TargetPlatformModel at the end of the 'with' clause. - Returns the final and immutable TargetPlatformModel instance. - """ - - if exc_value is not None: - print(exc_value, exc_value.args) - raise exc_value - self.__validate_model() # Assert that model is valid. - _current_tp_model.reset() - self.initialized_done() # Make model immutable. - return self - - def __validate_model(self): - """ - - Assert model is valid. - Model is invalid if, for example, it contains multiple operator sets with the same name, - as their names should be unique. - - """ - opsets_names = [op.name for op in self.operator_set] - if len(set(opsets_names)) != len(opsets_names): - Logger.critical(f'Operator Sets must have unique names.') - - def get_default_config(self) -> OpQuantizationConfig: - """ - - Returns: - - """ - assert len(self.default_qco.quantization_config_list) == 1, \ - f'Default quantization configuration options must contain only one option,' \ - f' but found {len(self.default_qco.quantization_config_list)} configurations.' - return self.default_qco.quantization_config_list[0] - - def get_info(self) -> Dict[str, Any]: - """ - - Returns: Dictionary that summarizes the TargetPlatformModel properties (for display purposes). - - """ - return {"Model name": self.name, - "Default quantization config": self.get_default_config().get_info(), - "Operators sets": [o.get_info() for o in self.operator_set], - "Fusing patterns": [f.get_info() for f in self.fusing_patterns] - } - - def show(self): - """ - - Display the TargetPlatformModel. 
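# A sketch of the 'with' pattern these methods support, assuming the relocated
# schema.v1.TargetPlatformModel keeps the methods shown above; it reuses 'op8'
# and 'options' from the previous sketch, and the version values are illustrative.
from model_compression_toolkit.target_platform_capabilities.schema.v1 import (
    TargetPlatformModel, OperatorsSet, OperatorSetConcat, Fusing)

tp_model = TargetPlatformModel(QuantizationConfigOptions([op8]),  # default must hold exactly one option
                               tpc_minor_version=1,
                               tpc_patch_version=0,
                               add_metadata=False,
                               name='example_tp_model')
with tp_model:  # __enter__ registers the model, so new components attach to it
    conv = OperatorsSet("Conv", options)  # mixed-precision candidates
    relu = OperatorsSet("AnyReLU")        # no qc_options: usable for fusing only
    swish = OperatorsSet("Swish")
    Fusing([conv, OperatorSetConcat(relu, swish)])

assert tp_model.is_opset_in_model("Conv")
assert tp_model.get_opset_by_name("AnyReLU") is relu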
- - """ - pprint.pprint(self.get_info(), sort_dicts=False) - - def set_simd_padding(self, - is_simd_padding: bool): - """ - Set flag is_simd_padding to indicate whether this TP model defines - that padding due to SIMD constrains occurs. - - Args: - is_simd_padding: Whether this TP model defines that padding due to SIMD constrains occurs. - - """ - self.is_simd_padding = is_simd_padding - diff --git a/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py b/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py index f8607784e..58f32dc10 100644 --- a/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py +++ b/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py @@ -18,8 +18,7 @@ from model_compression_toolkit.logger import Logger from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.current_tpc import _current_tpc from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.target_platform_capabilities_component import TargetPlatformCapabilitiesComponent -from model_compression_toolkit.target_platform_capabilities.target_platform.operators import OperatorSetConcat, \ - OperatorsSetBase +from model_compression_toolkit.target_platform_capabilities.schema.v1 import OperatorsSetBase, OperatorSetConcat from model_compression_toolkit import DefaultDict diff --git a/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py b/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py index 80385553b..aece25063 100644 --- a/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py +++ b/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py @@ -24,10 +24,8 @@ from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.target_platform_capabilities_component import TargetPlatformCapabilitiesComponent from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.layer_filter_params import LayerFilterParams from model_compression_toolkit.target_platform_capabilities.immutable import ImmutableClass -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import QuantizationConfigOptions, \ - OpQuantizationConfig -from model_compression_toolkit.target_platform_capabilities.target_platform.operators import OperatorsSetBase -from model_compression_toolkit.target_platform_capabilities.target_platform.target_platform_model import TargetPlatformModel +from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel, OperatorsSetBase, \ + OpQuantizationConfig, QuantizationConfigOptions from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.current_tpc import _current_tpc from model_compression_toolkit.constants import MCT_VERSION, TPC_VERSION @@ -36,20 +34,14 @@ class TargetPlatformCapabilities(ImmutableClass): """ Attach framework information to a modeled hardware. 
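# The hunk below slims this constructor down; a sketch of the new call site,
# assuming a 'tp_model' built as in the earlier sketch. Name and version are
# no longer passed here and are carried by the model itself instead:
keras_tpc = tp.TargetPlatformCapabilities(tp_model)  # was: (tp_model, name=..., version=...)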
""" - def __init__(self, - tp_model: TargetPlatformModel, - name: str = "base", - version: str = None): + def __init__(self, tp_model: TargetPlatformModel): """ Args: tp_model (TargetPlatformModel): Modeled hardware to attach framework information to. - name (str): Name of the TargetPlatformCapabilities. - version (str): TPC version. """ super().__init__() - self.name = name assert isinstance(tp_model, TargetPlatformModel), f'Target platform model that was passed to TargetPlatformCapabilities must be of type TargetPlatformModel, but has type of {type(tp_model)}' self.tp_model = tp_model self.op_sets_to_layers = OperationsToLayers() # Init an empty OperationsToLayers @@ -57,7 +49,6 @@ def __init__(self, # Track the unused opsets for warning purposes. self.__tp_model_opsets_not_used = [s.name for s in tp_model.operator_set] self.remove_fusing_names_from_not_used_list() - self.version = version def get_layers_by_opset_name(self, opset_name: str) -> List[Any]: """ diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py index 3502d4c10..ce9f93df1 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py @@ -15,12 +15,11 @@ from typing import List, Tuple import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.constants import FLOAT_BITWIDTH from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \ - TargetPlatformModel, Signedness -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \ - AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel, Signedness, \ + AttributeQuantizationConfig, OpQuantizationConfig tp = mct.target_platform @@ -88,7 +87,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza # We define a default config for operation without kernel attribute. # This is the default config that should be used for non-linear operations. - eight_bits_default = tp.OpQuantizationConfig( + eight_bits_default = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -102,7 +101,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza signedness=Signedness.AUTO) # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes. 
- linear_eight_bits = tp.OpQuantizationConfig( + linear_eight_bits = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -152,12 +151,17 @@ def generate_tp_model(default_config: OpQuantizationConfig, # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example). # If the QuantizationConfigOptions contains only one configuration, # this configuration will be used for the operation quantization: - default_configuration_options = tp.QuantizationConfigOptions([default_config]) + default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_config]) # Create a TargetPlatformModel and set its default quantization config. # This default configuration will be used for all operations # unless specified otherwise (see OperatorsSet, for example): - generated_tpc = tp.TargetPlatformModel(default_configuration_options, name=name) + generated_tpc = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=1, + tpc_patch_version=0, + name=name, + add_metadata=False) # To start defining the model's components (such as operator sets, and fusing patterns), # use 'with' the TargetPlatformModel instance, and create them as below: @@ -172,42 +176,42 @@ def generate_tp_model(default_config: OpQuantizationConfig, # May suit for operations like: Dropout, Reshape, etc. default_qco = tp.get_default_quantization_config_options() - tp.OperatorsSet("NoQuantization", - default_qco.clone_and_edit(enable_activation_quantization=False) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("NoQuantization", + default_qco.clone_and_edit(enable_activation_quantization=False) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects - mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list, - base_config=base_config) + mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions(mixed_precision_cfg_list, + base_config=base_config) # Define operator sets that use mixed_precision_configuration_options: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - fc = tp.OperatorsSet("FullyConnected", mixed_precision_configuration_options) + conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) + fc = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("FullyConnected", mixed_precision_configuration_options) # Define operations sets without quantization configuration # options (useful for creating fusing patterns, for example): - any_relu = tp.OperatorsSet("AnyReLU") - add = tp.OperatorsSet("Add") - sub = tp.OperatorsSet("Sub") - mul = tp.OperatorsSet("Mul") - div = tp.OperatorsSet("Div") - prelu = tp.OperatorsSet("PReLU") - swish = tp.OperatorsSet("Swish") - sigmoid = tp.OperatorsSet("Sigmoid") - tanh = tp.OperatorsSet("Tanh") + any_relu = 
model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU") + add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Add") + sub = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sub") + mul = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Mul") + div = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Div") + prelu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("PReLU") + swish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Swish") + sigmoid = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sigmoid") + tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Tanh") # Combine multiple operators into a single operator to avoid quantization between # them. To do this we define fusing patterns using the OperatorsSets that were created. # To group multiple sets with regard to fusing, an OperatorSetConcat can be created - activations_after_conv_to_fuse = tp.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) - activations_after_fc_to_fuse = tp.OperatorSetConcat(any_relu, swish, sigmoid) - any_binary = tp.OperatorSetConcat(add, sub, mul, div) + activations_after_conv_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) + activations_after_fc_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, sigmoid) + any_binary = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(add, sub, mul, div) # ------------------- # # Fusions # ------------------- # - tp.Fusing([conv, activations_after_conv_to_fuse]) - tp.Fusing([fc, activations_after_fc_to_fuse]) - tp.Fusing([any_binary, any_relu]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_after_conv_to_fuse]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([fc, activations_after_fc_to_fuse]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([any_binary, any_relu]) return generated_tpc diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_keras.py index 72d9a363b..02f0155d6 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_keras.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_keras.py @@ -15,6 +15,7 @@ import tensorflow as tf from packaging import version +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.verify_packages import FOUND_SONY_CUSTOM_LAYERS from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_DEPTHWISE_KERNEL, \ @@ -48,7 +49,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities: return generate_keras_tpc(name='imx500_tpc_keras_tpc', tp_model=imx500_tpc_tp_model) -def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_keras_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default 
operation sets to layers mapping. @@ -59,7 +60,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. """ - keras_tpc = tp.TargetPlatformCapabilities(tp_model, name=name, version=TPC_VERSION) + keras_tpc = tp.TargetPlatformCapabilities(tp_model) no_quant_list = [Reshape, tf.reshape, diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_pytorch.py index 086f0cace..ca6a3bf22 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_pytorch.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_pytorch.py @@ -23,6 +23,7 @@ from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, PYTORCH_KERNEL, \ BIAS @@ -42,7 +43,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities: return generate_pytorch_tpc(name='imx500_tpc_pytorch_tpc', tp_model=imx500_tpc_tp_model) -def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_pytorch_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. Args: @@ -51,9 +52,7 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. 
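# Usage is unchanged for callers; a sketch mirroring the call shown above
# (the 'name' argument remains in the generator's signature, though the
# TargetPlatformCapabilities constructor no longer consumes it):
pytorch_tpc = generate_pytorch_tpc(name='imx500_tpc_pytorch_tpc',
                                   tp_model=imx500_tpc_tp_model)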
""" - pytorch_tpc = tp.TargetPlatformCapabilities(tp_model, - name=name, - version=TPC_VERSION) + pytorch_tpc = tp.TargetPlatformCapabilities(tp_model) # we provide attributes mapping that maps each layer type in the operations set # that has weights attributes with provided quantization config (in the tp model) to diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py index 09708edda..011575c1d 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py @@ -15,13 +15,12 @@ from typing import List, Tuple import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.constants import FLOAT_BITWIDTH from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \ WEIGHTS_QUANTIZATION_METHOD -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \ - TargetPlatformModel, Signedness -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \ - AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel, Signedness, \ + AttributeQuantizationConfig, OpQuantizationConfig tp = mct.target_platform @@ -84,7 +83,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza # We define a default config for operation without kernel attribute. # This is the default config that should be used for non-linear operations. - eight_bits_default = tp.OpQuantizationConfig( + eight_bits_default = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -98,7 +97,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza signedness=Signedness.AUTO) # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes. - linear_eight_bits = tp.OpQuantizationConfig( + linear_eight_bits = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -151,12 +150,17 @@ def generate_tp_model(default_config: OpQuantizationConfig, # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example). # If the QuantizationConfigOptions contains only one configuration, # this configuration will be used for the operation quantization: - default_configuration_options = tp.QuantizationConfigOptions([default_config]) + default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_config]) # Create a TargetPlatformModel and set its default quantization config. 
# This default configuration will be used for all operations # unless specified otherwise (see OperatorsSet, for example): - generated_tpc = tp.TargetPlatformModel(default_configuration_options, name=name) + generated_tpc = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=1, + tpc_patch_version=0, + add_metadata=False, + name=name) # To start defining the model's components (such as operator sets, and fusing patterns), # use 'with' the TargetPlatformModel instance, and create them as below: @@ -169,42 +173,42 @@ def generate_tp_model(default_config: OpQuantizationConfig, # May suit for operations like: Dropout, Reshape, etc. default_qco = tp.get_default_quantization_config_options() - tp.OperatorsSet("NoQuantization", - default_qco.clone_and_edit(enable_activation_quantization=False) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("NoQuantization", + default_qco.clone_and_edit(enable_activation_quantization=False) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects - mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list, - base_config=base_config) + mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions(mixed_precision_cfg_list, + base_config=base_config) # Define operator sets that use mixed_precision_configuration_options: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - fc = tp.OperatorsSet("FullyConnected", mixed_precision_configuration_options) + conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) + fc = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("FullyConnected", mixed_precision_configuration_options) # Define operations sets without quantization configuration # options (useful for creating fusing patterns, for example): - any_relu = tp.OperatorsSet("AnyReLU") - add = tp.OperatorsSet("Add") - sub = tp.OperatorsSet("Sub") - mul = tp.OperatorsSet("Mul") - div = tp.OperatorsSet("Div") - prelu = tp.OperatorsSet("PReLU") - swish = tp.OperatorsSet("Swish") - sigmoid = tp.OperatorsSet("Sigmoid") - tanh = tp.OperatorsSet("Tanh") + any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU") + add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Add") + sub = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sub") + mul = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Mul") + div = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Div") + prelu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("PReLU") + swish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Swish") + sigmoid = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sigmoid") + tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Tanh") # Combine multiple operators into a single operator to avoid quantization between # them. 
To do this we define fusing patterns using the OperatorsSets that were created. # To group multiple sets with regard to fusing, an OperatorSetConcat can be created - activations_after_conv_to_fuse = tp.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) - activations_after_fc_to_fuse = tp.OperatorSetConcat(any_relu, swish, sigmoid) - any_binary = tp.OperatorSetConcat(add, sub, mul, div) + activations_after_conv_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) + activations_after_fc_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, sigmoid) + any_binary = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(add, sub, mul, div) # ------------------- # # Fusions # ------------------- # - tp.Fusing([conv, activations_after_conv_to_fuse]) - tp.Fusing([fc, activations_after_fc_to_fuse]) - tp.Fusing([any_binary, any_relu]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_after_conv_to_fuse]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([fc, activations_after_fc_to_fuse]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([any_binary, any_relu]) return generated_tpc diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_keras.py index f11ca92d5..b0c278670 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_keras.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_keras.py @@ -15,6 +15,7 @@ import tensorflow as tf from packaging import version +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.verify_packages import FOUND_SONY_CUSTOM_LAYERS from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_KERNEL, BIAS_ATTR, \ @@ -48,7 +49,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities: return generate_keras_tpc(name='imx500_tpc_keras_tpc', tp_model=imx500_tpc_tp_model) -def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_keras_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. @@ -59,7 +60,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. 
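# A sketch of the recurring 'NoQuantization' pattern in these tp_model files,
# reusing the schema.v1 imports from the earlier sketches: inside the model's
# 'with' block, the default options are cloned with both activation and
# weights quantization disabled:
default_qco = tp.get_default_quantization_config_options()
no_quant = default_qco.clone_and_edit(enable_activation_quantization=False) \
                      .clone_and_edit_weight_attribute(enable_weights_quantization=False)
OperatorsSet("NoQuantization", no_quant)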
""" - keras_tpc = tp.TargetPlatformCapabilities(tp_model, name=name, version=TPC_VERSION) + keras_tpc = tp.TargetPlatformCapabilities(tp_model) no_quant_list = [Reshape, tf.reshape, diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_pytorch.py index b50921dcd..7545331fe 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_pytorch.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_pytorch.py @@ -23,6 +23,7 @@ from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR, \ BIAS @@ -42,7 +43,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities: return generate_pytorch_tpc(name='imx500_tpc_pytorch_tpc', tp_model=imx500_tpc_tp_model) -def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_pytorch_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. Args: @@ -51,9 +52,7 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. """ - pytorch_tpc = tp.TargetPlatformCapabilities(tp_model, - name=name, - version=TPC_VERSION) + pytorch_tpc = tp.TargetPlatformCapabilities(tp_model) # we provide attributes mapping that maps each layer type in the operations set # that has weights attributes with provided quantization config (in the tp model) to diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py index f07ea4e58..3bc2315e7 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py @@ -15,13 +15,11 @@ from typing import List, Tuple import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.constants import FLOAT_BITWIDTH from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \ - TargetPlatformModel, Signedness -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \ - AttributeQuantizationConfig - +from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel, Signedness, \ + AttributeQuantizationConfig, OpQuantizationConfig tp = mct.target_platform @@ -84,7 +82,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza # We define a default config for operation without kernel attribute. # This is the default config that should be used for non-linear operations. 
- eight_bits_default = tp.OpQuantizationConfig( + eight_bits_default = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -98,7 +96,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza signedness=Signedness.AUTO) # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes. - linear_eight_bits = tp.OpQuantizationConfig( + linear_eight_bits = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config}, @@ -147,12 +145,17 @@ def generate_tp_model(default_config: OpQuantizationConfig, # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example). # If the QuantizationConfigOptions contains only one configuration, # this configuration will be used for the operation quantization: - default_configuration_options = tp.QuantizationConfigOptions([default_config]) + default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_config]) # Create a TargetPlatformModel and set its default quantization config. # This default configuration will be used for all operations # unless specified otherwise (see OperatorsSet, for example): - generated_tpc = tp.TargetPlatformModel(default_configuration_options, name=name) + generated_tpc = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=1, + tpc_patch_version=0, + add_metadata=False, + name=name) # To start defining the model's components (such as operator sets, and fusing patterns), # use 'with' the TargetPlatformModel instance, and create them as below: @@ -165,42 +168,42 @@ def generate_tp_model(default_config: OpQuantizationConfig, # May suit for operations like: Dropout, Reshape, etc. 
default_qco = tp.get_default_quantization_config_options() - tp.OperatorsSet("NoQuantization", - default_qco.clone_and_edit(enable_activation_quantization=False) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("NoQuantization", + default_qco.clone_and_edit(enable_activation_quantization=False) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects - mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list, - base_config=base_config) + mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions(mixed_precision_cfg_list, + base_config=base_config) # Define operator sets that use mixed_precision_configuration_options: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - fc = tp.OperatorsSet("FullyConnected", mixed_precision_configuration_options) + conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) + fc = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("FullyConnected", mixed_precision_configuration_options) # Define operations sets without quantization configuration # options (useful for creating fusing patterns, for example): - any_relu = tp.OperatorsSet("AnyReLU") - add = tp.OperatorsSet("Add") - sub = tp.OperatorsSet("Sub") - mul = tp.OperatorsSet("Mul") - div = tp.OperatorsSet("Div") - prelu = tp.OperatorsSet("PReLU") - swish = tp.OperatorsSet("Swish") - sigmoid = tp.OperatorsSet("Sigmoid") - tanh = tp.OperatorsSet("Tanh") + any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU") + add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Add") + sub = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sub") + mul = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Mul") + div = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Div") + prelu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("PReLU") + swish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Swish") + sigmoid = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sigmoid") + tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Tanh") # Combine multiple operators into a single operator to avoid quantization between # them. To do this we define fusing patterns using the OperatorsSets that were created. 
# To group multiple sets with regard to fusing, an OperatorSetConcat can be created - activations_after_conv_to_fuse = tp.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) - activations_after_fc_to_fuse = tp.OperatorSetConcat(any_relu, swish, sigmoid) - any_binary = tp.OperatorSetConcat(add, sub, mul, div) + activations_after_conv_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) + activations_after_fc_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, sigmoid) + any_binary = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(add, sub, mul, div) # ------------------- # # Fusions # ------------------- # - tp.Fusing([conv, activations_after_conv_to_fuse]) - tp.Fusing([fc, activations_after_fc_to_fuse]) - tp.Fusing([any_binary, any_relu]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_after_conv_to_fuse]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([fc, activations_after_fc_to_fuse]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([any_binary, any_relu]) return generated_tpc diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_keras.py index 0f8e63b2e..3173878f0 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_keras.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_keras.py @@ -15,6 +15,7 @@ import tensorflow as tf from packaging import version +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.verify_packages import FOUND_SONY_CUSTOM_LAYERS from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_DEPTHWISE_KERNEL, \ @@ -48,7 +49,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities: return generate_keras_tpc(name='imx500_pot_tpc_keras_tpc', tp_model=imx500_pot_tpc_tp_model) -def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_keras_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. @@ -59,7 +60,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. 
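# A sketch of what these fusing declarations express, reusing the operator
# sets defined in the hunk above: each Fusing pattern marks a sequence that is
# executed as one fused op, so no quantization is inserted between its members.
activations_after_conv = OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh)
Fusing([conv, activations_after_conv])  # Conv followed by any listed activation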
""" - keras_tpc = tp.TargetPlatformCapabilities(tp_model, name=name, version=TPC_VERSION) + keras_tpc = tp.TargetPlatformCapabilities(tp_model) no_quant_list = [Reshape, tf.reshape, diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_pytorch.py index 7f46dfe0c..dde0ff2c3 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_pytorch.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_pytorch.py @@ -23,6 +23,7 @@ from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR, \ BIAS @@ -43,7 +44,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities: return generate_pytorch_tpc(name='imx500_pot_tpc_pytorch_tpc', tp_model=imx500_pot_tpc_tp_model) -def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_pytorch_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. Args: @@ -52,9 +53,7 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. """ - pytorch_tpc = tp.TargetPlatformCapabilities(tp_model, - name=name, - version=TPC_VERSION) + pytorch_tpc = tp.TargetPlatformCapabilities(tp_model) # we provide attributes mapping that maps each layer type in the operations set # that has weights attributes with provided quantization config (in the tp model) to diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py index 2c10e5e08..4f868e52e 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py @@ -15,12 +15,11 @@ from typing import List, Tuple import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.constants import FLOAT_BITWIDTH from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \ - TargetPlatformModel, Signedness -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \ - AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel, Signedness, \ + AttributeQuantizationConfig, OpQuantizationConfig tp = mct.target_platform @@ -90,7 +89,7 @@ def get_op_quantization_configs() -> \ # We define a default config for operation without kernel attribute. # This is the default config that should be used for non-linear operations. 
- eight_bits_default = tp.OpQuantizationConfig( + eight_bits_default = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -104,7 +103,7 @@ def get_op_quantization_configs() -> \ signedness=Signedness.AUTO) # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes. - linear_eight_bits = tp.OpQuantizationConfig( + linear_eight_bits = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -154,12 +153,17 @@ def generate_tp_model(default_config: OpQuantizationConfig, # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example). # If the QuantizationConfigOptions contains only one configuration, # this configuration will be used for the operation quantization: - default_configuration_options = tp.QuantizationConfigOptions([default_config]) + default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_config]) # Create a TargetPlatformModel and set its default quantization config. # This default configuration will be used for all operations # unless specified otherwise (see OperatorsSet, for example): - generated_tpm = tp.TargetPlatformModel(default_configuration_options, add_metadata=True, name=name) + generated_tpm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=2, + tpc_patch_version=0, + add_metadata=True, + name=name) # To start defining the model's components (such as operator sets, and fusing patterns), # use 'with' the TargetPlatformModel instance, and create them as below: @@ -174,42 +178,42 @@ def generate_tp_model(default_config: OpQuantizationConfig, # May suit for operations like: Dropout, Reshape, etc. 
default_qco = tp.get_default_quantization_config_options() - tp.OperatorsSet("NoQuantization", - default_qco.clone_and_edit(enable_activation_quantization=False) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("NoQuantization", + default_qco.clone_and_edit(enable_activation_quantization=False) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects - mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list, - base_config=base_config) + mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions(mixed_precision_cfg_list, + base_config=base_config) # Define operator sets that use mixed_precision_configuration_options: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - fc = tp.OperatorsSet("FullyConnected", mixed_precision_configuration_options) + conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) + fc = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("FullyConnected", mixed_precision_configuration_options) # Define operations sets without quantization configuration # options (useful for creating fusing patterns, for example): - any_relu = tp.OperatorsSet("AnyReLU") - add = tp.OperatorsSet("Add") - sub = tp.OperatorsSet("Sub") - mul = tp.OperatorsSet("Mul") - div = tp.OperatorsSet("Div") - prelu = tp.OperatorsSet("PReLU") - swish = tp.OperatorsSet("Swish") - sigmoid = tp.OperatorsSet("Sigmoid") - tanh = tp.OperatorsSet("Tanh") + any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU") + add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Add") + sub = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sub") + mul = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Mul") + div = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Div") + prelu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("PReLU") + swish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Swish") + sigmoid = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sigmoid") + tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Tanh") # Combine multiple operators into a single operator to avoid quantization between # them. To do this we define fusing patterns using the OperatorsSets that were created. 
# To group multiple sets with regard to fusing, an OperatorSetConcat can be created - activations_after_conv_to_fuse = tp.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) - activations_after_fc_to_fuse = tp.OperatorSetConcat(any_relu, swish, sigmoid) - any_binary = tp.OperatorSetConcat(add, sub, mul, div) + activations_after_conv_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) + activations_after_fc_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, sigmoid) + any_binary = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(add, sub, mul, div) # ------------------- # # Fusions # ------------------- # - tp.Fusing([conv, activations_after_conv_to_fuse]) - tp.Fusing([fc, activations_after_fc_to_fuse]) - tp.Fusing([any_binary, any_relu]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_after_conv_to_fuse]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([fc, activations_after_fc_to_fuse]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([any_binary, any_relu]) return generated_tpm diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_keras.py index 8845d929d..dd79a0a95 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_keras.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_keras.py @@ -15,6 +15,7 @@ import tensorflow as tf from packaging import version +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.verify_packages import FOUND_SONY_CUSTOM_LAYERS from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_DEPTHWISE_KERNEL, \ @@ -49,7 +50,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities: return generate_keras_tpc(name='imx500_tpc_keras_tpc', tp_model=imx500_tpc_tp_model) -def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_keras_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. @@ -60,7 +61,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. 
""" - keras_tpc = tp.TargetPlatformCapabilities(tp_model, name=name, version=TPC_VERSION) + keras_tpc = tp.TargetPlatformCapabilities(tp_model) no_quant_list = [Identity, tf.identity, diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_pytorch.py index ad60efc53..3877011db 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_pytorch.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_pytorch.py @@ -23,6 +23,7 @@ from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, PYTORCH_KERNEL, \ BIAS @@ -43,7 +44,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities: return generate_pytorch_tpc(name='imx500_tpc_pytorch_tpc', tp_model=imx500_tpc_tp_model) -def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_pytorch_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. Args: @@ -52,9 +53,7 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. """ - pytorch_tpc = tp.TargetPlatformCapabilities(tp_model, - name=name, - version=TPC_VERSION) + pytorch_tpc = tp.TargetPlatformCapabilities(tp_model) # we provide attributes mapping that maps each layer type in the operations set # that has weights attributes with provided quantization config (in the tp model) to diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py index b7565df45..5714fdee1 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py @@ -15,13 +15,12 @@ from typing import List, Tuple import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.constants import FLOAT_BITWIDTH from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \ WEIGHTS_QUANTIZATION_METHOD -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \ - TargetPlatformModel, Signedness -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \ - AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel, Signedness, \ + AttributeQuantizationConfig, OpQuantizationConfig tp = mct.target_platform @@ -86,7 +85,7 @@ def get_op_quantization_configs() -> \ # We define a default config for operation without kernel attribute. # This is the default config that should be used for non-linear operations. 
- eight_bits_default = tp.OpQuantizationConfig( + eight_bits_default = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -100,7 +99,7 @@ def get_op_quantization_configs() -> \ signedness=Signedness.AUTO) # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes. - linear_eight_bits = tp.OpQuantizationConfig( + linear_eight_bits = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -153,12 +152,17 @@ def generate_tp_model(default_config: OpQuantizationConfig, # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example). # If the QuantizationConfigOptions contains only one configuration, # this configuration will be used for the operation quantization: - default_configuration_options = tp.QuantizationConfigOptions([default_config]) + default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_config]) # Create a TargetPlatformModel and set its default quantization config. # This default configuration will be used for all operations # unless specified otherwise (see OperatorsSet, for example): - generated_tpm = tp.TargetPlatformModel(default_configuration_options, add_metadata=True, name=name) + generated_tpm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=2, + tpc_patch_version=0, + add_metadata=True, + name=name) # To start defining the model's components (such as operator sets, and fusing patterns), # use 'with' the TargetPlatformModel instance, and create them as below: @@ -171,42 +175,42 @@ def generate_tp_model(default_config: OpQuantizationConfig, # May suit for operations like: Dropout, Reshape, etc. 
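
The mixed-precision hunks follow one pattern across all nine model files: a list of candidate OpQuantizationConfig objects plus a base_config that anchors the search. A minimal sketch, assuming mixed_precision_cfg_list and base_config as defined earlier in this file (`schema` is an illustrative alias):

    import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema

    # One options object holds all bit-width candidates considered per operator set;
    # base_config is the configuration used when no mixed-precision search runs.
    mixed_precision_configuration_options = schema.QuantizationConfigOptions(
        mixed_precision_cfg_list,
        base_config=base_config)
    conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options)
    fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options)
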
default_qco = tp.get_default_quantization_config_options() - tp.OperatorsSet("NoQuantization", - default_qco.clone_and_edit(enable_activation_quantization=False) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("NoQuantization", + default_qco.clone_and_edit(enable_activation_quantization=False) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects - mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list, - base_config=base_config) + mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions(mixed_precision_cfg_list, + base_config=base_config) # Define operator sets that use mixed_precision_configuration_options: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - fc = tp.OperatorsSet("FullyConnected", mixed_precision_configuration_options) + conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) + fc = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("FullyConnected", mixed_precision_configuration_options) # Define operations sets without quantization configuration # options (useful for creating fusing patterns, for example): - any_relu = tp.OperatorsSet("AnyReLU") - add = tp.OperatorsSet("Add") - sub = tp.OperatorsSet("Sub") - mul = tp.OperatorsSet("Mul") - div = tp.OperatorsSet("Div") - prelu = tp.OperatorsSet("PReLU") - swish = tp.OperatorsSet("Swish") - sigmoid = tp.OperatorsSet("Sigmoid") - tanh = tp.OperatorsSet("Tanh") + any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU") + add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Add") + sub = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sub") + mul = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Mul") + div = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Div") + prelu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("PReLU") + swish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Swish") + sigmoid = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sigmoid") + tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Tanh") # Combine multiple operators into a single operator to avoid quantization between # them. To do this we define fusing patterns using the OperatorsSets that were created. 
# To group multiple sets with regard to fusing, an OperatorSetConcat can be created - activations_after_conv_to_fuse = tp.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) - activations_after_fc_to_fuse = tp.OperatorSetConcat(any_relu, swish, sigmoid) - any_binary = tp.OperatorSetConcat(add, sub, mul, div) + activations_after_conv_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) + activations_after_fc_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, sigmoid) + any_binary = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(add, sub, mul, div) # ------------------- # # Fusions # ------------------- # - tp.Fusing([conv, activations_after_conv_to_fuse]) - tp.Fusing([fc, activations_after_fc_to_fuse]) - tp.Fusing([any_binary, any_relu]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_after_conv_to_fuse]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([fc, activations_after_fc_to_fuse]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([any_binary, any_relu]) return generated_tpm diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_keras.py index 08b791f4f..ce5abf498 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_keras.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_keras.py @@ -15,6 +15,7 @@ import tensorflow as tf from packaging import version +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.verify_packages import FOUND_SONY_CUSTOM_LAYERS from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_KERNEL, BIAS_ATTR, \ @@ -48,7 +49,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities: return generate_keras_tpc(name='imx500_tpc_keras_tpc', tp_model=imx500_tpc_tp_model) -def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_keras_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. @@ -59,7 +60,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. 
""" - keras_tpc = tp.TargetPlatformCapabilities(tp_model, name=name, version=TPC_VERSION) + keras_tpc = tp.TargetPlatformCapabilities(tp_model) no_quant_list = [Identity, tf.identity, diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_pytorch.py index 59189d306..43a52600b 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_pytorch.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_pytorch.py @@ -23,6 +23,7 @@ from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR, \ BIAS @@ -42,7 +43,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities: return generate_pytorch_tpc(name='imx500_tpc_pytorch_tpc', tp_model=imx500_tpc_tp_model) -def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_pytorch_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. Args: @@ -51,9 +52,7 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. """ - pytorch_tpc = tp.TargetPlatformCapabilities(tp_model, - name=name, - version=TPC_VERSION) + pytorch_tpc = tp.TargetPlatformCapabilities(tp_model) # we provide attributes mapping that maps each layer type in the operations set # that has weights attributes with provided quantization config (in the tp model) to diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py index 84662cd23..25435f098 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py @@ -15,12 +15,11 @@ from typing import List, Tuple import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.constants import FLOAT_BITWIDTH from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \ - TargetPlatformModel, Signedness -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \ - AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel, Signedness, \ + AttributeQuantizationConfig, OpQuantizationConfig tp = mct.target_platform @@ -90,7 +89,7 @@ def get_op_quantization_configs() -> \ # We define a default config for operation without kernel attribute. # This is the default config that should be used for non-linear operations. 
- eight_bits_default = tp.OpQuantizationConfig( + eight_bits_default = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -104,7 +103,7 @@ def get_op_quantization_configs() -> \ signedness=Signedness.AUTO) # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes. - linear_eight_bits = tp.OpQuantizationConfig( + linear_eight_bits = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -154,7 +153,7 @@ def generate_tp_model(default_config: OpQuantizationConfig, # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example). # If the QuantizationConfigOptions contains only one configuration, # this configuration will be used for the operation quantization: - default_configuration_options = tp.QuantizationConfigOptions([default_config]) + default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_config]) # Create a QuantizationConfigOptions for quantizing constants in functional ops. # Constant configuration is similar to the default eight bit configuration except for PoT @@ -165,7 +164,7 @@ def generate_tp_model(default_config: OpQuantizationConfig, default_weight_attr_config=default_config.default_weight_attr_config.clone_and_edit( enable_weights_quantization=True, weights_per_channel_threshold=True, weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO)) - const_configuration_options = tp.QuantizationConfigOptions([const_config]) + const_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([const_config]) # 16 bits inputs and outputs. Currently, only defined for consts since they are used in operators that # support 16 bit as input and output. @@ -173,14 +172,19 @@ def generate_tp_model(default_config: OpQuantizationConfig, supported_input_activation_n_bits=(8, 16)) const_config_input16_output16 = const_config_input16.clone_and_edit( activation_n_bits=16, signedness=Signedness.SIGNED) - const_configuration_options_inout16 = tp.QuantizationConfigOptions([const_config_input16_output16, - const_config_input16], - base_config=const_config_input16) + const_configuration_options_inout16 = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([const_config_input16_output16, + const_config_input16], + base_config=const_config_input16) # Create a TargetPlatformModel and set its default quantization config. 
# This default configuration will be used for all operations # unless specified otherwise (see OperatorsSet, for example): - generated_tpm = tp.TargetPlatformModel(default_configuration_options, add_metadata=True, name=name) + generated_tpm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=3, + tpc_patch_version=0, + add_metadata=True, + name=name) # To start defining the model's components (such as operator sets, and fusing patterns), # use 'with' the TargetPlatformModel instance, and create them as below: @@ -195,44 +199,44 @@ def generate_tp_model(default_config: OpQuantizationConfig, # May suit for operations like: Dropout, Reshape, etc. default_qco = tp.get_default_quantization_config_options() - tp.OperatorsSet("NoQuantization", - default_qco.clone_and_edit(enable_activation_quantization=False, + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("NoQuantization", + default_qco.clone_and_edit(enable_activation_quantization=False, supported_input_activation_n_bits=(8, 16)) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) - tp.OperatorsSet("Default16BitInout", const_configuration_options_inout16) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Default16BitInout", const_configuration_options_inout16) # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects - mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list, - base_config=base_config) + mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions(mixed_precision_cfg_list, + base_config=base_config) # Define operator sets that use mixed_precision_configuration_options: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - fc = tp.OperatorsSet("FullyConnected", mixed_precision_configuration_options) + conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) + fc = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("FullyConnected", mixed_precision_configuration_options) # Define operations sets without quantization configuration # options (useful for creating fusing patterns, for example): - any_relu = tp.OperatorsSet("AnyReLU") - add = tp.OperatorsSet("Add", const_configuration_options_inout16) - sub = tp.OperatorsSet("Sub", const_configuration_options_inout16) - mul = tp.OperatorsSet("Mul", const_configuration_options_inout16) - div = tp.OperatorsSet("Div", const_configuration_options) - prelu = tp.OperatorsSet("PReLU") - swish = tp.OperatorsSet("Swish") - sigmoid = tp.OperatorsSet("Sigmoid") - tanh = tp.OperatorsSet("Tanh") + any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU") + add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Add", const_configuration_options_inout16) + sub = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sub", const_configuration_options_inout16) + mul = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Mul", const_configuration_options_inout16) + div = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Div", 
const_configuration_options) + prelu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("PReLU") + swish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Swish") + sigmoid = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sigmoid") + tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Tanh") # Combine multiple operators into a single operator to avoid quantization between # them. To do this we define fusing patterns using the OperatorsSets that were created. # To group multiple sets with regard to fusing, an OperatorSetConcat can be created - activations_after_conv_to_fuse = tp.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) - activations_after_fc_to_fuse = tp.OperatorSetConcat(any_relu, swish, sigmoid) - any_binary = tp.OperatorSetConcat(add, sub, mul, div) + activations_after_conv_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) + activations_after_fc_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, sigmoid) + any_binary = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(add, sub, mul, div) # ------------------- # # Fusions # ------------------- # - tp.Fusing([conv, activations_after_conv_to_fuse]) - tp.Fusing([fc, activations_after_fc_to_fuse]) - tp.Fusing([any_binary, any_relu]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_after_conv_to_fuse]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([fc, activations_after_fc_to_fuse]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([any_binary, any_relu]) return generated_tpm diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_keras.py index 439e4321a..5ac0bae89 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_keras.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_keras.py @@ -15,6 +15,7 @@ import tensorflow as tf from packaging import version +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.verify_packages import FOUND_SONY_CUSTOM_LAYERS from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_DEPTHWISE_KERNEL, \ @@ -49,7 +50,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities: return generate_keras_tpc(name='imx500_tpc_keras_tpc', tp_model=imx500_tpc_tp_model) -def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_keras_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. @@ -60,7 +61,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. 
""" - keras_tpc = tp.TargetPlatformCapabilities(tp_model, name=name, version=TPC_VERSION) + keras_tpc = tp.TargetPlatformCapabilities(tp_model) no_quant_list = [Identity, tf.identity, diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_pytorch.py index 6abedcbe5..65ae29a48 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_pytorch.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_pytorch.py @@ -23,6 +23,7 @@ from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, PYTORCH_KERNEL, \ BIAS @@ -43,7 +44,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities: return generate_pytorch_tpc(name='imx500_tpc_pytorch_tpc', tp_model=imx500_tpc_tp_model) -def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_pytorch_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. Args: @@ -52,9 +53,7 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. """ - pytorch_tpc = tp.TargetPlatformCapabilities(tp_model, - name=name, - version=TPC_VERSION) + pytorch_tpc = tp.TargetPlatformCapabilities(tp_model) # we provide attributes mapping that maps each layer type in the operations set # that has weights attributes with provided quantization config (in the tp model) to diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py index 0f1901846..3cf8fd66d 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py @@ -15,13 +15,12 @@ from typing import List, Tuple import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.constants import FLOAT_BITWIDTH from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \ WEIGHTS_QUANTIZATION_METHOD -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \ - TargetPlatformModel, Signedness -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \ - AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel, Signedness, \ + AttributeQuantizationConfig, OpQuantizationConfig tp = mct.target_platform @@ -86,7 +85,7 @@ def get_op_quantization_configs() -> \ # We define a default config for operation without kernel attribute. # This is the default config that should be used for non-linear operations. 
- eight_bits_default = tp.OpQuantizationConfig( + eight_bits_default = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -100,7 +99,7 @@ def get_op_quantization_configs() -> \ signedness=Signedness.AUTO) # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes. - linear_eight_bits = tp.OpQuantizationConfig( + linear_eight_bits = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -153,7 +152,7 @@ def generate_tp_model(default_config: OpQuantizationConfig, # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example). # If the QuantizationConfigOptions contains only one configuration, # this configuration will be used for the operation quantization: - default_configuration_options = tp.QuantizationConfigOptions([default_config]) + default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_config]) # Create a QuantizationConfigOptions for quantizing constants in functional ops. # Constant configuration is similar to the default eight bit configuration except for PoT @@ -164,12 +163,17 @@ def generate_tp_model(default_config: OpQuantizationConfig, default_weight_attr_config=default_config.default_weight_attr_config.clone_and_edit( enable_weights_quantization=True, weights_per_channel_threshold=True, weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO)) - const_configuration_options = tp.QuantizationConfigOptions([const_config]) + const_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([const_config]) # Create a TargetPlatformModel and set its default quantization config. # This default configuration will be used for all operations # unless specified otherwise (see OperatorsSet, for example): - generated_tpm = tp.TargetPlatformModel(default_configuration_options, add_metadata=True, name=name) + generated_tpm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=3, + tpc_patch_version=0, + add_metadata=True, + name=name) # To start defining the model's components (such as operator sets, and fusing patterns), # use 'with' the TargetPlatformModel instance, and create them as below: @@ -182,42 +186,42 @@ def generate_tp_model(default_config: OpQuantizationConfig, # May suit for operations like: Dropout, Reshape, etc. 
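
The fusing hunks that follow keep the same three-step idiom everywhere: define operator sets, group them with OperatorSetConcat, then declare Fusing patterns so no quantization point is inserted inside the fused chain. A minimal sketch under the same assumptions (`schema` is an illustrative alias):

    import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema

    conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options)
    any_relu = schema.OperatorsSet("AnyReLU")
    swish = schema.OperatorsSet("Swish")

    # Any member of the concat may close the fused pattern:
    activations_after_conv = schema.OperatorSetConcat(any_relu, swish)
    schema.Fusing([conv, activations_after_conv])   # conv -> activation stays one quantized op
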
default_qco = tp.get_default_quantization_config_options() - tp.OperatorsSet("NoQuantization", - default_qco.clone_and_edit(enable_activation_quantization=False) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("NoQuantization", + default_qco.clone_and_edit(enable_activation_quantization=False) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects - mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list, - base_config=base_config) + mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions(mixed_precision_cfg_list, + base_config=base_config) # Define operator sets that use mixed_precision_configuration_options: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - fc = tp.OperatorsSet("FullyConnected", mixed_precision_configuration_options) + conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) + fc = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("FullyConnected", mixed_precision_configuration_options) # Define operations sets without quantization configuration # options (useful for creating fusing patterns, for example): - any_relu = tp.OperatorsSet("AnyReLU") - add = tp.OperatorsSet("Add", const_configuration_options) - sub = tp.OperatorsSet("Sub", const_configuration_options) - mul = tp.OperatorsSet("Mul", const_configuration_options) - div = tp.OperatorsSet("Div", const_configuration_options) - prelu = tp.OperatorsSet("PReLU") - swish = tp.OperatorsSet("Swish") - sigmoid = tp.OperatorsSet("Sigmoid") - tanh = tp.OperatorsSet("Tanh") + any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU") + add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Add", const_configuration_options) + sub = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sub", const_configuration_options) + mul = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Mul", const_configuration_options) + div = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Div", const_configuration_options) + prelu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("PReLU") + swish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Swish") + sigmoid = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sigmoid") + tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Tanh") # Combine multiple operators into a single operator to avoid quantization between # them. To do this we define fusing patterns using the OperatorsSets that were created. 
# To group multiple sets with regard to fusing, an OperatorSetConcat can be created - activations_after_conv_to_fuse = tp.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) - activations_after_fc_to_fuse = tp.OperatorSetConcat(any_relu, swish, sigmoid) - any_binary = tp.OperatorSetConcat(add, sub, mul, div) + activations_after_conv_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh) + activations_after_fc_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, sigmoid) + any_binary = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(add, sub, mul, div) # ------------------- # # Fusions # ------------------- # - tp.Fusing([conv, activations_after_conv_to_fuse]) - tp.Fusing([fc, activations_after_fc_to_fuse]) - tp.Fusing([any_binary, any_relu]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_after_conv_to_fuse]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([fc, activations_after_fc_to_fuse]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([any_binary, any_relu]) return generated_tpm diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_keras.py index b5c8bd213..dc586d1d3 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_keras.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_keras.py @@ -15,6 +15,7 @@ import tensorflow as tf from packaging import version +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.verify_packages import FOUND_SONY_CUSTOM_LAYERS from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_KERNEL, BIAS_ATTR, \ @@ -48,7 +49,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities: return generate_keras_tpc(name='imx500_tpc_keras_tpc', tp_model=imx500_tpc_tp_model) -def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_keras_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. @@ -59,7 +60,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. 
""" - keras_tpc = tp.TargetPlatformCapabilities(tp_model, name=name, version=TPC_VERSION) + keras_tpc = tp.TargetPlatformCapabilities(tp_model) no_quant_list = [Identity, tf.identity, diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_pytorch.py index 8e0326499..0dde170c7 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_pytorch.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_pytorch.py @@ -23,6 +23,7 @@ from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR, \ BIAS @@ -42,7 +43,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities: return generate_pytorch_tpc(name='imx500_tpc_pytorch_tpc', tp_model=imx500_tpc_tp_model) -def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_pytorch_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. Args: @@ -51,9 +52,7 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. """ - pytorch_tpc = tp.TargetPlatformCapabilities(tp_model, - name=name, - version=TPC_VERSION) + pytorch_tpc = tp.TargetPlatformCapabilities(tp_model) # we provide attributes mapping that maps each layer type in the operations set # that has weights attributes with provided quantization config (in the tp model) to diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tp_model.py index 424bccd17..fd26da442 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tp_model.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tp_model.py @@ -15,12 +15,11 @@ from typing import List, Tuple import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.constants import FLOAT_BITWIDTH from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \ - TargetPlatformModel, Signedness -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \ - AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel, Signedness, \ + AttributeQuantizationConfig, OpQuantizationConfig tp = mct.target_platform @@ -112,7 +111,7 @@ def get_op_quantization_configs() -> \ # We define a default config for operation without kernel attribute. # This is the default config that should be used for non-linear operations. 
- eight_bits_default = tp.OpQuantizationConfig( + eight_bits_default = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -126,7 +125,7 @@ def get_op_quantization_configs() -> \ signedness=Signedness.AUTO) # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes. - linear_eight_bits = tp.OpQuantizationConfig( + linear_eight_bits = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -176,12 +175,12 @@ def generate_tp_model(default_config: OpQuantizationConfig, # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example). # If the QuantizationConfigOptions contains only one configuration, # this configuration will be used for the operation quantization: - default_configuration_options = tp.QuantizationConfigOptions([default_config]) + default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_config]) default_config_input16 = default_config.clone_and_edit(supported_input_activation_n_bits=(8, 16)) - default_config_options_16bit = tp.QuantizationConfigOptions([default_config_input16, - default_config_input16.clone_and_edit(activation_n_bits=16, + default_config_options_16bit = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_config_input16, + default_config_input16.clone_and_edit(activation_n_bits=16, signedness=Signedness.SIGNED)], - base_config=default_config_input16) + base_config=default_config_input16) # Create a QuantizationConfigOptions for quantizing constants in functional ops. # Constant configuration is similar to the default eight bit configuration except for PoT @@ -192,7 +191,7 @@ def generate_tp_model(default_config: OpQuantizationConfig, default_weight_attr_config=default_config.default_weight_attr_config.clone_and_edit( enable_weights_quantization=True, weights_per_channel_threshold=True, weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO)) - const_configuration_options = tp.QuantizationConfigOptions([const_config]) + const_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([const_config]) # 16 bits inputs and outputs. Currently, only defined for consts since they are used in operators that # support 16 bit as input and output. 
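
v4 also adds 16-bit-capable defaults, shown in the hunk above: the 8-bit default is cloned to accept 8- or 16-bit inputs, and a signed 16-bit output variant joins it in one options object. A minimal restatement, assuming default_config and Signedness from this file (`schema` is an illustrative alias):

    import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema

    default_config_input16 = default_config.clone_and_edit(
        supported_input_activation_n_bits=(8, 16))
    default_config_options_16bit = schema.QuantizationConfigOptions(
        [default_config_input16,
         default_config_input16.clone_and_edit(activation_n_bits=16,
                                               signedness=Signedness.SIGNED)],
        base_config=default_config_input16)   # the 8-bit output remains the base choice
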
@@ -200,9 +199,9 @@ def generate_tp_model(default_config: OpQuantizationConfig, supported_input_activation_n_bits=(8, 16)) const_config_input16_output16 = const_config_input16.clone_and_edit( activation_n_bits=16, signedness=Signedness.SIGNED) - const_configuration_options_inout16 = tp.QuantizationConfigOptions([const_config_input16_output16, - const_config_input16], - base_config=const_config_input16) + const_configuration_options_inout16 = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([const_config_input16_output16, + const_config_input16], + base_config=const_config_input16) const_config_input16_per_tensor = const_config.clone_and_edit( supported_input_activation_n_bits=(8, 16), @@ -212,15 +211,15 @@ def generate_tp_model(default_config: OpQuantizationConfig, ) const_config_input16_output16_per_tensor = const_config_input16_per_tensor.clone_and_edit( activation_n_bits=16, signedness=Signedness.SIGNED) - const_configuration_options_inout16_per_tensor = tp.QuantizationConfigOptions([const_config_input16_output16_per_tensor, - const_config_input16_per_tensor], - base_config=const_config_input16_per_tensor) + const_configuration_options_inout16_per_tensor = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([const_config_input16_output16_per_tensor, + const_config_input16_per_tensor], + base_config=const_config_input16_per_tensor) qpreserving_const_config = const_config.clone_and_edit(enable_activation_quantization=False, quantization_preserving=True, default_weight_attr_config=const_config.default_weight_attr_config.clone_and_edit( weights_per_channel_threshold=False)) - qpreserving_const_config_options = tp.QuantizationConfigOptions([qpreserving_const_config]) + qpreserving_const_config_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([qpreserving_const_config]) mp_cfg_list_16bit = [mp_cfg.clone_and_edit(activation_n_bits=16, signedness=Signedness.SIGNED) for mp_cfg in mixed_precision_cfg_list] @@ -228,7 +227,11 @@ def generate_tp_model(default_config: OpQuantizationConfig, # Create a TargetPlatformModel and set its default quantization config. # This default configuration will be used for all operations # unless specified otherwise (see OperatorsSet, for example): - generated_tpm = tp.TargetPlatformModel(default_configuration_options, add_metadata=True, name=name) + generated_tpm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=4, + tpc_patch_version=0, + add_metadata=True, name=name) # To start defining the model's components (such as operator sets, and fusing patterns), # use 'with' the TargetPlatformModel instance, and create them as below: @@ -243,60 +246,60 @@ def generate_tp_model(default_config: OpQuantizationConfig, # May suit for operations like: Dropout, Reshape, etc. 
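
The 16-bit mixed-precision candidates in the hunk above are derived mechanically from the 8-bit list rather than written out by hand; a minimal sketch of that derivation, assuming mixed_precision_cfg_list, base_config, and Signedness from this file (`schema` is an illustrative alias):

    import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema

    # Each candidate gains a signed 16-bit-activation twin, doubling the search space:
    mp_cfg_list_16bit = [mp_cfg.clone_and_edit(activation_n_bits=16,
                                               signedness=Signedness.SIGNED)
                         for mp_cfg in mixed_precision_cfg_list]
    mixed_precision_configuration_options = schema.QuantizationConfigOptions(
        mixed_precision_cfg_list + mp_cfg_list_16bit,
        base_config=base_config)
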
         default_qco = tp.get_default_quantization_config_options()
-        tp.OperatorsSet(OPSET_NO_QUANTIZATION,
-                        default_qco.clone_and_edit(enable_activation_quantization=False)
-                        .clone_and_edit_weight_attribute(enable_weights_quantization=False))
-        tp.OperatorsSet(OPSET_QUANTIZATION_PRESERVING,
-                        default_qco.clone_and_edit(enable_activation_quantization=False,
+        model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_NO_QUANTIZATION,
+                        default_qco.clone_and_edit(enable_activation_quantization=False)
+                        .clone_and_edit_weight_attribute(enable_weights_quantization=False))
+        model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_QUANTIZATION_PRESERVING,
+                        default_qco.clone_and_edit(enable_activation_quantization=False,
                                                    quantization_preserving=True)
-                        .clone_and_edit_weight_attribute(enable_weights_quantization=False))
-        tp.OperatorsSet(OPSET_DIMENSION_MANIPULATION_OPS_WITH_WEIGHTS, qpreserving_const_config_options)
-        tp.OperatorsSet(OPSET_DIMENSION_MANIPULATION_OPS,
-                        default_qco.clone_and_edit(enable_activation_quantization=False,
+                        .clone_and_edit_weight_attribute(enable_weights_quantization=False))
+        model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_DIMENSION_MANIPULATION_OPS_WITH_WEIGHTS, qpreserving_const_config_options)
+        model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_DIMENSION_MANIPULATION_OPS,
+                        default_qco.clone_and_edit(enable_activation_quantization=False,
                                                    quantization_preserving=True,
                                                    supported_input_activation_n_bits=(8, 16))
-                        .clone_and_edit_weight_attribute(enable_weights_quantization=False))
-        tp.OperatorsSet(OPSET_MERGE_OPS, const_configuration_options_inout16_per_tensor)
+                        .clone_and_edit_weight_attribute(enable_weights_quantization=False))
+        model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_MERGE_OPS, const_configuration_options_inout16_per_tensor)

         # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects
-        mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list + mp_cfg_list_16bit,
-                                                                             base_config=base_config)
+        mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions(mixed_precision_cfg_list + mp_cfg_list_16bit,
+                                                                                                                                           base_config=base_config)

         # Define operator sets that use mixed_precision_configuration_options:
-        conv = tp.OperatorsSet(OPSET_CONV, mixed_precision_configuration_options)
-        fc = tp.OperatorsSet(OPSET_FULLY_CONNECTED, mixed_precision_configuration_options)
+        conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_CONV, mixed_precision_configuration_options)
+        fc = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_FULLY_CONNECTED, mixed_precision_configuration_options)

-        tp.OperatorsSet(OPSET_BATCH_NORM, default_config_options_16bit)
+        model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_BATCH_NORM, default_config_options_16bit)

         # Note: Operations sets without quantization configuration are useful for creating fusing patterns
-        any_relu = tp.OperatorsSet(OPSET_ANY_RELU, default_config_options_16bit)
-        add = tp.OperatorsSet(OPSET_ADD, const_configuration_options_inout16)
-        sub = tp.OperatorsSet(OPSET_SUB, const_configuration_options_inout16)
-        mul = tp.OperatorsSet(OPSET_MUL, const_configuration_options_inout16)
-        div = tp.OperatorsSet(OPSET_DIV,
-                              const_configuration_options)
-        tp.OperatorsSet(OPSET_MIN_MAX, const_configuration_options_inout16)
-        prelu = tp.OperatorsSet(OPSET_PRELU, default_config_options_16bit)
-        swish = tp.OperatorsSet(OPSET_SWISH, default_config_options_16bit)
-        sigmoid = tp.OperatorsSet(OPSET_SIGMOID, default_config_options_16bit)
-        tanh = tp.OperatorsSet(OPSET_TANH, default_config_options_16bit)
-        gelu = tp.OperatorsSet(OPSET_GELU, default_config_options_16bit)
-        hardsigmoid = tp.OperatorsSet(OPSET_HARDSIGMOID, default_config_options_16bit)
-        hardswish = tp.OperatorsSet(OPSET_HARDSWISH, default_config_options_16bit)
+        any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_ANY_RELU, default_config_options_16bit)
+        add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_ADD, const_configuration_options_inout16)
+        sub = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_SUB, const_configuration_options_inout16)
+        mul = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_MUL, const_configuration_options_inout16)
+        div = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_DIV, const_configuration_options)
+        model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_MIN_MAX, const_configuration_options_inout16)
+        prelu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_PRELU, default_config_options_16bit)
+        swish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_SWISH, default_config_options_16bit)
+        sigmoid = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_SIGMOID, default_config_options_16bit)
+        tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_TANH, default_config_options_16bit)
+        gelu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_GELU, default_config_options_16bit)
+        hardsigmoid = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_HARDSIGMOID, default_config_options_16bit)
+        hardswish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(OPSET_HARDSWISH, default_config_options_16bit)

         # Combine multiple operators into a single operator to avoid quantization between
         # them. To do this we define fusing patterns using the OperatorsSets that were created.
         # To group multiple sets with regard to fusing, an OperatorSetConcat can be created
-        activations_after_conv_to_fuse = tp.OperatorSetConcat(any_relu, swish, prelu, sigmoid,
-                                                              tanh, gelu, hardswish, hardsigmoid)
-        activations_after_fc_to_fuse = tp.OperatorSetConcat(any_relu, swish, sigmoid, tanh, gelu,
-                                                            hardswish, hardsigmoid)
-        any_binary = tp.OperatorSetConcat(add, sub, mul, div)
+        activations_after_conv_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, prelu, sigmoid,
+                                                                                                                            tanh, gelu, hardswish, hardsigmoid)
+        activations_after_fc_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, sigmoid, tanh, gelu,
+                                                                                                                          hardswish, hardsigmoid)
+        any_binary = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(add, sub, mul, div)

         # ------------------- #
         # Fusions
         # ------------------- #
-        tp.Fusing([conv, activations_after_conv_to_fuse])
-        tp.Fusing([fc, activations_after_fc_to_fuse])
-        tp.Fusing([any_binary, any_relu])
+        model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_after_conv_to_fuse])
+        model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([fc, activations_after_fc_to_fuse])
+        model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([any_binary, any_relu])

     return generated_tpm
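For orientation, a minimal, self-contained sketch of the opset/fusing flow this hunk migrates to the new schema package. The `schema` alias, the test-helper configs, and the model name are illustrative choices here, not part of the patch:

    import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema
    from tests.common_tests.helpers.generate_test_tp_model import (
        generate_test_attr_configs, generate_test_op_qc)

    # TargetPlatformModel requires a single-option default config.
    qco = schema.QuantizationConfigOptions([generate_test_op_qc(**generate_test_attr_configs())])
    tpm = schema.TargetPlatformModel(qco,
                                     tpc_minor_version=None,
                                     tpc_patch_version=None,
                                     add_metadata=False,
                                     name='fusing_sketch')
    with tpm:
        conv = schema.OperatorsSet("Conv")
        relu = schema.OperatorsSet("Relu")
        swish = schema.OperatorsSet("Swish")
        # OperatorSetConcat groups sets so one pattern covers several activations:
        activations = schema.OperatorSetConcat(relu, swish)
        schema.Fusing([conv, activations])  # Conv followed by ReLU or Swish is kept unquantized in between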
""" - keras_tpc = tp.TargetPlatformCapabilities(tp_model, name=name, version=TPC_VERSION) + keras_tpc = tp.TargetPlatformCapabilities(tp_model) no_quant_list = [tf.quantization.fake_quant_with_min_max_vars, tf.math.argmax, diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_pytorch.py index 0fa7bda97..3409e6ce8 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_pytorch.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_pytorch.py @@ -25,6 +25,7 @@ import torch.nn.functional as F from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, hardsigmoid, leaky_relu, gelu +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, PYTORCH_KERNEL, \ BIAS @@ -50,7 +51,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities: return generate_pytorch_tpc(name='imx500_tpc_pytorch_tpc', tp_model=imx500_tpc_tp_model) -def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_pytorch_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. Args: @@ -59,9 +60,7 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. """ - pytorch_tpc = tp.TargetPlatformCapabilities(tp_model, - name=name, - version=TPC_VERSION) + pytorch_tpc = tp.TargetPlatformCapabilities(tp_model) # we provide attributes mapping that maps each layer type in the operations set # that has weights attributes with provided quantization config (in the tp model) to diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py index df1fca09b..064739130 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py @@ -15,12 +15,11 @@ from typing import List, Tuple import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.constants import FLOAT_BITWIDTH from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \ - TargetPlatformModel, Signedness -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \ - AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel, Signedness, \ + AttributeQuantizationConfig, OpQuantizationConfig tp = mct.target_platform @@ -85,7 +84,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza # We define a default config for operation without kernel attribute. # This is the default config that should be used for non-linear operations. 
- eight_bits_default = tp.OpQuantizationConfig( + eight_bits_default = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={}, activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, @@ -99,7 +98,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza signedness=Signedness.AUTO) # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes. - linear_eight_bits = tp.OpQuantizationConfig( + linear_eight_bits = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig( activation_quantization_method=tp.QuantizationMethod.UNIFORM, default_weight_attr_config=default_weight_attr_config, attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config}, @@ -139,12 +138,17 @@ def generate_tp_model(default_config: OpQuantizationConfig, # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example). # If the QuantizationConfigOptions contains only one configuration, # this configuration will be used for the operation quantization: - default_configuration_options = tp.QuantizationConfigOptions([default_config]) + default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_config]) # Create a TargetPlatformModel and set its default quantization config. # This default configuration will be used for all operations # unless specified otherwise (see OperatorsSet, for example): - generated_tpc = tp.TargetPlatformModel(default_configuration_options, name=name) + generated_tpc = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=1, + tpc_patch_version=0, + add_metadata=False, + name=name) # To start defining the model's components (such as operator sets, and fusing patterns), # use 'with' the target platform model instance, and create them as below: @@ -153,17 +157,17 @@ def generate_tp_model(default_config: OpQuantizationConfig, # Pytorch supports the next fusing patterns: # [Conv, Relu], [Conv, BatchNorm], [Conv, BatchNorm, Relu], [Linear, Relu] # Source: # https://pytorch.org/docs/stable/quantization.html#model-preparation-for-quantization-eager-mode - conv = tp.OperatorsSet("Conv") - batchnorm = tp.OperatorsSet("BatchNorm") - relu = tp.OperatorsSet("Relu") - linear = tp.OperatorsSet("Linear") + conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv") + batchnorm = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("BatchNorm") + relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Relu") + linear = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Linear") # ------------------- # # Fusions # ------------------- # - tp.Fusing([conv, batchnorm, relu]) - tp.Fusing([conv, batchnorm]) - tp.Fusing([conv, relu]) - tp.Fusing([linear, relu]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, batchnorm, relu]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, batchnorm]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, relu]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([linear, relu]) return generated_tpc diff --git 
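Worth noting at this point in the patch: the free-form `version=TPC_VERSION` string that used to ride on TargetPlatformCapabilities is replaced by explicit `tpc_minor_version`/`tpc_patch_version` fields on the model itself. A short sketch of the two conventions the patch uses (released models pin numbers, as in the qnnpack hunk above; tests pass None and skip metadata). The `schema` alias and the model name are illustrative:

    import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema
    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import (
        get_op_quantization_configs)

    base_config, op_cfg_list, default_config = get_op_quantization_configs()
    default_qco = schema.QuantizationConfigOptions([default_config])

    # A released TP model pins its schema-scoped version explicitly:
    released = schema.TargetPlatformModel(default_qco,
                                          tpc_minor_version=1,
                                          tpc_patch_version=0,
                                          add_metadata=False,  # test models also pass None for both versions
                                          name='released_tpm')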
diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_keras.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_keras.py
index e212f46d7..4a366e3ca 100644
--- a/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_keras.py
+++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_keras.py
@@ -16,6 +16,7 @@

 from packaging import version

+import model_compression_toolkit.target_platform_capabilities.schema.v1
 from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_KERNEL, BIAS_ATTR, \
     KERAS_DEPTHWISE_KERNEL, BIAS
@@ -41,7 +42,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities:
     return generate_keras_tpc(name='qnnpack_keras', tp_model=qnnpack_tp_model)


-def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel):
+def generate_keras_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel):
     """
     Generates a TargetPlatformCapabilities object with default operation sets to layers mapping.

@@ -52,9 +53,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel):

     Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel.
     """
-    keras_tpc = tp.TargetPlatformCapabilities(tp_model,
-                                              name=name,
-                                              version=TPC_VERSION)
+    keras_tpc = tp.TargetPlatformCapabilities(tp_model)

     with keras_tpc:
         tp.OperationsSetToLayers("Conv",
diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_pytorch.py
index 3c059bd42..2c2160c85 100644
--- a/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_pytorch.py
+++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_pytorch.py
@@ -16,6 +16,7 @@
 from torch.nn import Conv2d, Linear, BatchNorm2d, ConvTranspose2d, Hardtanh, ReLU, ReLU6
 from torch.nn.functional import relu, relu6, hardtanh

+import model_compression_toolkit.target_platform_capabilities.schema.v1
 from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR, \
     BIAS
@@ -35,7 +36,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities:
     return generate_pytorch_tpc(name='qnnpack_pytorch', tp_model=qnnpack_pytorch)


-def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel):
+def generate_pytorch_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel):
     """
     Generates a TargetPlatformCapabilities object with default operation sets to layers mapping.

     Args:
@@ -44,9 +45,7 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel):

     Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel.
     """
-    pytorch_tpc = tp.TargetPlatformCapabilities(tp_model,
-                                                name=name,
-                                                version=TPC_VERSION)
+    pytorch_tpc = tp.TargetPlatformCapabilities(tp_model)

     # we provide attributes mapping that maps each layer type in the operations set
     # that has weights attributes with provided quantization config (in the tp model) to
diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py
index bc766d2f9..154d082a8 100644
--- a/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py
+++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py
@@ -15,12 +15,11 @@
 from typing import List, Tuple

 import model_compression_toolkit as mct
+import model_compression_toolkit.target_platform_capabilities.schema.v1
 from model_compression_toolkit.constants import FLOAT_BITWIDTH
 from model_compression_toolkit.target_platform_capabilities.constants import BIAS_ATTR, KERNEL_ATTR
-from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \
-    TargetPlatformModel, Signedness
-from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \
-    QuantizationMethod, AttributeQuantizationConfig
+from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel, Signedness, \
+    AttributeQuantizationConfig, OpQuantizationConfig

 tp = mct.target_platform

@@ -83,7 +82,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza

     # We define a default config for operation without kernel attribute.
     # This is the default config that should be used for non-linear operations.
-    eight_bits_default = tp.OpQuantizationConfig(
+    eight_bits_default = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(
         default_weight_attr_config=default_weight_attr_config,
         attr_weights_configs_mapping={},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
@@ -97,8 +96,8 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
         signedness=Signedness.AUTO)

     # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes.
-    linear_eight_bits = tp.OpQuantizationConfig(
-        activation_quantization_method=QuantizationMethod.UNIFORM,
+    linear_eight_bits = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(
+        activation_quantization_method=tp.QuantizationMethod.UNIFORM,
         default_weight_attr_config=default_weight_attr_config,
         attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
         activation_n_bits=8,
@@ -137,12 +136,17 @@ def generate_tp_model(default_config: OpQuantizationConfig,
     # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example).
     # If the QuantizationConfigOptions contains only one configuration,
     # this configuration will be used for the operation quantization:
-    default_configuration_options = tp.QuantizationConfigOptions([default_config])
+    default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_config])

     # Create a TargetPlatformModel and set its default quantization config.
     # This default configuration will be used for all operations
     # unless specified otherwise (see OperatorsSet, for example):
-    generated_tpc = tp.TargetPlatformModel(default_configuration_options, name=name)
+    generated_tpc = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(
+        default_configuration_options,
+        tpc_minor_version=1,
+        tpc_patch_version=0,
+        add_metadata=False,
+        name=name)

     # To start defining the model's components (such as operator sets, and fusing patterns),
     # use 'with' the TargetPlatformModel instance, and create them as below:
@@ -150,52 +154,52 @@ def generate_tp_model(default_config: OpQuantizationConfig,
         # In TFLite, the quantized operator specifications constraint operators quantization
         # differently. For more details:
         # https://www.tensorflow.org/lite/performance/quantization_spec#int8_quantized_operator_specifications
-        tp.OperatorsSet("NoQuantization",
-                        tp.get_default_quantization_config_options().clone_and_edit(
+        model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("NoQuantization",
+                        tp.get_default_quantization_config_options().clone_and_edit(
                             quantization_preserving=True))

         fc_qco = tp.get_default_quantization_config_options()
-        fc = tp.OperatorsSet("FullyConnected",
-                             fc_qco.clone_and_edit_weight_attribute(weights_per_channel_threshold=False))
+        fc = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("FullyConnected",
+                             fc_qco.clone_and_edit_weight_attribute(weights_per_channel_threshold=False))

-        tp.OperatorsSet("L2Normalization",
-                        tp.get_default_quantization_config_options().clone_and_edit(
+        model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("L2Normalization",
+                        tp.get_default_quantization_config_options().clone_and_edit(
                             fixed_zero_point=0, fixed_scale=1 / 128))
-        tp.OperatorsSet("LogSoftmax",
-                        tp.get_default_quantization_config_options().clone_and_edit(
+        model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("LogSoftmax",
+                        tp.get_default_quantization_config_options().clone_and_edit(
                             fixed_zero_point=127, fixed_scale=16 / 256))
-        tp.OperatorsSet("Tanh",
-                        tp.get_default_quantization_config_options().clone_and_edit(
+        model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Tanh",
+                        tp.get_default_quantization_config_options().clone_and_edit(
                             fixed_zero_point=0, fixed_scale=1 / 128))
-        tp.OperatorsSet("Softmax",
-                        tp.get_default_quantization_config_options().clone_and_edit(
+        model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Softmax",
+                        tp.get_default_quantization_config_options().clone_and_edit(
                             fixed_zero_point=-128, fixed_scale=1 / 256))
-        tp.OperatorsSet("Logistic",
-                        tp.get_default_quantization_config_options().clone_and_edit(
+        model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Logistic",
+                        tp.get_default_quantization_config_options().clone_and_edit(
                             fixed_zero_point=-128, fixed_scale=1 / 256))

-        conv2d = tp.OperatorsSet("Conv2d")
-        kernel = tp.OperatorSetConcat(conv2d, fc)
+        conv2d = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv2d")
+        kernel = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(conv2d, fc)

-        relu = tp.OperatorsSet("Relu")
-        elu = tp.OperatorsSet("Elu")
-        activations_to_fuse = tp.OperatorSetConcat(relu, elu)
+        relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Relu")
+        elu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Elu")
+        activations_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(relu, elu)

-        batch_norm = tp.OperatorsSet("BatchNorm")
-        bias_add = tp.OperatorsSet("BiasAdd")
-        add = tp.OperatorsSet("Add")
-        squeeze = tp.OperatorsSet("Squeeze",
-                                  qc_options=tp.get_default_quantization_config_options().clone_and_edit(
+        batch_norm = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("BatchNorm")
+        bias_add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("BiasAdd")
+        add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Add")
+        squeeze = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Squeeze",
+                                  qc_options=tp.get_default_quantization_config_options().clone_and_edit(
                                       quantization_preserving=True))

         # ------------------- #
         # Fusions
         # ------------------- #
         # Source: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/grappler/optimizers/remapper
-        tp.Fusing([kernel, bias_add])
-        tp.Fusing([kernel, bias_add, activations_to_fuse])
-        tp.Fusing([conv2d, batch_norm, activations_to_fuse])
-        tp.Fusing([conv2d, squeeze, activations_to_fuse])
-        tp.Fusing([batch_norm, activations_to_fuse])
-        tp.Fusing([batch_norm, add, activations_to_fuse])
+        model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([kernel, bias_add])
+        model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([kernel, bias_add, activations_to_fuse])
+        model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv2d, batch_norm, activations_to_fuse])
+        model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv2d, squeeze, activations_to_fuse])
+        model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([batch_norm, activations_to_fuse])
+        model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([batch_norm, add, activations_to_fuse])

     return generated_tpc
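A quick sanity check on the pinned quantization parameters above, under the standard int8 affine mapping real = scale * (q - zero_point). The numbers below are exactly the TFLite-spec values the hunk carries over; the snippet is illustrative, not part of the patch:

    # int8 q ranges over [-128, 127]; real = scale * (q - zero_point).
    s, z = 1 / 128, 0            # Tanh, L2Normalization
    assert (s * (-128 - z), s * (127 - z)) == (-1.0, 0.9921875)    # ~[-1, 1), tanh's output range

    s, z = 1 / 256, -128         # Softmax, Logistic
    assert (s * (-128 - z), s * (127 - z)) == (0.0, 0.99609375)    # ~[0, 1), a probability output

Fixing scale and zero point for these ops means the quantizer skips range observation entirely and encodes the mathematically known output range of the operator.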
""" - keras_tpc = tp.TargetPlatformCapabilities(tp_model, - name=name, - version=TPC_VERSION) + keras_tpc = tp.TargetPlatformCapabilities(tp_model) with keras_tpc: tp.OperationsSetToLayers("NoQuantization", [AveragePooling2D, diff --git a/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_pytorch.py b/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_pytorch.py index 3f7f85d32..4a125a846 100644 --- a/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_pytorch.py +++ b/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_pytorch.py @@ -16,6 +16,7 @@ from torch.nn import AvgPool2d, MaxPool2d from torch.nn.functional import avg_pool2d, max_pool2d, interpolate +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR, \ BIAS @@ -37,7 +38,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities: return generate_pytorch_tpc(name='tflite_torch', tp_model=tflite_tp_model) -def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): +def generate_pytorch_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel): """ Generates a TargetPlatformCapabilities object with default operation sets to layers mapping. Args: @@ -46,9 +47,7 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel): Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel. """ - pytorch_tpc = tp.TargetPlatformCapabilities(tp_model, - name=name, - version=TPC_VERSION) + pytorch_tpc = tp.TargetPlatformCapabilities(tp_model) with pytorch_tpc: tp.OperationsSetToLayers("NoQuantization", [AvgPool2d, diff --git a/tests/common_tests/helpers/generate_test_tp_model.py b/tests/common_tests/helpers/generate_test_tp_model.py index a1436d8f7..f70d2a1e2 100644 --- a/tests/common_tests/helpers/generate_test_tp_model.py +++ b/tests/common_tests/helpers/generate_test_tp_model.py @@ -15,11 +15,13 @@ import copy from typing import Dict, List, Any +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.constants import FLOAT_BITWIDTH, ACTIVATION_N_BITS_ATTRIBUTE, \ SUPPORTED_INPUT_ACTIVATION_NBITS_ATTRIBUTE from model_compression_toolkit.target_platform_capabilities.constants import OPS_SET_LIST, KERNEL_ATTR, BIAS_ATTR, \ WEIGHTS_N_BITS -from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, QuantizationConfigOptions, Signedness +from model_compression_toolkit.target_platform_capabilities.schema.v1 import Signedness, OpQuantizationConfig, \ + QuantizationConfigOptions from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import get_op_quantization_configs, generate_tp_model import model_compression_toolkit as mct @@ -37,7 +39,7 @@ def generate_test_tp_model(edit_params_dict, name=""): base_config, op_cfg_list, default_config = get_op_quantization_configs() # separate weights attribute parameters from the requested param to edit - weights_params_names = [name for name in tp.AttributeQuantizationConfig.__init__.__code__.co_varnames if name != 'self'] + weights_params_names = [name for name in 
model_compression_toolkit.target_platform_capabilities.schema.v1.AttributeQuantizationConfig.__init__.__code__.co_varnames if name != 'self'] weights_params = {k: v for k, v in edit_params_dict.items() if k in weights_params_names} rest_params = {k: v for k, v in edit_params_dict.items() if k not in list(weights_params.keys())} @@ -104,8 +106,8 @@ def generate_tp_model_with_activation_mp(base_cfg, default_config, mp_bitwidth_c mixed_precision_cfg_list=mp_op_cfg_list, name=name) - mixed_precision_configuration_options = tp.QuantizationConfigOptions(mp_op_cfg_list, - base_config=base_cfg) + mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions(mp_op_cfg_list, + base_config=base_cfg) operator_sets_dict = {op_set.name: mixed_precision_configuration_options for op_set in base_tp_model.operator_set if op_set.name is not "NoQuantization"} @@ -121,12 +123,17 @@ def generate_tp_model_with_activation_mp(base_cfg, default_config, mp_bitwidth_c def generate_custom_test_tp_model(name: str, base_cfg: OpQuantizationConfig, - base_tp_model: tp.TargetPlatformModel, + base_tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel, operator_sets_dict: Dict[str, QuantizationConfigOptions] = None): - default_configuration_options = tp.QuantizationConfigOptions([base_cfg]) + default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([base_cfg]) - custom_tp_model = tp.TargetPlatformModel(default_configuration_options, name=name) + custom_tp_model = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=None, + tpc_patch_version=None, + add_metadata=False, + name=name) with custom_tp_model: for op_set in base_tp_model.operator_set: @@ -136,22 +143,22 @@ def generate_custom_test_tp_model(name: str, else: qc_options = op_set.qc_options - tp.OperatorsSet(op_set.name, qc_options) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(op_set.name, qc_options) existing_op_sets_names = [op_set.name for op_set in base_tp_model.operator_set] for op_set_name, op_set_qc_options in operator_sets_dict.items(): # Add new OperatorSets from the given operator_sets_dict if op_set_name not in existing_op_sets_names: - tp.OperatorsSet(op_set_name, op_set_qc_options) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(op_set_name, op_set_qc_options) for fusion in base_tp_model.fusing_patterns: - tp.Fusing(fusion.operator_groups_list) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing(fusion.operator_groups_list) return custom_tp_model def generate_test_tpc(name: str, - tp_model: tp.TargetPlatformModel, + tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel, base_tpc: tp.TargetPlatformCapabilities, op_sets_to_layer_add: Dict[str, List[Any]] = None, op_sets_to_layer_drop: Dict[str, List[Any]] = None, @@ -174,7 +181,7 @@ def generate_test_tpc(name: str, # Remove empty op sets merged_dict = {op_set_name: layers for op_set_name, layers in merged_dict.items() if len(layers) == 0} - tpc = tp.TargetPlatformCapabilities(tp_model, name=name) + tpc = tp.TargetPlatformCapabilities(tp_model) with tpc: for op_set_name, layers in merged_dict.items(): @@ -191,21 +198,21 @@ def generate_test_attr_configs(default_cfg_nbits: int = 8, enable_kernel_weights_quantization: bool = 
True, kernel_lut_values_bitwidth: int = None): - default_weight_attr_config = tp.AttributeQuantizationConfig( + default_weight_attr_config = model_compression_toolkit.target_platform_capabilities.schema.v1.AttributeQuantizationConfig( weights_quantization_method=default_cfg_quantizatiom_method, weights_n_bits=default_cfg_nbits, weights_per_channel_threshold=False, enable_weights_quantization=False, lut_values_bitwidth=None) - kernel_base_config = tp.AttributeQuantizationConfig( + kernel_base_config = model_compression_toolkit.target_platform_capabilities.schema.v1.AttributeQuantizationConfig( weights_quantization_method=kernel_cfg_quantizatiom_method, weights_n_bits=kernel_cfg_nbits, weights_per_channel_threshold=True, enable_weights_quantization=enable_kernel_weights_quantization, lut_values_bitwidth=kernel_lut_values_bitwidth) - bias_config = tp.AttributeQuantizationConfig( + bias_config = model_compression_toolkit.target_platform_capabilities.schema.v1.AttributeQuantizationConfig( weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, weights_n_bits=FLOAT_BITWIDTH, weights_per_channel_threshold=False, @@ -217,22 +224,22 @@ def generate_test_attr_configs(default_cfg_nbits: int = 8, BIAS_CONFIG: bias_config} -def generate_test_op_qc(default_weight_attr_config: tp.AttributeQuantizationConfig, - kernel_base_config: tp.AttributeQuantizationConfig, - bias_config: tp.AttributeQuantizationConfig, +def generate_test_op_qc(default_weight_attr_config: model_compression_toolkit.target_platform_capabilities.schema.v1.AttributeQuantizationConfig, + kernel_base_config: model_compression_toolkit.target_platform_capabilities.schema.v1.AttributeQuantizationConfig, + bias_config: model_compression_toolkit.target_platform_capabilities.schema.v1.AttributeQuantizationConfig, enable_activation_quantization: bool = True, activation_n_bits: int = 8, activation_quantization_method: tp.QuantizationMethod = tp.QuantizationMethod.POWER_OF_TWO): - return tp.OpQuantizationConfig(enable_activation_quantization=enable_activation_quantization, - default_weight_attr_config=default_weight_attr_config, - attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, + return model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(enable_activation_quantization=enable_activation_quantization, + default_weight_attr_config=default_weight_attr_config, + attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config}, - activation_n_bits=activation_n_bits, - supported_input_activation_n_bits=activation_n_bits, - activation_quantization_method=activation_quantization_method, - quantization_preserving=False, - fixed_scale=None, - fixed_zero_point=None, - simd_size=32, - signedness=Signedness.AUTO) + activation_n_bits=activation_n_bits, + supported_input_activation_n_bits=activation_n_bits, + activation_quantization_method=activation_quantization_method, + quantization_preserving=False, + fixed_scale=None, + fixed_zero_point=None, + simd_size=32, + signedness=Signedness.AUTO) diff --git a/tests/common_tests/test_tp_model.py b/tests/common_tests/test_tp_model.py index e84d0936d..708d698ea 100644 --- a/tests/common_tests/test_tp_model.py +++ b/tests/common_tests/test_tp_model.py @@ -16,6 +16,7 @@ import unittest import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.constants import FLOAT_BITWIDTH from model_compression_toolkit.core.common import BaseNode from 
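The helper above encodes the usual kernel/bias split: the kernel is quantized per channel while the bias is kept at float width and never quantized. A sketch of that split against the new schema classes (the exact bit widths mirror the helper's defaults and are illustrative):

    import model_compression_toolkit as mct
    import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema
    from model_compression_toolkit.constants import FLOAT_BITWIDTH

    tp = mct.target_platform
    kernel_cfg = schema.AttributeQuantizationConfig(        # 8-bit, per-channel kernel
        weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
        weights_n_bits=8,
        weights_per_channel_threshold=True,
        enable_weights_quantization=True,
        lut_values_bitwidth=None)
    bias_cfg = schema.AttributeQuantizationConfig(          # bias left in float
        weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
        weights_n_bits=FLOAT_BITWIDTH,
        weights_per_channel_threshold=False,
        enable_weights_quantization=False,
        lut_values_bitwidth=None)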
diff --git a/tests/common_tests/test_tp_model.py b/tests/common_tests/test_tp_model.py
index e84d0936d..708d698ea 100644
--- a/tests/common_tests/test_tp_model.py
+++ b/tests/common_tests/test_tp_model.py
@@ -16,6 +16,7 @@
 import unittest

 import model_compression_toolkit as mct
+import model_compression_toolkit.target_platform_capabilities.schema.v1
 from model_compression_toolkit.constants import FLOAT_BITWIDTH
 from model_compression_toolkit.core.common import BaseNode
 from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR
@@ -25,7 +26,7 @@
 tp = mct.target_platform

 TEST_QC = generate_test_op_qc(**generate_test_attr_configs())
-TEST_QCO = tp.QuantizationConfigOptions([TEST_QC])
+TEST_QCO = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC])


 class TargetPlatformModelingTest(unittest.TestCase):
@@ -36,27 +37,39 @@ def test_not_initialized_tp(self):
         self.assertEqual('Target platform model is not initialized.', str(e.exception))

     def test_get_default_options(self):
-        with tp.TargetPlatformModel(TEST_QCO):
+        with model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(TEST_QCO,
+                                                                                                  tpc_minor_version=None,
+                                                                                                  tpc_patch_version=None,
+                                                                                                  add_metadata=False):
             self.assertEqual(tp.get_default_quantization_config_options(), TEST_QCO)

     def test_immutable_tp(self):
-        model = tp.TargetPlatformModel(TEST_QCO)
+        model = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(TEST_QCO,
+                                                                                                     tpc_minor_version=None,
+                                                                                                     tpc_patch_version=None,
+                                                                                                     add_metadata=False)
         with self.assertRaises(Exception) as e:
             with model:
-                tp.OperatorsSet("opset")
+                model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("opset")
             model.operator_set = []
         self.assertEqual('Immutable class. Can\'t edit attributes.', str(e.exception))

     def test_default_options_more_than_single_qc(self):
-        test_qco = tp.QuantizationConfigOptions([TEST_QC, TEST_QC], base_config=TEST_QC)
+        test_qco = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC, TEST_QC], base_config=TEST_QC)
         with self.assertRaises(Exception) as e:
-            tp.TargetPlatformModel(test_qco)
+            model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(test_qco,
+                                                                                                 tpc_minor_version=None,
+                                                                                                 tpc_patch_version=None,
+                                                                                                 add_metadata=False)
         self.assertEqual('Default QuantizationConfigOptions must contain only one option', str(e.exception))

     def test_tp_model_show(self):
-        tpm = tp.TargetPlatformModel(TEST_QCO)
+        tpm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(TEST_QCO,
+                                                                                                   tpc_minor_version=None,
+                                                                                                   tpc_patch_version=None,
+                                                                                                   add_metadata=False)
         with tpm:
-            a = tp.OperatorsSet("opA")
+            a = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("opA")

         tpm.show()

@@ -64,11 +77,15 @@ def test_tp_model_show(self):
 class OpsetTest(unittest.TestCase):

     def test_opset_qco(self):
-        hm = tp.TargetPlatformModel(TEST_QCO, name='test')
+        hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(TEST_QCO,
+                                                                                                  tpc_minor_version=None,
+                                                                                                  tpc_patch_version=None,
+                                                                                                  add_metadata=False,
+                                                                                                  name='test')
         opset_name = "ops_3bit"
         with hm:
             qco_3bit = get_default_quantization_config_options().clone_and_edit(activation_n_bits=3)
-            tp.OperatorsSet(opset_name, qco_3bit)
+            model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet(opset_name, qco_3bit)

         for op_qc in hm.get_config_options_by_operators_set(opset_name).quantization_config_list:
             self.assertEqual(op_qc.activation_n_bits, 3)
@@ -80,23 +97,31 @@ def test_opset_qco(self):
                          hm.default_qco)

     def test_opset_concat(self):
-        hm = tp.TargetPlatformModel(TEST_QCO, name='test')
+        hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(TEST_QCO,
+                                                                                                  tpc_minor_version=None,
+                                                                                                  tpc_patch_version=None,
+                                                                                                  add_metadata=False,
+                                                                                                  name='test')
         with hm:
-            a = tp.OperatorsSet('opset_A')
-            b = tp.OperatorsSet('opset_B',
-                                get_default_quantization_config_options().clone_and_edit(activation_n_bits=2))
-            tp.OperatorsSet('opset_C')  # Just add it without using it in concat
-            tp.OperatorSetConcat(a, b)
+            a = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opset_A')
+            b = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opset_B',
+                get_default_quantization_config_options().clone_and_edit(activation_n_bits=2))
+            model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opset_C')  # Just add it without using it in concat
+            model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(a, b)
         self.assertEqual(len(hm.operator_set), 4)
         self.assertTrue(hm.is_opset_in_model("opset_A_opset_B"))
         self.assertTrue(hm.get_config_options_by_operators_set('opset_A_opset_B') is None)

     def test_non_unique_opset(self):
-        hm = tp.TargetPlatformModel(tp.QuantizationConfigOptions([TEST_QC]))
+        hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(
+            model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]),
+            tpc_minor_version=None,
+            tpc_patch_version=None,
+            add_metadata=False)
         with self.assertRaises(Exception) as e:
             with hm:
-                tp.OperatorsSet("conv")
-                tp.OperatorsSet("conv")
+                model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("conv")
+                model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("conv")
         self.assertEqual('Operator Sets must have unique names.', str(e.exception))


@@ -104,13 +129,13 @@ class QCOptionsTest(unittest.TestCase):

     def test_empty_qc_options(self):
         with self.assertRaises(AssertionError) as e:
-            tp.QuantizationConfigOptions([])
+            model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([])
         self.assertEqual(
             "'QuantizationConfigOptions' requires at least one 'OpQuantizationConfig'. The provided list is empty.",
             str(e.exception))

     def test_list_of_no_qc(self):
         with self.assertRaises(AssertionError) as e:
-            tp.QuantizationConfigOptions([TEST_QC, 3])
+            model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC, 3])
         self.assertEqual(
             'Each option must be an instance of \'OpQuantizationConfig\', but found an object of type: <class \'int\'>.',
             str(e.exception))
@@ -123,9 +148,9 @@ def test_clone_and_edit_options(self):
         self.assertEqual(modified_options.quantization_config_list[0].attr_weights_configs_mapping[KERNEL_ATTR].weights_n_bits, 5)

     def test_qco_without_base_config(self):
-        tp.QuantizationConfigOptions([TEST_QC])  # Should work fine as it has only one qc.
+        model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC])  # Should work fine as it has only one qc.
         with self.assertRaises(Exception) as e:
-            tp.QuantizationConfigOptions([TEST_QC, TEST_QC])  # Should raise exception as base_config was not passed
+            model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC, TEST_QC])  # Should raise exception as base_config was not passed
         self.assertEqual(
             'For multiple configurations, a \'base_config\' is required for non-mixed-precision optimization.',
             str(e.exception))

@@ -140,21 +165,29 @@ def test_get_qco_for_none_tpc(self):

 class FusingTest(unittest.TestCase):

     def test_fusing_single_opset(self):
-        hm = tp.TargetPlatformModel(tp.QuantizationConfigOptions([TEST_QC]))
+        hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(
+            model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]),
+            tpc_minor_version=None,
+            tpc_patch_version=None,
+            add_metadata=False)
         with hm:
-            add = tp.OperatorsSet("add")
+            add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("add")
             with self.assertRaises(Exception) as e:
-                tp.Fusing([add])
+                model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([add])
             self.assertEqual('Fusing can not be created for a single operators group', str(e.exception))

     def test_fusing_contains(self):
-        hm = tp.TargetPlatformModel(tp.QuantizationConfigOptions([TEST_QC]))
+        hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(
+            model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]),
+            tpc_minor_version=None,
+            tpc_patch_version=None,
+            add_metadata=False)
         with hm:
-            conv = tp.OperatorsSet("conv")
-            add = tp.OperatorsSet("add")
-            tanh = tp.OperatorsSet("tanh")
-            tp.Fusing([conv, add])
-            tp.Fusing([conv, add, tanh])
+            conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("conv")
+            add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("add")
+            tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("tanh")
+            model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, add])
+            model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, add, tanh])

         self.assertEqual(len(hm.fusing_patterns), 2)
         f0, f1 = hm.fusing_patterns[0], hm.fusing_patterns[1]
@@ -164,15 +197,19 @@ def test_fusing_contains(self):
         self.assertTrue(f1.contains(f1))

     def test_fusing_contains_with_opset_concat(self):
-        hm = tp.TargetPlatformModel(tp.QuantizationConfigOptions([TEST_QC]))
+        hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(
+            model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]),
+            tpc_minor_version=None,
+            tpc_patch_version=None,
+            add_metadata=False)
         with hm:
-            conv = tp.OperatorsSet("conv")
-            add = tp.OperatorsSet("add")
-            tanh = tp.OperatorsSet("tanh")
-            add_tanh = tp.OperatorSetConcat(add, tanh)
-            tp.Fusing([conv, add])
-            tp.Fusing([conv, add_tanh])
-            tp.Fusing([conv, add, tanh])
+            conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("conv")
+            add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("add")
+            tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("tanh")
+            add_tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(add, tanh)
+            model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, add])
+            model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, add_tanh])
+            model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, add, tanh])

         self.assertEqual(len(hm.fusing_patterns), 3)
         f0, f1, f2 = hm.fusing_patterns[0], hm.fusing_patterns[1], hm.fusing_patterns[2]
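The containment checks these tests exercise encode that a longer fusing pattern subsumes a shorter prefix. A compact restatement of the expected behavior, assuming the schema.v1 semantics the tests above rely on (the `schema` alias and reuse of the test helpers are illustrative):

    import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema
    from tests.common_tests.helpers.generate_test_tp_model import (
        generate_test_attr_configs, generate_test_op_qc)

    qco = schema.QuantizationConfigOptions([generate_test_op_qc(**generate_test_attr_configs())])
    tpm = schema.TargetPlatformModel(qco, tpc_minor_version=None,
                                     tpc_patch_version=None, add_metadata=False)
    with tpm:
        conv = schema.OperatorsSet("conv")
        add = schema.OperatorsSet("add")
        tanh = schema.OperatorsSet("tanh")
        schema.Fusing([conv, add])           # f0
        schema.Fusing([conv, add, tanh])     # f1
    f0, f1 = tpm.fusing_patterns
    # Expected per the test above: the longer pattern contains the shorter one, not vice versa.
    assert f1.contains(f0) and not f0.contains(f1)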
diff --git a/tests/keras_tests/exporter_tests/tflite_int8/imx500_int8_tp_model.py b/tests/keras_tests/exporter_tests/tflite_int8/imx500_int8_tp_model.py
index 038dfc769..9b2283db6 100644
--- a/tests/keras_tests/exporter_tests/tflite_int8/imx500_int8_tp_model.py
+++ b/tests/keras_tests/exporter_tests/tflite_int8/imx500_int8_tp_model.py
@@ -17,6 +17,7 @@
 import tensorflow as tf
 from packaging import version

+import model_compression_toolkit.target_platform_capabilities.schema.v1
 from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_KERNEL, BIAS_ATTR, BIAS, \
     KERAS_DEPTHWISE_KERNEL, WEIGHTS_N_BITS
@@ -32,8 +33,7 @@
     Conv2DTranspose

 import model_compression_toolkit as mct
-from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \
-    TargetPlatformModel
+from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel, OpQuantizationConfig

 tp = mct.target_platform

@@ -66,35 +66,40 @@ def generate_tp_model(default_config: OpQuantizationConfig,
                       base_config: OpQuantizationConfig,
                       mixed_precision_cfg_list: List[OpQuantizationConfig],
                       name: str) -> TargetPlatformModel:
-    default_configuration_options = tp.QuantizationConfigOptions([default_config])
-    generated_tpc = tp.TargetPlatformModel(default_configuration_options, name=name)
+    default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions(
+        [default_config])
+    generated_tpc = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(
+        default_configuration_options,
+        tpc_minor_version=None,
+        tpc_patch_version=None,
+        add_metadata=False, name=name)

     with generated_tpc:
-        tp.OperatorsSet("NoQuantization",
-                        tp.get_default_quantization_config_options()
-                        .clone_and_edit(enable_activation_quantization=False)
-                        .clone_and_edit_weight_attribute(enable_weights_quantization=False))
-
-        mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list,
-                                                                             base_config=base_config)
-
-        conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options)
-        fc = tp.OperatorsSet("FullyConnected", mixed_precision_configuration_options)
-
-        any_relu = tp.OperatorsSet("AnyReLU")
-        add = tp.OperatorsSet("Add")
-        sub = tp.OperatorsSet("Sub")
-        mul = tp.OperatorsSet("Mul")
-        div = tp.OperatorsSet("Div")
-        prelu = tp.OperatorsSet("PReLU")
-        swish = tp.OperatorsSet("Swish")
-        sigmoid = tp.OperatorsSet("Sigmoid")
-        tanh = tp.OperatorsSet("Tanh")
-        activations_after_conv_to_fuse = tp.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh)
-        activations_after_fc_to_fuse = tp.OperatorSetConcat(any_relu, swish, sigmoid)
-        any_binary = tp.OperatorSetConcat(add, sub, mul, div)
-        tp.Fusing([conv, activations_after_conv_to_fuse])
-        tp.Fusing([fc, activations_after_fc_to_fuse])
-        tp.Fusing([any_binary, any_relu])
+        model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("NoQuantization",
+                        tp.get_default_quantization_config_options()
+                        .clone_and_edit(enable_activation_quantization=False)
+                        .clone_and_edit_weight_attribute(enable_weights_quantization=False))
+
+        mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions(mixed_precision_cfg_list,
+                                                                                                                                           base_config=base_config)
+
+        conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options)
+        fc = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("FullyConnected", mixed_precision_configuration_options)
+
+        any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU")
+        add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Add")
+        sub = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sub")
+        mul = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Mul")
+        div = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Div")
+        prelu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("PReLU")
+        swish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Swish")
+        sigmoid = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sigmoid")
+        tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Tanh")
+        activations_after_conv_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh)
+        activations_after_fc_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, sigmoid)
+        any_binary = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(add, sub, mul, div)
+        model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_after_conv_to_fuse])
+        model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([fc, activations_after_fc_to_fuse])
+        model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([any_binary, any_relu])

     return generated_tpc


@@ -104,8 +109,8 @@ def get_int8_tpc(edit_weights_params_dict={}, edit_act_params_dict={}) -> tp.Tar
     return generate_keras_tpc(name='int8_tpc', tp_model=default_tp_model)


-def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel):
-    keras_tpc = tp.TargetPlatformCapabilities(tp_model, name=name, version='v1')
+def generate_keras_tpc(name: str, tp_model: model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel):
+    keras_tpc = tp.TargetPlatformCapabilities(tp_model)

     with keras_tpc:
         tp.OperationsSetToLayers("NoQuantization", [Reshape,
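The capabilities side stays framework-specific: after this patch it takes only the model (name and version no longer ride on the capabilities object), and inside its `with` block opset names are bound to concrete layers via OperationsSetToLayers. A sketch against the imx500 helpers used elsewhere in this patch; the shortened layer list and the model name are illustrative:

    import tensorflow as tf
    import model_compression_toolkit as mct
    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import (
        get_op_quantization_configs, generate_tp_model)

    tp = mct.target_platform
    base_config, op_cfg_list, default_config = get_op_quantization_configs()
    tp_model = generate_tp_model(default_config=default_config,
                                 base_config=base_config,
                                 mixed_precision_cfg_list=op_cfg_list,
                                 name='sketch_tpm')
    keras_tpc = tp.TargetPlatformCapabilities(tp_model)  # no name=/version= kwargs after this patch
    with keras_tpc:
        # The opset name must exist in the model; "NoQuantization" does in the imx500 models.
        tp.OperationsSetToLayers("NoQuantization", [tf.keras.layers.Reshape])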
diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/bn_attributes_quantization_test.py b/tests/keras_tests/feature_networks_tests/feature_networks/bn_attributes_quantization_test.py
index 95ea6d019..4aa4e8d89 100644
--- a/tests/keras_tests/feature_networks_tests/feature_networks/bn_attributes_quantization_test.py
+++ b/tests/keras_tests/feature_networks_tests/feature_networks/bn_attributes_quantization_test.py
@@ -16,10 +16,11 @@
 import numpy as np

 import model_compression_toolkit as mct
+import model_compression_toolkit.target_platform_capabilities.schema.v1
 from mct_quantizers import QuantizationMethod, KerasQuantizationWrapper
 from model_compression_toolkit import DefaultDict
 from model_compression_toolkit.core.keras.constants import GAMMA, BETA
-from model_compression_toolkit.target_platform_capabilities.target_platform import Signedness
+from model_compression_toolkit.target_platform_capabilities.schema.v1 import Signedness
 from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_KERNEL, BIAS, BIAS_ATTR
 from tests.common_tests.helpers.generate_test_tp_model import generate_test_attr_configs, \
     DEFAULT_WEIGHT_ATTR_CONFIG, KERNEL_BASE_CONFIG, generate_test_op_qc, BIAS_CONFIG
@@ -52,46 +53,50 @@ def _generate_bn_quantized_tpm(quantize_linear):
                                    bias_config=attr_cfgs_dict[BIAS_CONFIG],
                                    enable_activation_quantization=False)

-    bn_op_qc = tp.OpQuantizationConfig(enable_activation_quantization=False,
-                                       default_weight_attr_config=default_attr_cfg,
-                                       attr_weights_configs_mapping={BETA: bn_attr_cfg, GAMMA: bn_attr_cfg},
-                                       activation_n_bits=8,
-                                       supported_input_activation_n_bits=8,
-                                       activation_quantization_method=QuantizationMethod.POWER_OF_TWO,
-                                       quantization_preserving=False,
-                                       fixed_scale=None,
-                                       fixed_zero_point=None,
-                                       simd_size=32,
-                                       signedness=Signedness.AUTO)
-
-    default_op_qc = tp.OpQuantizationConfig(enable_activation_quantization=False,
-                                            default_weight_attr_config=default_attr_cfg,
-                                            attr_weights_configs_mapping={},
-                                            activation_n_bits=8,
-                                            supported_input_activation_n_bits=8,
-                                            activation_quantization_method=QuantizationMethod.POWER_OF_TWO,
-                                            quantization_preserving=False,
-                                            fixed_scale=None,
-                                            fixed_zero_point=None,
-                                            simd_size=32,
-                                            signedness=Signedness.AUTO)
-
-    default_configuration_options = tp.QuantizationConfigOptions([default_op_qc])
-    linear_configuration_options = tp.QuantizationConfigOptions([linear_op_qc])
-    bn_configuration_options = tp.QuantizationConfigOptions([bn_op_qc])
-
-    generated_tpm = tp.TargetPlatformModel(default_configuration_options, name='bn_quantized_tpm')
+    bn_op_qc = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(enable_activation_quantization=False,
+                                       default_weight_attr_config=default_attr_cfg,
+                                       attr_weights_configs_mapping={BETA: bn_attr_cfg, GAMMA: bn_attr_cfg},
+                                       activation_n_bits=8,
+                                       supported_input_activation_n_bits=8,
+                                       activation_quantization_method=QuantizationMethod.POWER_OF_TWO,
+                                       quantization_preserving=False,
+                                       fixed_scale=None,
+                                       fixed_zero_point=None,
+                                       simd_size=32,
+                                       signedness=Signedness.AUTO)
+
+    default_op_qc = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(enable_activation_quantization=False,
+                                            default_weight_attr_config=default_attr_cfg,
+                                            attr_weights_configs_mapping={},
+                                            activation_n_bits=8,
+                                            supported_input_activation_n_bits=8,
+                                            activation_quantization_method=QuantizationMethod.POWER_OF_TWO,
+                                            quantization_preserving=False,
+                                            fixed_scale=None,
+                                            fixed_zero_point=None,
+                                            simd_size=32,
+                                            signedness=Signedness.AUTO)
+
+    default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_op_qc])
+    linear_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([linear_op_qc])
+    bn_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([bn_op_qc])
+
+    generated_tpm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(
+        default_configuration_options,
+        tpc_minor_version=None,
+        tpc_patch_version=None,
+        add_metadata=False, name='bn_quantized_tpm')

     with generated_tpm:
-        tp.OperatorsSet("Conv", linear_configuration_options)
-        tp.OperatorsSet("BN", bn_configuration_options)
+        model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", linear_configuration_options)
+        model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("BN", bn_configuration_options)

     return generated_tpm


 def _generate_bn_quantized_tpc(tp_model):
-    tpc = tp.TargetPlatformCapabilities(tp_model, name='bn_quantized_tpc')
+    tpc = tp.TargetPlatformCapabilities(tp_model)

     with tpc:
         tp.OperationsSetToLayers("Conv", [layers.Conv2D],
model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions( mixed_precision_cfg_list, base_config=cfg, ) - tp_model = tp.TargetPlatformModel(tp.QuantizationConfigOptions([cfg], cfg), - name="mp_activation_conf_weights_test") + tp_model = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([cfg], cfg), + tpc_minor_version=None, + tpc_patch_version=None, + add_metadata=False, + name="mp_activation_conf_weights_test") with tp_model: - tp.OperatorsSet("Activations", act_mixed_cfg) - tp.OperatorsSet("Weights", weight_mixed_cfg) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Activations", act_mixed_cfg) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Weights", weight_mixed_cfg) - keras_tpc = tp.TargetPlatformCapabilities(tp_model, name="mp_activation_conf_weights_test") + keras_tpc = tp.TargetPlatformCapabilities(tp_model) with keras_tpc: tp.OperationsSetToLayers( diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py b/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py index e1fb7c5b1..ce994984c 100644 --- a/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py @@ -17,6 +17,7 @@ import numpy as np import tensorflow as tf +import model_compression_toolkit.target_platform_capabilities.schema.v1 from mct_quantizers import KerasQuantizationWrapper from model_compression_toolkit.core.keras.constants import KERNEL from model_compression_toolkit.defaultdict import DefaultDict @@ -177,22 +178,27 @@ def get_tpc(self): two_bit_cfg = mixed_precision_cfg_list[2] - weight_mixed_cfg = tp.QuantizationConfigOptions( + weight_mixed_cfg = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions( mixed_precision_cfg_list, base_config=cfg, ) - weight_fixed_cfg = tp.QuantizationConfigOptions( + weight_fixed_cfg = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions( [two_bit_cfg], base_config=two_bit_cfg, ) - tp_model = tp.TargetPlatformModel(weight_fixed_cfg, name="mp_part_weights_layers_test") + tp_model = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + weight_fixed_cfg, + tpc_minor_version=None, + tpc_patch_version=None, + add_metadata=False, + name="mp_part_weights_layers_test") with tp_model: - tp.OperatorsSet("Weights_mp", weight_mixed_cfg) - tp.OperatorsSet("Weights_fixed", weight_fixed_cfg) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Weights_mp", weight_mixed_cfg) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Weights_fixed", weight_fixed_cfg) - keras_tpc = tp.TargetPlatformCapabilities(tp_model, name="mp_part_weights_layers_test") + keras_tpc = tp.TargetPlatformCapabilities(tp_model) with keras_tpc: tp.OperationsSetToLayers( @@ -505,24 +511,28 @@ def get_tpc(self): [c.clone_and_edit(enable_activation_quantization=False) for c in mixed_precision_cfg_list] cfg = mixed_precision_cfg_list[0] - act_mixed_cfg = tp.QuantizationConfigOptions( + act_mixed_cfg = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions( [act_eight_bit_cfg, act_four_bit_cfg, 
act_two_bit_cfg], base_config=act_eight_bit_cfg, ) - weight_mixed_cfg = tp.QuantizationConfigOptions( + weight_mixed_cfg = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions( mixed_precision_cfg_list, base_config=cfg, ) - tp_model = tp.TargetPlatformModel(tp.QuantizationConfigOptions([cfg], cfg), - name="mp_weights_conf_act_test") + tp_model = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([cfg], cfg), + tpc_minor_version=None, + tpc_patch_version=None, + add_metadata=False, + name="mp_weights_conf_act_test") with tp_model: - tp.OperatorsSet("Activations", act_mixed_cfg) - tp.OperatorsSet("Weights", weight_mixed_cfg) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Activations", act_mixed_cfg) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Weights", weight_mixed_cfg) - keras_tpc = tp.TargetPlatformCapabilities(tp_model, name="mp_weights_conf_act_test") + keras_tpc = tp.TargetPlatformCapabilities(tp_model) with keras_tpc: tp.OperationsSetToLayers( diff --git a/tests/keras_tests/function_tests/test_custom_layer.py b/tests/keras_tests/function_tests/test_custom_layer.py index 33ea3be4e..f83e74a5b 100644 --- a/tests/keras_tests/function_tests/test_custom_layer.py +++ b/tests/keras_tests/function_tests/test_custom_layer.py @@ -18,7 +18,8 @@ import tensorflow as tf import model_compression_toolkit as mct -from model_compression_toolkit.target_platform_capabilities.target_platform import Signedness +import model_compression_toolkit.target_platform_capabilities.schema.v1 +from model_compression_toolkit.target_platform_capabilities.schema.v1 import Signedness from model_compression_toolkit.target_platform_capabilities.constants import BIAS_ATTR, KERNEL_ATTR from tests.common_tests.helpers.generate_test_tp_model import generate_test_attr_configs, DEFAULT_WEIGHT_ATTR_CONFIG, \ KERNEL_BASE_CONFIG, BIAS_CONFIG @@ -63,25 +64,28 @@ def get_tpc(): """ tp = mct.target_platform attr_cfg = generate_test_attr_configs(kernel_lut_values_bitwidth=0) - base_cfg = tp.OpQuantizationConfig(activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, - enable_activation_quantization=True, - activation_n_bits=32, - supported_input_activation_n_bits=32, - default_weight_attr_config=attr_cfg[DEFAULT_WEIGHT_ATTR_CONFIG], - attr_weights_configs_mapping={}, - quantization_preserving=False, - fixed_scale=1.0, - fixed_zero_point=0, - simd_size=32, - signedness=Signedness.AUTO) - - default_configuration_options = tp.QuantizationConfigOptions([base_cfg]) - tp_model = tp.TargetPlatformModel(default_configuration_options) + base_cfg = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, + enable_activation_quantization=True, + activation_n_bits=32, + supported_input_activation_n_bits=32, + default_weight_attr_config=attr_cfg[DEFAULT_WEIGHT_ATTR_CONFIG], + attr_weights_configs_mapping={}, + quantization_preserving=False, + fixed_scale=1.0, + fixed_zero_point=0, + simd_size=32, + signedness=Signedness.AUTO) + + default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([base_cfg]) + tp_model = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(default_configuration_options, + 
tpc_minor_version=None, + tpc_patch_version=None, + add_metadata=False) with tp_model: default_qco = tp.get_default_quantization_config_options() - tp.OperatorsSet("NoQuantization", - default_qco.clone_and_edit(enable_activation_quantization=False) - .clone_and_edit_weight_attribute(enable_weights_quantization=False)) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("NoQuantization", + default_qco.clone_and_edit(enable_activation_quantization=False) + .clone_and_edit_weight_attribute(enable_weights_quantization=False)) tpc = tp.TargetPlatformCapabilities(tp_model) with tpc: diff --git a/tests/keras_tests/function_tests/test_hmse_error_method.py b/tests/keras_tests/function_tests/test_hmse_error_method.py index 24b7eff49..bbe4fd055 100644 --- a/tests/keras_tests/function_tests/test_hmse_error_method.py +++ b/tests/keras_tests/function_tests/test_hmse_error_method.py @@ -19,6 +19,7 @@ from tensorflow.keras import layers import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit import DefaultDict from model_compression_toolkit.core import QuantizationConfig from model_compression_toolkit.constants import THRESHOLD, RANGE_MAX, NUM_QPARAM_HESSIAN_SAMPLES @@ -29,7 +30,7 @@ calculate_quantization_params from model_compression_toolkit.core.keras.constants import KERNEL, GAMMA from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, KERAS_KERNEL, BIAS -from model_compression_toolkit.target_platform_capabilities.target_platform import AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.schema.v1 import AttributeQuantizationConfig from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import generate_keras_tpc from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO from model_compression_toolkit.core.keras.keras_implementation import KerasImplementation @@ -174,18 +175,21 @@ def test_threshold_selection_hmse_no_gptq(self): def test_threshold_selection_hmse_no_kernel_attr(self): def _generate_bn_quantization_tpc(quant_method, per_channel): cfg, _, _ = get_op_quantization_configs() - conv_qco = tp.QuantizationConfigOptions([cfg], base_config=cfg) + conv_qco = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([cfg], base_config=cfg) # enable BN attributes quantization using the bn_qco = conv_qco.clone_and_edit(attr_weights_configs_mapping= {GAMMA: AttributeQuantizationConfig(weights_n_bits=8, enable_weights_quantization=True)}) - tp_model = tp.TargetPlatformModel(conv_qco) + tp_model = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(conv_qco, + tpc_minor_version=None, + tpc_patch_version=None, + add_metadata=False) with tp_model: - tp.OperatorsSet("Linear", conv_qco) - tp.OperatorsSet("BN", bn_qco) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Linear", conv_qco) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("BN", bn_qco) tpc = tp.TargetPlatformCapabilities(tp_model) diff --git a/tests/keras_tests/function_tests/test_layer_fusing.py b/tests/keras_tests/function_tests/test_layer_fusing.py index 8bf3c5c5e..56658f542 100644 --- a/tests/keras_tests/function_tests/test_layer_fusing.py +++ b/tests/keras_tests/function_tests/test_layer_fusing.py @@ -2,6 +2,7 @@ import numpy as np import tensorflow as tf 
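
Before the layer-fusing tests, a note on the construction pattern that the hunks above migrate to. The snippet below is a minimal sketch, not code from this patch: the name 'example_tpm' and the concrete version values are placeholders, and the IMX500 config getter is reused only because the tests above already import it.

    import model_compression_toolkit as mct
    from model_compression_toolkit.target_platform_capabilities.schema.v1 import (
        OperatorsSet, QuantizationConfigOptions, TargetPlatformModel)
    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import (
        get_op_quantization_configs)

    tp = mct.target_platform

    # Build default options from an existing op config, as the tests above do.
    base_config, _, default_config = get_op_quantization_configs()
    default_options = QuantizationConfigOptions([default_config])

    # Under the new design the model carries its own version metadata
    # instead of the TPC carrying a version string.
    tp_model = TargetPlatformModel(default_options,
                                   tpc_minor_version=1,  # placeholder version
                                   tpc_patch_version=0,  # placeholder version
                                   add_metadata=False,
                                   name='example_tpm')
    with tp_model:
        OperatorsSet("NoQuantization",
                     tp.get_default_quantization_config_options()
                     .clone_and_edit(enable_activation_quantization=False))
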
+import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.core import DEFAULTCONFIG from model_compression_toolkit.core.common.fusion.layer_fusing import fusion from model_compression_toolkit.core.common.quantization.set_node_quantization_config import \ @@ -79,10 +80,15 @@ def create_network_4(input_shape): def generate_base_tpc(): base_config, mixed_precision_cfg_list, default_config = get_op_quantization_configs() - default_configuration_options = tp.QuantizationConfigOptions([default_config]) - generated_tp = tp.TargetPlatformModel(default_configuration_options, name='layer_fusing_test') - mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list, - base_config=base_config) + default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions( + [default_config]) + generated_tp = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=None, + tpc_patch_version=None, + add_metadata=False, name='layer_fusing_test') + mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions(mixed_precision_cfg_list, + base_config=base_config) return generated_tp, mixed_precision_configuration_options @@ -90,12 +96,12 @@ def generate_base_tpc(): def get_tpc_1(): generated_tp, mixed_precision_configuration_options = generate_base_tpc() with generated_tp: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - any_relu = tp.OperatorsSet("AnyReLU") + conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) + any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU") # Define fusions - tp.Fusing([conv, any_relu]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, any_relu]) - keras_tpc = tp.TargetPlatformCapabilities(generated_tp, name='layer_fusing_test') + keras_tpc = tp.TargetPlatformCapabilities(generated_tp) with keras_tpc: tp.OperationsSetToLayers("Conv", [Conv2D]) tp.OperationsSetToLayers("AnyReLU", [tf.nn.relu, @@ -107,16 +113,16 @@ def get_tpc_1(): def get_tpc_2(): generated_tp, mixed_precision_configuration_options = generate_base_tpc() with generated_tp: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - any_relu = tp.OperatorsSet("AnyReLU") - swish = tp.OperatorsSet("Swish") - sigmoid = tp.OperatorsSet("Sigmoid") - tanh = tp.OperatorsSet("Tanh") - activations_after_conv_to_fuse = tp.OperatorSetConcat(any_relu, swish, sigmoid, tanh) + conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) + any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU") + swish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Swish") + sigmoid = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Sigmoid") + tanh = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Tanh") + activations_after_conv_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish, sigmoid, tanh) # Define fusions - tp.Fusing([conv, activations_after_conv_to_fuse]) + 
model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_after_conv_to_fuse]) - keras_tpc = tp.TargetPlatformCapabilities(generated_tp, name='layer_fusing_test') + keras_tpc = tp.TargetPlatformCapabilities(generated_tp) with keras_tpc: tp.OperationsSetToLayers("Conv", [Conv2D, DepthwiseConv2D]) tp.OperationsSetToLayers("AnyReLU", [tf.nn.relu, @@ -131,12 +137,12 @@ def get_tpc_2(): def get_tpc_3(): generated_tp, mixed_precision_configuration_options = generate_base_tpc() with generated_tp: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - any_relu = tp.OperatorsSet("AnyReLU") + conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) + any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU") # Define fusions - tp.Fusing([conv, any_relu]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, any_relu]) - keras_tpc = tp.TargetPlatformCapabilities(generated_tp, name='layer_fusing_test') + keras_tpc = tp.TargetPlatformCapabilities(generated_tp) with keras_tpc: tp.OperationsSetToLayers("Conv", [Conv2D]) tp.OperationsSetToLayers("AnyReLU", [tf.nn.relu, @@ -148,19 +154,19 @@ def get_tpc_3(): def get_tpc_4(): generated_tp, mixed_precision_configuration_options = generate_base_tpc() with generated_tp: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - fc = tp.OperatorsSet("FullyConnected", mixed_precision_configuration_options) - any_relu = tp.OperatorsSet("AnyReLU") - add = tp.OperatorsSet("Add") - swish = tp.OperatorsSet("Swish") - activations_to_fuse = tp.OperatorSetConcat(any_relu, swish) + conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) + fc = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("FullyConnected", mixed_precision_configuration_options) + any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU") + add = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Add") + swish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Swish") + activations_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish) # Define fusions - tp.Fusing([conv, activations_to_fuse]) - tp.Fusing([conv, add, activations_to_fuse]) - tp.Fusing([conv, activations_to_fuse, add]) - tp.Fusing([fc, activations_to_fuse]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_to_fuse]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, add, activations_to_fuse]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_to_fuse, add]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([fc, activations_to_fuse]) - keras_tpc = tp.TargetPlatformCapabilities(generated_tp, name='layer_fusing_test') + keras_tpc = tp.TargetPlatformCapabilities(generated_tp) with keras_tpc: tp.OperationsSetToLayers("Conv", [Conv2D]) tp.OperationsSetToLayers("FullyConnected", [Dense]) diff --git a/tests/keras_tests/function_tests/test_node_quantization_configurations.py b/tests/keras_tests/function_tests/test_node_quantization_configurations.py index 462a9b0e8..08507cbab 100644 --- 
a/tests/keras_tests/function_tests/test_node_quantization_configurations.py +++ b/tests/keras_tests/function_tests/test_node_quantization_configurations.py @@ -23,7 +23,7 @@ power_of_two_selection_histogram from model_compression_toolkit.core.common.quantization.quantizers.uniform_quantizers import power_of_two_quantizer from model_compression_toolkit.core.keras.constants import KERNEL, BIAS -from model_compression_toolkit.target_platform_capabilities.target_platform import AttributeQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.schema.v1 import AttributeQuantizationConfig from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import get_op_quantization_configs diff --git a/tests/keras_tests/non_parallel_tests/test_keras_tp_model.py b/tests/keras_tests/non_parallel_tests/test_keras_tp_model.py index 61642d29a..adb34aaa0 100644 --- a/tests/keras_tests/non_parallel_tests/test_keras_tp_model.py +++ b/tests/keras_tests/non_parallel_tests/test_keras_tp_model.py @@ -22,6 +22,7 @@ from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2 +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.core.common import BaseNode from tests.common_tests.helpers.generate_test_tp_model import generate_test_op_qc, generate_test_attr_configs @@ -49,7 +50,7 @@ TEST_QC = generate_test_op_qc(**generate_test_attr_configs()) -TEST_QCO = tp.QuantizationConfigOptions([TEST_QC]) +TEST_QCO = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]) def get_node(layer) -> BaseNode: @@ -104,9 +105,13 @@ def test_keras_layers_with_params(self): self.assertFalse(get_node(conv).is_match_filter_params(conv_filter_contains)) def test_get_layers_by_op(self): - hm = tp.TargetPlatformModel(tp.QuantizationConfigOptions([TEST_QC])) + hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]), + tpc_minor_version=None, + tpc_patch_version=None, + add_metadata=False) with hm: - op_obj = tp.OperatorsSet('opsetA') + op_obj = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetA') fw_tp = TargetPlatformCapabilities(hm) with fw_tp: opset_layers = [Conv2D, LayerFilterParams(ReLU, max_value=2)] @@ -116,11 +121,15 @@ def test_get_layers_by_op(self): self.assertEqual(fw_tp.get_layers_by_opset_name('nonExistingOpsetName'), None) def test_get_layers_by_opconcat(self): - hm = tp.TargetPlatformModel(tp.QuantizationConfigOptions([TEST_QC])) + hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]), + tpc_minor_version=None, + tpc_patch_version=None, + add_metadata=False) with hm: - op_obj_a = tp.OperatorsSet('opsetA') - op_obj_b = tp.OperatorsSet('opsetB') - op_concat = tp.OperatorSetConcat(op_obj_a, op_obj_b) + op_obj_a = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetA') + op_obj_b = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetB') + op_concat = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(op_obj_a, op_obj_b) fw_tp = TargetPlatformCapabilities(hm) with fw_tp: @@ -133,10 +142,14 @@ def 
test_get_layers_by_opconcat(self): self.assertEqual(fw_tp.get_layers_by_opset(op_concat), opset_layers_a + opset_layers_b) def test_layer_attached_to_multiple_opsets(self): - hm = tp.TargetPlatformModel(tp.QuantizationConfigOptions([TEST_QC])) + hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]), + tpc_minor_version=None, + tpc_patch_version=None, + add_metadata=False) with hm: - tp.OperatorsSet('opsetA') - tp.OperatorsSet('opsetB') + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetA') + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetB') fw_tp = TargetPlatformCapabilities(hm) with self.assertRaises(Exception) as e: @@ -146,10 +159,14 @@ def test_layer_attached_to_multiple_opsets(self): self.assertEqual('Found layer Conv2D in more than one OperatorsSet', str(e.exception)) def test_filter_layer_attached_to_multiple_opsets(self): - hm = tp.TargetPlatformModel(tp.QuantizationConfigOptions([TEST_QC])) + hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]), + tpc_minor_version=None, + tpc_patch_version=None, + add_metadata=False) with hm: - tp.OperatorsSet('opsetA') - tp.OperatorsSet('opsetB') + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetA') + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetB') fw_tp = TargetPlatformCapabilities(hm) with self.assertRaises(Exception) as e: @@ -159,23 +176,27 @@ def test_filter_layer_attached_to_multiple_opsets(self): self.assertEqual('Found layer Activation(activation=relu) in more than one OperatorsSet', str(e.exception)) def test_qco_by_keras_layer(self): - default_qco = tp.QuantizationConfigOptions([TEST_QC]) + default_qco = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]) default_qco = default_qco.clone_and_edit(attr_weights_configs_mapping={}) - tpm = tp.TargetPlatformModel(default_qco, name='test') + tpm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(default_qco, + tpc_minor_version=None, + tpc_patch_version=None, + add_metadata=False, + name='test') with tpm: - mixed_precision_configuration_options = tp.QuantizationConfigOptions( + mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions( quantization_config_list=[TEST_QC, TEST_QC.clone_and_edit(attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 4}}), TEST_QC.clone_and_edit(attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 2}})], base_config=TEST_QC) - tp.OperatorsSet("conv", mixed_precision_configuration_options) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("conv", mixed_precision_configuration_options) sevenbit_qco = TEST_QCO.clone_and_edit(activation_n_bits=7, attr_weights_configs_mapping={}) - tp.OperatorsSet("tanh", sevenbit_qco) - tp.OperatorsSet("relu") + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("tanh", sevenbit_qco) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("relu") - tpc_keras = tp.TargetPlatformCapabilities(tpm, name='fw_test') + tpc_keras = tp.TargetPlatformCapabilities(tpm) with tpc_keras: 
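
The with-block that continues below maps each opset declared on the model to concrete Keras layers. In isolation, the attachment step under the new design looks roughly like the following sketch: 'attach_example' is an illustrative name, and the single "conv" opset stands in for the fuller set the test uses.

    import model_compression_toolkit as mct
    from tensorflow.keras.layers import Conv2D
    from model_compression_toolkit.defaultdict import DefaultDict
    from model_compression_toolkit.target_platform_capabilities.constants import (
        KERNEL_ATTR, KERAS_KERNEL, BIAS, BIAS_ATTR)
    from model_compression_toolkit.target_platform_capabilities.schema.v1 import (
        OperatorsSet, QuantizationConfigOptions, TargetPlatformModel)
    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import (
        get_op_quantization_configs)

    tp = mct.target_platform

    base_config, _, default_config = get_op_quantization_configs()
    tpm = TargetPlatformModel(QuantizationConfigOptions([default_config]),
                              tpc_minor_version=None,
                              tpc_patch_version=None,
                              add_metadata=False,
                              name='attach_example')
    with tpm:
        OperatorsSet("conv", QuantizationConfigOptions([base_config], base_config=base_config))

    # TargetPlatformCapabilities no longer takes a `name` argument.
    tpc_keras = tp.TargetPlatformCapabilities(tpm)
    with tpc_keras:
        # Map the opset to framework layers, translating the generic attribute
        # names onto the Keras-specific ones.
        tp.OperationsSetToLayers("conv", [Conv2D],
                                 attr_mapping={KERNEL_ATTR: DefaultDict(default_value=KERAS_KERNEL),
                                               BIAS_ATTR: DefaultDict(default_value=BIAS)})
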
tp.OperationsSetToLayers("conv", [Conv2D], attr_mapping={KERNEL_ATTR: DefaultDict(default_value=KERAS_KERNEL), @@ -200,8 +221,11 @@ def test_qco_by_keras_layer(self): self.assertEqual(relu_qco, default_qco) def test_opset_not_in_tp(self): - default_qco = tp.QuantizationConfigOptions([TEST_QC]) - hm = tp.TargetPlatformModel(default_qco) + default_qco = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]) + hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(default_qco, + tpc_minor_version=None, + tpc_patch_version=None, + add_metadata=False) hm_keras = tp.TargetPlatformCapabilities(hm) with self.assertRaises(Exception) as e: with hm_keras: @@ -211,14 +235,17 @@ def test_opset_not_in_tp(self): str(e.exception)) def test_keras_fusing_patterns(self): - default_qco = tp.QuantizationConfigOptions([TEST_QC]) - hm = tp.TargetPlatformModel(default_qco) + default_qco = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]) + hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(default_qco, + tpc_minor_version=None, + tpc_patch_version=None, + add_metadata=False) with hm: - a = tp.OperatorsSet("opA") - b = tp.OperatorsSet("opB") - c = tp.OperatorsSet("opC") - tp.Fusing([a, b, c]) - tp.Fusing([a, c]) + a = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("opA") + b = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("opB") + c = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("opC") + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([a, b, c]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([a, c]) hm_keras = tp.TargetPlatformCapabilities(hm) with hm_keras: @@ -240,10 +267,13 @@ def test_keras_fusing_patterns(self): self.assertEqual(p1[1], LayerFilterParams(ReLU, Greater("max_value", 7), negative_slope=0)) def test_get_default_op_qc(self): - default_qco = tp.QuantizationConfigOptions([TEST_QC]) - tpm = tp.TargetPlatformModel(default_qco) + default_qco = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]) + tpm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(default_qco, + tpc_minor_version=None, + tpc_patch_version=None, + add_metadata=False) with tpm: - a = tp.OperatorsSet("opA") + a = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("opA") tpc = tp.TargetPlatformCapabilities(tpm) with tpc: @@ -279,37 +309,37 @@ def rep_data(): def test_get_keras_supported_version(self): tpc = mct.get_target_platform_capabilities(TENSORFLOW, DEFAULT_TP_MODEL) # Latest - self.assertTrue(tpc.version == 'v1') + self.assertTrue(tpc.tp_model.tpc_minor_version == 1) tpc = mct.get_target_platform_capabilities(TENSORFLOW, DEFAULT_TP_MODEL, 'v1_pot') - self.assertTrue(tpc.version == 'v1_pot') + self.assertTrue(tpc.tp_model.tpc_minor_version == 1) tpc = mct.get_target_platform_capabilities(TENSORFLOW, DEFAULT_TP_MODEL, 'v1_lut') - self.assertTrue(tpc.version == 'v1_lut') + self.assertTrue(tpc.tp_model.tpc_minor_version == 1) tpc = mct.get_target_platform_capabilities(TENSORFLOW, DEFAULT_TP_MODEL, 'v1') - self.assertTrue(tpc.version == 'v1') + self.assertTrue(tpc.tp_model.tpc_minor_version == 1) tpc = mct.get_target_platform_capabilities(TENSORFLOW, DEFAULT_TP_MODEL, 'v2_lut') - self.assertTrue(tpc.version == 
'v2_lut') + self.assertTrue(tpc.tp_model.tpc_minor_version == 2) tpc = mct.get_target_platform_capabilities(TENSORFLOW, DEFAULT_TP_MODEL, 'v2') - self.assertTrue(tpc.version == 'v2') + self.assertTrue(tpc.tp_model.tpc_minor_version == 2) tpc = mct.get_target_platform_capabilities(TENSORFLOW, IMX500_TP_MODEL, "v1") - self.assertTrue(tpc.version == 'v1') + self.assertTrue(tpc.tp_model.tpc_minor_version == 1) tpc = mct.get_target_platform_capabilities(TENSORFLOW, IMX500_TP_MODEL, "v2") - self.assertTrue(tpc.version == 'v2') + self.assertTrue(tpc.tp_model.tpc_minor_version == 2) tpc = mct.get_target_platform_capabilities(TENSORFLOW, IMX500_TP_MODEL, "v1_lut") - self.assertTrue(tpc.version == 'v1_lut') + self.assertTrue(tpc.tp_model.tpc_minor_version == 1) tpc = mct.get_target_platform_capabilities(TENSORFLOW, IMX500_TP_MODEL, "v2_lut") - self.assertTrue(tpc.version == 'v2_lut') + self.assertTrue(tpc.tp_model.tpc_minor_version == 2) tpc = mct.get_target_platform_capabilities(TENSORFLOW, IMX500_TP_MODEL, "v1_pot") - self.assertTrue(tpc.version == 'v1_pot') + self.assertTrue(tpc.tp_model.tpc_minor_version == 1) tpc = mct.get_target_platform_capabilities(TENSORFLOW, TFLITE_TP_MODEL, "v1") - self.assertTrue(tpc.version == 'v1') + self.assertTrue(tpc.tp_model.tpc_minor_version == 1) tpc = mct.get_target_platform_capabilities(TENSORFLOW, QNNPACK_TP_MODEL, "v1") - self.assertTrue(tpc.version == 'v1') + self.assertTrue(tpc.tp_model.tpc_minor_version == 1) def test_get_keras_not_supported_platform(self): with self.assertRaises(Exception) as e: diff --git a/tests/pytorch_tests/function_tests/layer_fusing_test.py b/tests/pytorch_tests/function_tests/layer_fusing_test.py index ee24b3324..36cf21caa 100644 --- a/tests/pytorch_tests/function_tests/layer_fusing_test.py +++ b/tests/pytorch_tests/function_tests/layer_fusing_test.py @@ -17,6 +17,7 @@ from torch.nn import Conv2d, ReLU, SiLU, Sigmoid, Linear, Hardtanh from torch.nn.functional import relu, relu6 +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.target_platform_capabilities.target_platform import LayerFilterParams from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO from model_compression_toolkit.core.pytorch.pytorch_implementation import PytorchImplementation @@ -47,10 +48,13 @@ def get_type(self, fusion): def get_tpc(self): base_config, mixed_precision_cfg_list, default_config = get_op_quantization_configs() - default_configuration_options = tp.QuantizationConfigOptions([default_config]) - generated_tp = tp.TargetPlatformModel(default_configuration_options, name='layer_fusing_test') - mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list, - base_config=base_config) + default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_config]) + generated_tp = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(default_configuration_options, + tpc_minor_version=None, + tpc_patch_version=None, + name='layer_fusing_test') + mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions(mixed_precision_cfg_list, + base_config=base_config) return generated_tp, mixed_precision_configuration_options def _compare(self, fused_nodes): @@ -67,12 +71,12 @@ def __init__(self, unit_test): def get_tpc(self): generated_tp, mixed_precision_configuration_options =
super().get_tpc() with generated_tp: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - any_relu = tp.OperatorsSet("AnyReLU") + conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) + any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU") # Define fusions - tp.Fusing([conv, any_relu]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, any_relu]) - pytorch_tpc = tp.TargetPlatformCapabilities(generated_tp, name='layer_fusing_test') + pytorch_tpc = tp.TargetPlatformCapabilities(generated_tp) with pytorch_tpc: tp.OperationsSetToLayers("Conv", [nn.Conv2d]) tp.OperationsSetToLayers("AnyReLU", [torch.relu, @@ -109,12 +113,12 @@ def __init__(self, unit_test): def get_tpc(self): generated_tp, mixed_precision_configuration_options = super().get_tpc() with generated_tp: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - any_act = tp.OperatorsSet("AnyAct") + conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) + any_act = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyAct") # Define fusions - tp.Fusing([conv, any_act]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, any_act]) - pytorch_tpc = tp.TargetPlatformCapabilities(generated_tp, name='layer_fusing_test') + pytorch_tpc = tp.TargetPlatformCapabilities(generated_tp) with pytorch_tpc: tp.OperationsSetToLayers("Conv", [Conv2d]) tp.OperationsSetToLayers("AnyAct", [ReLU,relu6,relu,SiLU,Sigmoid, LayerFilterParams(Hardtanh, min_val=0)]) @@ -161,12 +165,12 @@ def __init__(self, unit_test): def get_tpc(self): generated_tp, mixed_precision_configuration_options = super().get_tpc() with generated_tp: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - any_act = tp.OperatorsSet("AnyAct") + conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) + any_act = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyAct") # Define fusions - tp.Fusing([conv, any_act]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, any_act]) - pytorch_tpc = tp.TargetPlatformCapabilities(generated_tp, name='layer_fusing_test') + pytorch_tpc = tp.TargetPlatformCapabilities(generated_tp) with pytorch_tpc: tp.OperationsSetToLayers("Conv", [Conv2d]) tp.OperationsSetToLayers("AnyAct", [ReLU,relu6,relu]) @@ -213,19 +217,19 @@ def __init__(self, unit_test): def get_tpc(self): generated_tp, mixed_precision_configuration_options = super().get_tpc() with generated_tp: - conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options) - fc = tp.OperatorsSet("FullyConnected", mixed_precision_configuration_options) - any_relu = tp.OperatorsSet("AnyReLU") - add = tp.OperatorsSet("Add") - swish = tp.OperatorsSet("Swish") - activations_to_fuse = tp.OperatorSetConcat(any_relu, swish) + conv = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", mixed_precision_configuration_options) + fc = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("FullyConnected", mixed_precision_configuration_options) + any_relu = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("AnyReLU") + add = 
model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Add") + swish = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Swish") + activations_to_fuse = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(any_relu, swish) # Define fusions - tp.Fusing([conv, activations_to_fuse]) - tp.Fusing([conv, add, activations_to_fuse]) - tp.Fusing([conv, activations_to_fuse, add]) - tp.Fusing([fc, activations_to_fuse]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_to_fuse]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, add, activations_to_fuse]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([conv, activations_to_fuse, add]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([fc, activations_to_fuse]) - pytorch_tpc = tp.TargetPlatformCapabilities(generated_tp, name='layer_fusing_test') + pytorch_tpc = tp.TargetPlatformCapabilities(generated_tp) with pytorch_tpc: tp.OperationsSetToLayers("Conv", [Conv2d]) tp.OperationsSetToLayers("FullyConnected", [Linear]) diff --git a/tests/pytorch_tests/function_tests/test_pytorch_tp_model.py b/tests/pytorch_tests/function_tests/test_pytorch_tp_model.py index 1ce79dd82..6ff0ee1a3 100644 --- a/tests/pytorch_tests/function_tests/test_pytorch_tp_model.py +++ b/tests/pytorch_tests/function_tests/test_pytorch_tp_model.py @@ -23,6 +23,7 @@ from torchvision.models import mobilenet_v2 import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.core import MixedPrecisionQuantizationConfig from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.constants import PYTORCH @@ -40,7 +41,7 @@ TEST_QC = generate_test_op_qc(**generate_test_attr_configs()) -TEST_QCO = tp.QuantizationConfigOptions([TEST_QC]) +TEST_QCO = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]) class TestPytorchTPModel(unittest.TestCase): @@ -79,29 +80,33 @@ def test_pytorch_layers_with_params(self): def test_qco_by_pytorch_layer(self): - default_qco = tp.QuantizationConfigOptions([TEST_QC]) + default_qco = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]) default_qco = default_qco.clone_and_edit(attr_weights_configs_mapping={}) - tpm = tp.TargetPlatformModel(default_qco, name='test') + tpm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(default_qco, + tpc_minor_version=None, + tpc_patch_version=None, + add_metadata=False, + name='test') with tpm: - mixed_precision_configuration_options = tp.QuantizationConfigOptions( + mixed_precision_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions( [TEST_QC, TEST_QC.clone_and_edit(attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 4}}), TEST_QC.clone_and_edit(attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 2}})], base_config=TEST_QC) - tp.OperatorsSet("conv", mixed_precision_configuration_options) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("conv", mixed_precision_configuration_options) sevenbit_qco = TEST_QCO.clone_and_edit(activation_n_bits=7, attr_weights_configs_mapping={}) - tp.OperatorsSet("tanh", sevenbit_qco) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("tanh", 
sevenbit_qco) sixbit_qco = TEST_QCO.clone_and_edit(activation_n_bits=6, attr_weights_configs_mapping={}) - tp.OperatorsSet("avg_pool2d_kernel_2", sixbit_qco) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("avg_pool2d_kernel_2", sixbit_qco) - tp.OperatorsSet("avg_pool2d") + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("avg_pool2d") - tpc_pytorch = tp.TargetPlatformCapabilities(tpm, name='fw_test') + tpc_pytorch = tp.TargetPlatformCapabilities(tpm) with tpc_pytorch: tp.OperationsSetToLayers("conv", [torch.nn.Conv2d], attr_mapping={KERNEL_ATTR: DefaultDict(default_value=PYTORCH_KERNEL), @@ -133,9 +138,13 @@ def test_qco_by_pytorch_layer(self): self.assertEqual(avg_pool2d_qco, default_qco) def test_get_layers_by_op(self): - hm = tp.TargetPlatformModel(tp.QuantizationConfigOptions([TEST_QC])) + hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]), + tpc_minor_version=None, + tpc_patch_version=None, + add_metadata=False) with hm: - op_obj = tp.OperatorsSet('opsetA') + op_obj = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetA') fw_tp = TargetPlatformCapabilities(hm) with fw_tp: opset_layers = [torch.nn.Conv2d, LayerFilterParams(torch.nn.Softmax, dim=1)] @@ -144,11 +153,15 @@ def test_get_layers_by_op(self): self.assertEqual(fw_tp.get_layers_by_opset(op_obj), opset_layers) def test_get_layers_by_opconcat(self): - hm = tp.TargetPlatformModel(tp.QuantizationConfigOptions([TEST_QC])) + hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]), + tpc_minor_version=None, + tpc_patch_version=None, + add_metadata=False) with hm: - op_obj_a = tp.OperatorsSet('opsetA') - op_obj_b = tp.OperatorsSet('opsetB') - op_concat = tp.OperatorSetConcat(op_obj_a, op_obj_b) + op_obj_a = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetA') + op_obj_b = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetB') + op_concat = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorSetConcat(op_obj_a, op_obj_b) fw_tp = TargetPlatformCapabilities(hm) with fw_tp: @@ -161,10 +174,14 @@ def test_get_layers_by_opconcat(self): self.assertEqual(fw_tp.get_layers_by_opset(op_concat), opset_layers_a + opset_layers_b) def test_layer_attached_to_multiple_opsets(self): - hm = tp.TargetPlatformModel(tp.QuantizationConfigOptions([TEST_QC])) + hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]), + tpc_minor_version=None, + tpc_patch_version=None, + add_metadata=False) with hm: - tp.OperatorsSet('opsetA') - tp.OperatorsSet('opsetB') + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetA') + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetB') fw_tp = TargetPlatformCapabilities(hm) with self.assertRaises(Exception) as e: @@ -174,10 +191,14 @@ def test_layer_attached_to_multiple_opsets(self): self.assertEqual('Found layer Conv2d in more than one OperatorsSet', str(e.exception)) def test_filter_layer_attached_to_multiple_opsets(self): - hm = 
tp.TargetPlatformModel(tp.QuantizationConfigOptions([TEST_QC])) + hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]), + tpc_minor_version=None, + tpc_patch_version=None, + add_metadata=False) with hm: - tp.OperatorsSet('opsetA') - tp.OperatorsSet('opsetB') + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetA') + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet('opsetB') fw_tp = TargetPlatformCapabilities(hm) with self.assertRaises(Exception) as e: @@ -187,8 +208,11 @@ def test_filter_layer_attached_to_multiple_opsets(self): self.assertEqual('Found layer Softmax(dim=2) in more than one OperatorsSet', str(e.exception)) def test_opset_not_in_tp(self): - default_qco = tp.QuantizationConfigOptions([TEST_QC]) - hm = tp.TargetPlatformModel(default_qco) + default_qco = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]) + hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(default_qco, + tpc_minor_version=None, + tpc_patch_version=None, + add_metadata=False) hm_pytorch = tp.TargetPlatformCapabilities(hm) with self.assertRaises(Exception) as e: with hm_pytorch: @@ -198,14 +222,17 @@ def test_opset_not_in_tp(self): str(e.exception)) def test_pytorch_fusing_patterns(self): - default_qco = tp.QuantizationConfigOptions([TEST_QC]) - hm = tp.TargetPlatformModel(default_qco) + default_qco = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([TEST_QC]) + hm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(default_qco, + tpc_minor_version=None, + tpc_patch_version=None, + add_metadata=False) with hm: - a = tp.OperatorsSet("opA") - b = tp.OperatorsSet("opB") - c = tp.OperatorsSet("opC") - tp.Fusing([a, b, c]) - tp.Fusing([a, c]) + a = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("opA") + b = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("opB") + c = model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("opC") + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([a, b, c]) + model_compression_toolkit.target_platform_capabilities.schema.v1.Fusing([a, c]) hm_keras = tp.TargetPlatformCapabilities(hm) with hm_keras: @@ -252,23 +279,23 @@ def rep_data(): def test_get_pytorch_supported_version(self): tpc = mct.get_target_platform_capabilities(PYTORCH, DEFAULT_TP_MODEL) # Latest - self.assertTrue(tpc.version == 'v1') + self.assertTrue(tpc.tp_model.tpc_minor_version == 1) tpc = mct.get_target_platform_capabilities(PYTORCH, DEFAULT_TP_MODEL, 'v1') - self.assertTrue(tpc.version == 'v1') + self.assertTrue(tpc.tp_model.tpc_minor_version == 1) tpc = mct.get_target_platform_capabilities(PYTORCH, DEFAULT_TP_MODEL, 'v2') - self.assertTrue(tpc.version == 'v2') + self.assertTrue(tpc.tp_model.tpc_minor_version == 2) tpc = mct.get_target_platform_capabilities(PYTORCH, IMX500_TP_MODEL, "v1") - self.assertTrue(tpc.version == 'v1') + self.assertTrue(tpc.tp_model.tpc_minor_version == 1) tpc = mct.get_target_platform_capabilities(PYTORCH, IMX500_TP_MODEL, "v2") - self.assertTrue(tpc.version == 'v2') + self.assertTrue(tpc.tp_model.tpc_minor_version == 2) tpc = mct.get_target_platform_capabilities(PYTORCH, TFLITE_TP_MODEL, "v1") - self.assertTrue(tpc.version == 
'v1') + self.assertTrue(tpc.tp_model.tpc_minor_version == 1) tpc = mct.get_target_platform_capabilities(PYTORCH, QNNPACK_TP_MODEL, "v1") - self.assertTrue(tpc.version == 'v1') + self.assertTrue(tpc.tp_model.tpc_minor_version == 1) def test_get_pytorch_not_supported_platform(self): with self.assertRaises(Exception) as e: diff --git a/tests/pytorch_tests/model_tests/feature_models/bn_attributes_quantization_test.py b/tests/pytorch_tests/model_tests/feature_models/bn_attributes_quantization_test.py index ebc308cfe..f50e8a7f1 100644 --- a/tests/pytorch_tests/model_tests/feature_models/bn_attributes_quantization_test.py +++ b/tests/pytorch_tests/model_tests/feature_models/bn_attributes_quantization_test.py @@ -16,13 +16,14 @@ from torch import nn import model_compression_toolkit as mct +import model_compression_toolkit.target_platform_capabilities.schema.v1 from mct_quantizers import QuantizationMethod, PytorchQuantizationWrapper from model_compression_toolkit import DefaultDict from model_compression_toolkit.core.pytorch.constants import GAMMA, BETA from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS, BIAS_ATTR from tests.common_tests.helpers.generate_test_tp_model import generate_test_attr_configs, \ DEFAULT_WEIGHT_ATTR_CONFIG, KERNEL_BASE_CONFIG, generate_test_op_qc, BIAS_CONFIG -from model_compression_toolkit.target_platform_capabilities.target_platform import Signedness +from model_compression_toolkit.target_platform_capabilities.schema.v1 import Signedness from tests.pytorch_tests.model_tests.base_pytorch_test import BasePytorchTest from tests.pytorch_tests.utils import get_layers_from_model_by_type @@ -50,46 +51,50 @@ def _generate_bn_quantized_tpm(quantize_linear): bias_config=attr_cfgs_dict[BIAS_CONFIG], enable_activation_quantization=False) - bn_op_qc = tp.OpQuantizationConfig(enable_activation_quantization=False, - default_weight_attr_config=default_attr_cfg, - attr_weights_configs_mapping={BETA: bn_attr_cfg, GAMMA: bn_attr_cfg}, - activation_n_bits=8, - supported_input_activation_n_bits=8, - activation_quantization_method=QuantizationMethod.POWER_OF_TWO, - quantization_preserving=False, - fixed_scale=None, - fixed_zero_point=None, - simd_size=32, - signedness=Signedness.AUTO) - - default_op_qc = tp.OpQuantizationConfig(enable_activation_quantization=False, - default_weight_attr_config=default_attr_cfg, - attr_weights_configs_mapping={}, - activation_n_bits=8, - supported_input_activation_n_bits=8, - activation_quantization_method=QuantizationMethod.POWER_OF_TWO, - quantization_preserving=False, - fixed_scale=None, - fixed_zero_point=None, - simd_size=32, - signedness=Signedness.AUTO) - - default_configuration_options = tp.QuantizationConfigOptions([default_op_qc]) - linear_configuration_options = tp.QuantizationConfigOptions([linear_op_qc]) - bn_configuration_options = tp.QuantizationConfigOptions([bn_op_qc]) - - generated_tpm = tp.TargetPlatformModel(default_configuration_options, name='bn_quantized_tpm') + bn_op_qc = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(enable_activation_quantization=False, + default_weight_attr_config=default_attr_cfg, + attr_weights_configs_mapping={BETA: bn_attr_cfg, GAMMA: bn_attr_cfg}, + activation_n_bits=8, + supported_input_activation_n_bits=8, + activation_quantization_method=QuantizationMethod.POWER_OF_TWO, + quantization_preserving=False, + fixed_scale=None, + fixed_zero_point=None, + simd_size=32, + signedness=Signedness.AUTO) + + default_op_qc = 
model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(enable_activation_quantization=False, + default_weight_attr_config=default_attr_cfg, + attr_weights_configs_mapping={}, + activation_n_bits=8, + supported_input_activation_n_bits=8, + activation_quantization_method=QuantizationMethod.POWER_OF_TWO, + quantization_preserving=False, + fixed_scale=None, + fixed_zero_point=None, + simd_size=32, + signedness=Signedness.AUTO) + + default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([default_op_qc]) + linear_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([linear_op_qc]) + bn_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([bn_op_qc]) + + generated_tpm = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=None, + tpc_patch_version=None, + add_metadata=False, name='bn_quantized_tpm') with generated_tpm: - tp.OperatorsSet("Conv", linear_configuration_options) - tp.OperatorsSet("BN", bn_configuration_options) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Conv", linear_configuration_options) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("BN", bn_configuration_options) return generated_tpm def _generate_bn_quantized_tpc(tp_model): - tpc = tp.TargetPlatformCapabilities(tp_model, name='bn_quantized_tpc') + tpc = tp.TargetPlatformCapabilities(tp_model) with tpc: tp.OperationsSetToLayers("Conv", [nn.Conv2d], diff --git a/tests/pytorch_tests/model_tests/feature_models/const_quantization_test.py b/tests/pytorch_tests/model_tests/feature_models/const_quantization_test.py index 099b74f7c..569a383c3 100644 --- a/tests/pytorch_tests/model_tests/feature_models/const_quantization_test.py +++ b/tests/pytorch_tests/model_tests/feature_models/const_quantization_test.py @@ -17,7 +17,8 @@ import torch.nn as nn import numpy as np import model_compression_toolkit as mct -from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import Signedness +import model_compression_toolkit.target_platform_capabilities.schema.v1 +from model_compression_toolkit.target_platform_capabilities.schema.v1 import Signedness from model_compression_toolkit.core import MixedPrecisionQuantizationConfig from model_compression_toolkit.core.pytorch.utils import to_torch_tensor, torch_tensor_to_numpy, set_model from tests.pytorch_tests.model_tests.base_pytorch_feature_test import BasePytorchFeatureNetworkTest @@ -225,29 +226,33 @@ def generate_inputs(self): def get_tpc(self): tp = mct.target_platform attr_cfg = generate_test_attr_configs() - base_cfg = tp.OpQuantizationConfig(activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, - enable_activation_quantization=True, - activation_n_bits=32, - supported_input_activation_n_bits=32, - default_weight_attr_config=attr_cfg[DEFAULT_WEIGHT_ATTR_CONFIG], - attr_weights_configs_mapping={}, - quantization_preserving=False, - fixed_scale=1.0, - fixed_zero_point=0, - simd_size=32, - signedness=Signedness.AUTO) - - default_configuration_options = tp.QuantizationConfigOptions([base_cfg]) + base_cfg = model_compression_toolkit.target_platform_capabilities.schema.v1.OpQuantizationConfig(activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO, + 
enable_activation_quantization=True, + activation_n_bits=32, + supported_input_activation_n_bits=32, + default_weight_attr_config=attr_cfg[DEFAULT_WEIGHT_ATTR_CONFIG], + attr_weights_configs_mapping={}, + quantization_preserving=False, + fixed_scale=1.0, + fixed_zero_point=0, + simd_size=32, + signedness=Signedness.AUTO) + + default_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([base_cfg]) const_config = base_cfg.clone_and_edit(enable_activation_quantization=False, default_weight_attr_config=base_cfg.default_weight_attr_config.clone_and_edit( enable_weights_quantization=True, weights_per_channel_threshold=False, weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO)) - const_configuration_options = tp.QuantizationConfigOptions([const_config]) + const_configuration_options = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions([const_config]) - tp_model = tp.TargetPlatformModel(default_configuration_options) + tp_model = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel( + default_configuration_options, + tpc_minor_version=None, + tpc_patch_version=None, + add_metadata=False) with tp_model: - tp.OperatorsSet("WeightQuant", const_configuration_options) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("WeightQuant", const_configuration_options) tpc = tp.TargetPlatformCapabilities(tp_model) with tpc: diff --git a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_activation_test.py b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_activation_test.py index 426e71f83..352291259 100644 --- a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_activation_test.py +++ b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_activation_test.py @@ -22,8 +22,9 @@ from model_compression_toolkit.core.common.user_info import UserInformation from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, PYTORCH_KERNEL, \ BIAS -from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationConfigOptions, \ - TargetPlatformModel, OperatorsSet, TargetPlatformCapabilities, OperationsSetToLayers +from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities, OperationsSetToLayers +from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel, OperatorsSet, \ + QuantizationConfigOptions from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import get_op_quantization_configs from tests.common_tests.helpers.generate_test_tp_model import generate_tp_model_with_activation_mp from tests.pytorch_tests.model_tests.base_pytorch_test import BasePytorchTest @@ -301,13 +302,16 @@ def get_tpc(self): ) tp_model = TargetPlatformModel(QuantizationConfigOptions([cfg], cfg), + tpc_minor_version=None, + tpc_patch_version=None, + add_metadata=False, name="mp_activation_conf_weights_test") with tp_model: OperatorsSet("Activations", act_mixed_cfg) OperatorsSet("Weights", weight_mixed_cfg) - torch_tpc = TargetPlatformCapabilities(tp_model, name="mp_activation_conf_weights_test") + torch_tpc = TargetPlatformCapabilities(tp_model) with torch_tpc: OperationsSetToLayers( diff --git a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py 
b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py index f88b9270f..12f176543 100644 --- a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py +++ b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py @@ -16,14 +16,16 @@ import numpy as np from torch.nn import Conv2d +import model_compression_toolkit.target_platform_capabilities.schema.v1 from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.core import ResourceUtilization from model_compression_toolkit.core.common.mixed_precision.distance_weighting import MpDistanceWeighting from model_compression_toolkit.core.common.user_info import UserInformation from model_compression_toolkit.core.pytorch.constants import BIAS from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR -from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationConfigOptions, \ - TargetPlatformModel, OperatorsSet, TargetPlatformCapabilities, OperationsSetToLayers +from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities, OperationsSetToLayers +from model_compression_toolkit.target_platform_capabilities.schema.v1 import TargetPlatformModel, OperatorsSet, \ + QuantizationConfigOptions from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import get_tp_model, get_op_quantization_configs from tests.common_tests.helpers.generate_test_tp_model import generate_mixed_precision_test_tp_model from tests.pytorch_tests.tpc_pytorch import get_pytorch_test_tpc_dict @@ -137,22 +139,25 @@ def get_tpc(self): two_bit_cfg = mixed_precision_cfg_list[2] - weight_mixed_cfg = tp.QuantizationConfigOptions( + weight_mixed_cfg = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions( mixed_precision_cfg_list, base_config=cfg, ) - weight_fixed_cfg = tp.QuantizationConfigOptions( + weight_fixed_cfg = model_compression_toolkit.target_platform_capabilities.schema.v1.QuantizationConfigOptions( [two_bit_cfg], base_config=two_bit_cfg, ) - tp_model = tp.TargetPlatformModel(weight_fixed_cfg, name="mp_part_weights_layers_test") + tp_model = model_compression_toolkit.target_platform_capabilities.schema.v1.TargetPlatformModel(weight_fixed_cfg, + tpc_minor_version=None, + tpc_patch_version=None, + name="mp_part_weights_layers_test") with tp_model: - tp.OperatorsSet("Weights_mp", weight_mixed_cfg) - tp.OperatorsSet("Weights_fixed", weight_fixed_cfg) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Weights_mp", weight_mixed_cfg) + model_compression_toolkit.target_platform_capabilities.schema.v1.OperatorsSet("Weights_fixed", weight_fixed_cfg) - pytorch_tpc = tp.TargetPlatformCapabilities(tp_model, name="mp_part_weights_layers_test") + pytorch_tpc = tp.TargetPlatformCapabilities(tp_model) with pytorch_tpc: tp.OperationsSetToLayers( @@ -309,13 +314,15 @@ def get_tpc(self): ) tp_model = TargetPlatformModel(QuantizationConfigOptions([cfg], cfg), + tpc_minor_version=None, + tpc_patch_version=None, name="mp_weights_conf_act_test") with tp_model: OperatorsSet("Activations", act_mixed_cfg) OperatorsSet("Weights", weight_mixed_cfg) - torch_tpc = TargetPlatformCapabilities(tp_model, name="mp_weights_conf_act_test") + torch_tpc = TargetPlatformCapabilities(tp_model) with torch_tpc: OperationsSetToLayers(
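
To close, a short sketch of what the version-metadata change means for callers, as exercised by the supported-version tests above. It assumes the framework and model-name constants live where these tests import them from (PYTORCH in model_compression_toolkit.constants, DEFAULT_TP_MODEL in model_compression_toolkit.target_platform_capabilities.constants).

    import model_compression_toolkit as mct
    from model_compression_toolkit.constants import PYTORCH
    from model_compression_toolkit.target_platform_capabilities.constants import DEFAULT_TP_MODEL

    # The TPC no longer exposes a version string; the attached tp_model carries
    # the structured minor/patch fields introduced by this patch.
    tpc = mct.get_target_platform_capabilities(PYTORCH, DEFAULT_TP_MODEL, 'v2')
    assert tpc.tp_model.tpc_minor_version == 2
    print(tpc.tp_model.tpc_minor_version, tpc.tp_model.tpc_patch_version)
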