Fix bops calculation #1369

Open · wants to merge 17 commits into main
8 changes: 8 additions & 0 deletions model_compression_toolkit/core/common/graph/base_node.py
@@ -167,6 +167,14 @@ def is_configurable_weight(self, attr_name: str) -> bool:
"""
return self.is_weights_quantization_enabled(attr_name) and not self.is_all_weights_candidates_equal(attr_name)

def has_any_configurable_weight(self) -> bool:
"""
Check whether any of the node's weights is configurable.
Returns:
Whether any of the node's weights is configurable.
"""
return any(self.is_configurable_weight(attr) for attr in self.weights)

def has_configurable_activation(self) -> bool:
"""
Checks whether the activation has a configurable quantization.
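Outside the diff, a minimal standalone sketch of the new helper's logic; the `_NodeSketch` class below is hypothetical and only mirrors the two pieces the hunk relies on (a `weights` mapping and a per-attribute `is_configurable_weight` predicate):

class _NodeSketch:
    def __init__(self, weights, configurable_attrs):
        self.weights = weights                      # attribute name -> weight tensor
        self._configurable = set(configurable_attrs)

    def is_configurable_weight(self, attr_name: str) -> bool:
        # stands in for: quantization enabled and candidates not all equal
        return attr_name in self._configurable

    def has_any_configurable_weight(self) -> bool:
        # same expression as the new BaseNode helper above
        return any(self.is_configurable_weight(attr) for attr in self.weights)


node = _NodeSketch(weights={'kernel': None, 'bias': None}, configurable_attrs={'kernel'})
assert node.has_any_configurable_weight()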
model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py
@@ -24,7 +24,6 @@

from model_compression_toolkit.core.common.quantization.candidate_node_quantization_config import \
CandidateNodeQuantizationConfig
from model_compression_toolkit.logger import Logger


class VirtualSplitNode(BaseNode):
@@ -73,11 +72,14 @@ def __init__(self, origin_node: BaseNode, kernel_attr: str):
super().__init__(origin_node)

self.name = origin_node.name + VIRTUAL_WEIGHTS_SUFFIX

self.candidates_quantization_cfg = origin_node.get_unique_weights_candidates(kernel_attr)
for c in self.candidates_quantization_cfg:
c.activation_quantization_cfg.enable_activation_quantization = False
c.activation_quantization_cfg.activation_n_bits = FLOAT_BITWIDTH
# A virtual weights node is created only to be absorbed into a virtual composed node right away.
# However, in some cases composition is impossible and the virtual weights node remains in the graph.
# In such a case it messes up the resource utilization computation, specifically activation cuts. To minimize
# the impact, we preserve the original node's behavior w.r.t. activation (shape and quantization),
# so that the prev - virtualW cut is identical to prev - origin_node. Only the virtualW - virtualA cut will differ
# from the original graph, so in the worst case the utilization of the virtual graph will be higher.
# This should guarantee that the utilization of the original graph does not exceed the requested target.
self.candidates_quantization_cfg = origin_node.candidates_quantization_cfg
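To illustrate the behavioral change the comment describes, a sketch with a dummy config class (not MCT code): the previous implementation forced the virtual weights node's activation to float, inflating the prev - virtualW activation cut, while keeping the origin node's candidates leaves that cut unchanged:

from dataclasses import dataclass

FLOAT_BITWIDTH = 32

@dataclass
class ActivationCfgSketch:
    enable_activation_quantization: bool = True
    activation_n_bits: int = 8

# old behavior: activation quantization disabled and bit-width forced to float,
# so the prev - virtualW cut is counted as 32-bit tensors
old_cfg = ActivationCfgSketch()
old_cfg.enable_activation_quantization = False
old_cfg.activation_n_bits = FLOAT_BITWIDTH

# new behavior: the origin node's candidate is preserved as-is,
# so the prev - virtualW cut matches prev - origin_node (still 8-bit here)
new_cfg = ActivationCfgSketch()

assert old_cfg.activation_n_bits == 32 and new_cfg.activation_n_bits == 8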


class VirtualSplitActivationNode(VirtualSplitNode):
@@ -126,61 +128,59 @@ class VirtualActivationWeightsNode(BaseNode):
def __init__(self,
act_node: BaseNode,
weights_node: BaseNode,
name: str,
framework_attr: Dict[str, Any],
input_shape: Tuple[Any],
output_shape: Tuple[Any],
weights: Dict[str, np.ndarray],
layer_class: type,
fw_info: FrameworkInfo,
reuse: bool = False,
reuse_group: str = None,
quantization_attr: Dict[str, Any] = None,
has_activation: bool = True,
**kwargs):
fw_info: FrameworkInfo):
"""
Init a VirtualActivationWeightsNode object.

Args:
act_node: The original activation node.
weights_node: The original weights node.
name: Node's name
framework_attr: Framework attributes the layer had which the node holds.
input_shape: Input tensor shape of the node.
output_shape: Input tensor shape of the node.
weights: Dictionary from a variable name to the weights with that name in the layer the node represents.
layer_class: Class path of the layer this node represents.
fw_info: A FrameworkInfo object with framework specific information,
reuse: Whether this node was duplicated and represents a reused layer.
reuse_group: Name of group of nodes from the same reused layer.
quantization_attr: Attributes the node holds regarding how it should be quantized.
has_activation: Whether the node has activations that we might want to quantize.
**kwargs: Additional arguments that can be passed but are not used (allows to init the object with an
existing node's __dict__).

fw_info: A FrameworkInfo object with framework specific information.
"""

weights = weights_node.weights
if act_node.weights:
assert fw_info.get_kernel_op_attributes(act_node)[0] is None, \
f'Node {act_node} with kernel cannot be used as activation for VirtualActivationWeightsNode.'
if set(weights.keys()).intersection(set(act_node.weights.keys())):
raise ValueError('Activation and weight nodes are not expected to have the same weight attribute') # pragma: no cover
if any(act_node.is_configurable_weight(attr) for attr in act_node.weights):
raise NotImplementedError('Node with a configurable weight cannot be used as activation for '
'VirtualActivationWeightsNode.') # pragma: no cover
# combine weights from activation and weights
weights.update(act_node.weights)

name = f"{VIRTUAL_ACTIVATION_WEIGHTS_NODE_PREFIX}_{act_node.name}_{weights_node.name}"
super().__init__(name,
framework_attr,
input_shape,
output_shape,
weights,
layer_class,
reuse,
reuse_group,
quantization_attr,
has_activation)
framework_attr=weights_node.framework_attr,
input_shape=act_node.input_shape,
output_shape=act_node.output_shape,
weights=weights,
layer_class=weights_node.layer_class,
reuse=weights_node.reuse,
reuse_group=weights_node.reuse_group,
quantization_attr=weights_node.quantization_attr,
has_activation=False)

self.name = f"{VIRTUAL_ACTIVATION_WEIGHTS_NODE_PREFIX}_{act_node.name}_{weights_node.name}"

self.original_activation_node = act_node
self.original_weights_node = weights_node

v_candidates = []
kernel_attr = fw_info.get_kernel_op_attributes(weights_node.type)[0]
weights_candidates_quantization_cfg = weights_node.get_unique_weights_candidates(kernel_attr)
for c_a in act_node.candidates_quantization_cfg:
for c_w in weights_node.candidates_quantization_cfg:
for c_w in weights_candidates_quantization_cfg:
composed_candidate = CandidateNodeQuantizationConfig(activation_quantization_cfg=c_a.activation_quantization_cfg,
weights_quantization_cfg=c_w.weights_quantization_cfg)
if act_node.weights:
# add non-kernel weights cfg from activation node to the composed node's weights cfg
composed_candidate.weights_quantization_cfg.attributes_config_mapping.update(
c_a.weights_quantization_cfg.attributes_config_mapping
)
composed_candidate.weights_quantization_cfg.pos_attributes_config_mapping.update(
c_a.weights_quantization_cfg.pos_attributes_config_mapping
)
v_candidates.append(composed_candidate)

# sorting the candidates by weights number of bits first and then by activation number of bits (reversed order)
@@ -189,26 +189,3 @@ def __init__(self,
c.activation_quantization_cfg.activation_n_bits), reverse=True)

self.candidates_quantization_cfg = v_candidates
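A minimal sketch of the composition performed above, with plain integers in place of CandidateNodeQuantizationConfig objects, showing the cartesian product of activation and (unique) weights candidates and the descending sort by (weights bits, activation bits):

act_bits = [8, 4, 2]       # activation candidates of the activation node
weight_bits = [8, 4]       # unique kernel candidates of the weights node

v_candidates = [(w, a) for a in act_bits for w in weight_bits]
v_candidates.sort(key=lambda c: (c[0], c[1]), reverse=True)
print(v_candidates)        # [(8, 8), (8, 4), (8, 2), (4, 8), (4, 4), (4, 2)]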

def get_bops_count(self, fw_impl: Any, fw_info: FrameworkInfo, candidate_idx: int) -> float:
"""
Computes the composed node's (edge) bit-operation count.

Args:
fw_impl: A FrameworkImplementation object with framework specific methods.
fw_info: A FrameworkInfo object with framework specific information,
candidate_idx: The index of the node's quantization candidate configuration.

Returns: The BOPS count of the composed node.

"""
kernel_attr = fw_info.get_kernel_op_attributes(self.original_weights_node.type)[0]
node_mac = fw_impl.get_node_mac_operations(self.original_weights_node, fw_info)
candidate = self.candidates_quantization_cfg[candidate_idx]
kernel_attr_cfg = candidate.weights_quantization_cfg.get_attr_config(kernel_attr)
weights_bit = kernel_attr_cfg.weights_n_bits if \
kernel_attr_cfg.enable_weights_quantization else FLOAT_BITWIDTH
activation_bit = candidate.activation_quantization_cfg.activation_n_bits if \
candidate.activation_quantization_cfg.enable_activation_quantization else FLOAT_BITWIDTH
node_bops = weights_bit * activation_bit * node_mac
return node_bops
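For reference, the formula the removed method implemented (and which the ResourceUtilizationCalculator.compute_bops call elsewhere in this PR is expected to cover) is BOPS = weight bits x activation bits x MACs. A small self-contained sketch, with an illustrative conv MAC count:

FLOAT_BITWIDTH = 32

def bops_count(node_mac: int, weights_n_bits=None, activation_n_bits=None) -> int:
    # when quantization is disabled, fall back to the float bit-width, as in the removed method
    w_bits = weights_n_bits if weights_n_bits is not None else FLOAT_BITWIDTH
    a_bits = activation_n_bits if activation_n_bits is not None else FLOAT_BITWIDTH
    return w_bits * a_bits * node_mac

# e.g. a 3x3 conv with 64 input and 128 output channels over a 56x56 output map
macs = 3 * 3 * 64 * 128 * 56 * 56
print(bops_count(macs, weights_n_bits=4, activation_n_bits=8))   # 32 * macs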
@@ -19,7 +19,6 @@
from model_compression_toolkit.core import FrameworkInfo
from model_compression_toolkit.core.common import Graph, BaseNode
from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
from model_compression_toolkit.core.common.graph.virtual_activation_weights_node import VirtualActivationWeightsNode
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
RUTarget
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_calculator import \
@@ -28,9 +27,6 @@
NodeActivationQuantizationConfig


# TODO take into account Virtual nodes. Are candidates defined with respect to virtual or original nodes?
# Can we use the virtual graph only for bops and the original graph for everything else?

class MixedPrecisionRUHelper:
""" Helper class for resource utilization computations for mixed precision optimization. """

@@ -65,7 +61,7 @@ def compute_utilization(self, ru_targets: Set[RUTarget], mp_cfg: Optional[List[i
ru[RUTarget.ACTIVATION] = np.array(list(au.values()))

if RUTarget.BOPS in ru_targets:
ru[RUTarget.BOPS] = self._bops_utilization(mp_cfg)
ru[RUTarget.BOPS] = self._bops_utilization(act_qcs=act_qcs, w_qcs=w_qcs)

if RUTarget.TOTAL in ru_targets:
raise ValueError('Total target should be computed based on weights and activations targets.')
@@ -88,8 +84,8 @@ def get_quantization_candidates(self, mp_cfg) \
"""
mp_nodes = self.graph.get_configurable_sorted_nodes(self.fw_info)
node_qcs = {n: n.candidates_quantization_cfg[mp_cfg[i]] for i, n in enumerate(mp_nodes)}
act_qcs = {n: cfg.activation_quantization_cfg for n, cfg in node_qcs.items()}
w_qcs = {n: cfg.weights_quantization_cfg for n, cfg in node_qcs.items()}
act_qcs = {n.name: cfg.activation_quantization_cfg for n, cfg in node_qcs.items()}
w_qcs = {n.name: cfg.weights_quantization_cfg for n, cfg in node_qcs.items()}
return act_qcs, w_qcs
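A small sketch of the mapping above with made-up node names: each configurable node picks the candidate whose index appears in mp_cfg, and the resulting dictionaries are now keyed by node name (the change in this hunk):

mp_cfg = [0, 2]                               # chosen candidate index per configurable node
mp_nodes = ['conv1', 'conv2']                 # configurable nodes in deterministic sorted order
candidates = {'conv1': ['w8a8', 'w4a8'],
              'conv2': ['w8a8', 'w4a8', 'w2a8']}

node_qcs = {n: candidates[n][mp_cfg[i]] for i, n in enumerate(mp_nodes)}
print(node_qcs)                               # {'conv1': 'w8a8', 'conv2': 'w2a8'}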

def _weights_utilization(self, w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantizationConfig]]) -> Dict[BaseNode, float]:
@@ -137,51 +133,25 @@ def _activation_utilization(self, act_qcs: Optional[Dict[BaseNode, NodeActivatio
cuts_util = {c: u.bytes for c, u in cuts_util.items()}
return cuts_util

def _bops_utilization(self, mp_cfg: List[int]) -> np.ndarray:
def _bops_utilization(self,
act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]],
w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantizationConfig]]) -> np.ndarray:
"""
Computes a resource utilization vector with the respective bit-operations (BOPS) count for each configurable node,
according to the given mixed-precision configuration of a virtual graph with composed nodes.
Computes a resource utilization vector with the respective bit-operations (BOPS) count
according to the given mixed-precision configuration.

Args:
mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
act_qcs: nodes' activation quantization configurations, or None.
w_qcs: nodes' weights quantization configurations, or None.
Either both are provided, or both are None.

Returns:
A vector of node's BOPS count.
"""
# bops is computed for all nodes, so non-configurable memory is already covered by the computation of
# configurable nodes
if not mp_cfg:
assert [act_qcs, w_qcs].count(None) in [0, 2], 'act_qcs and w_qcs should both be provided or both be None.'
if act_qcs is None:
return np.array([])

# TODO keeping old implementation for now
virtual_bops_nodes = [n for n in self.graph.get_topo_sorted_nodes() if isinstance(n, VirtualActivationWeightsNode)]

mp_nodes = self.graph.get_configurable_sorted_nodes_names(self.fw_info)

bops = [n.get_bops_count(self.fw_impl, self.fw_info, candidate_idx=_get_node_cfg_idx(n, mp_cfg, mp_nodes))
for n in virtual_bops_nodes]

return np.array(bops)


def _get_node_cfg_idx(node: BaseNode, mp_cfg: List[int], sorted_configurable_nodes_names: List[str]) -> int:
"""
Returns the index of a node's quantization configuration candidate according to the given
mixed-precision configuration. If the node is not configurable, then it must have a single configuration,
therefore, the index 0 is returned.

Args:
node: A node to get its candidate configuration index.
mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
sorted_configurable_nodes_names: A list of configurable nodes names.

Returns: An index (integer) of a node's quantization configuration candidate.
"""

if node.name in sorted_configurable_nodes_names:
node_idx = sorted_configurable_nodes_names.index(node.name)
return mp_cfg[node_idx]
else: # pragma: no cover
assert len(node.candidates_quantization_cfg) > 0, \
"Any node should have at least one candidate configuration."
return 0
_, detailed_bops = self.ru_calculator.compute_bops(TargetInclusionCriterion.Any, BitwidthMode.QCustom,
act_qcs=act_qcs, w_qcs=w_qcs)
return np.array(list(detailed_bops.values()))
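An illustrative sketch of the new return-value handling: compute_bops (as called above) returns a total plus a per-node breakdown, and the helper only flattens the breakdown into a vector. The dictionary contents below are made up:

import numpy as np

def detailed_bops_to_vector(detailed_bops: dict) -> np.ndarray:
    # detailed_bops maps node name -> BOPS count, as in the second return value of compute_bops
    return np.array(list(detailed_bops.values()))

print(detailed_bops_to_vector({'conv1': 1.2e9, 'conv2': 4.0e8}))   # [1.2e+09 4.0e+08]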
@@ -83,6 +83,7 @@ def search_bit_width(graph_to_search_cfg: Graph,
# Set graph for MP search
graph = copy.deepcopy(graph_to_search_cfg) # Copy graph before searching
if target_resource_utilization.bops_restricted():
# TODO: we only need the virtual graph if both activations and weights are configurable
# Since Bit-operations count target resource utilization is set, we need to reconstruct the graph for the MP search
graph = substitute(graph, fw_impl.get_substitutions_virtual_weights_activation_coupling())
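A hedged sketch of the guard above: bops_restricted comes from the real ResourceUtilization API, but the implementation below is an assumption, treating a finite BOPS budget as "restricted":

import math

class ResourceUtilizationSketch:
    def __init__(self, bops: float = math.inf):
        self.bops = bops

    def bops_restricted(self) -> bool:
        # assumption: restricted means the user requested a finite BOPS budget
        return self.bops < math.inf

assert not ResourceUtilizationSketch().bops_restricted()
assert ResourceUtilizationSketch(bops=2e9).bops_restricted()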

@@ -189,11 +189,9 @@ def compute_resource_utilization_for_config(self, config: List[int]) -> Resource

"""
act_qcs, w_qcs = self.ru_helper.get_quantization_candidates(config)
act_qcs = None if (RUTarget.ACTIVATION not in self.ru_targets_to_compute and RUTarget.TOTAL not in self.ru_targets_to_compute) else act_qcs
w_qcs = None if (RUTarget.WEIGHTS not in self.ru_targets_to_compute and RUTarget.TOTAL not in self.ru_targets_to_compute) else w_qcs
ru = self.ru_helper.ru_calculator.compute_resource_utilization(
target_criterion=TargetInclusionCriterion.AnyQuantized, bitwidth_mode=BitwidthMode.QCustom, act_qcs=act_qcs,
w_qcs=w_qcs, ru_targets=self.ru_targets_to_compute)
w_qcs=w_qcs, ru_targets=self.ru_targets_to_compute, allow_unused_qcs=True)
return ru

def finalize_distance_metric(self, layer_to_metrics_mapping: Dict[int, Dict[int, float]]):