Fix bops calculation #1369

Open · wants to merge 17 commits into main
8 changes: 8 additions & 0 deletions model_compression_toolkit/core/common/graph/base_node.py
@@ -167,6 +167,14 @@ def is_configurable_weight(self, attr_name: str) -> bool:
"""
return self.is_weights_quantization_enabled(attr_name) and not self.is_all_weights_candidates_equal(attr_name)

def has_any_configurable_weight(self) -> bool:
"""
Check whether any of the node's weights is configurable.
Returns:
Whether any of the node's weights is configurable.
"""
return any(self.is_configurable_weight(attr) for attr in self.weights)

def has_configurable_activation(self) -> bool:
"""
Checks whether the activation has a configurable quantization.
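Outside the diff, a minimal standalone sketch of the new helper's logic; the `_NodeSketch` class below is hypothetical and only mirrors the two pieces the hunk relies on (a `weights` mapping and a per-attribute `is_configurable_weight` predicate):

class _NodeSketch:
    def __init__(self, weights, configurable_attrs):
        self.weights = weights                      # attribute name -> weight tensor
        self._configurable = set(configurable_attrs)

    def is_configurable_weight(self, attr_name: str) -> bool:
        # stands in for: quantization enabled and candidates not all equal
        return attr_name in self._configurable

    def has_any_configurable_weight(self) -> bool:
        # same expression as the new BaseNode helper above
        return any(self.is_configurable_weight(attr) for attr in self.weights)


node = _NodeSketch(weights={'kernel': None, 'bias': None}, configurable_attrs={'kernel'})
assert node.has_any_configurable_weight()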
model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py
@@ -24,7 +24,6 @@

from model_compression_toolkit.core.common.quantization.candidate_node_quantization_config import \
CandidateNodeQuantizationConfig
from model_compression_toolkit.logger import Logger


class VirtualSplitNode(BaseNode):
@@ -73,11 +72,14 @@ def __init__(self, origin_node: BaseNode, kernel_attr: str):
super().__init__(origin_node)

self.name = origin_node.name + VIRTUAL_WEIGHTS_SUFFIX

self.candidates_quantization_cfg = origin_node.get_unique_weights_candidates(kernel_attr)
for c in self.candidates_quantization_cfg:
c.activation_quantization_cfg.enable_activation_quantization = False
c.activation_quantization_cfg.activation_n_bits = FLOAT_BITWIDTH
# A virtual weights node is created only to be absorbed into a virtual composed node right away.
# However, in some cases composition is impossible and the virtual weights node remains in the graph.
# In such a case it messes up the resource utilization computation, specifically activation cuts. To minimize
# the impact, we preserve the original node's behavior w.r.t. activation (shape and quantization),
# so that the prev - virtualW cut is identical to prev - origin_node. Only the virtualW - virtualA cut will differ
# from the original graph, so in the worst case the utilization of the virtual graph will be higher.
# This should guarantee that the utilization of the original graph does not exceed the requested target.
self.candidates_quantization_cfg = origin_node.candidates_quantization_cfg
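To illustrate the behavioral change the comment describes, a sketch with a dummy config class (not MCT code): the previous implementation forced the virtual weights node's activation to float, inflating the prev - virtualW activation cut, while keeping the origin node's candidates leaves that cut unchanged:

from dataclasses import dataclass

FLOAT_BITWIDTH = 32

@dataclass
class ActivationCfgSketch:
    enable_activation_quantization: bool = True
    activation_n_bits: int = 8

# old behavior: activation quantization disabled and bit-width forced to float,
# so the prev - virtualW cut is counted as 32-bit tensors
old_cfg = ActivationCfgSketch()
old_cfg.enable_activation_quantization = False
old_cfg.activation_n_bits = FLOAT_BITWIDTH

# new behavior: the origin node's candidate is preserved as-is,
# so the prev - virtualW cut matches prev - origin_node (still 8-bit here)
new_cfg = ActivationCfgSketch()

assert old_cfg.activation_n_bits == 32 and new_cfg.activation_n_bits == 8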


class VirtualSplitActivationNode(VirtualSplitNode):
@@ -126,61 +128,59 @@ class VirtualActivationWeightsNode(BaseNode):
def __init__(self,
act_node: BaseNode,
weights_node: BaseNode,
name: str,
framework_attr: Dict[str, Any],
input_shape: Tuple[Any],
output_shape: Tuple[Any],
weights: Dict[str, np.ndarray],
layer_class: type,
fw_info: FrameworkInfo,
reuse: bool = False,
reuse_group: str = None,
quantization_attr: Dict[str, Any] = None,
has_activation: bool = True,
**kwargs):
fw_info: FrameworkInfo):
"""
Init a VirtualActivationWeightsNode object.

Args:
act_node: The original activation node.
weights_node: The original weights node.
name: Node's name
framework_attr: Framework attributes the layer had which the node holds.
input_shape: Input tensor shape of the node.
output_shape: Input tensor shape of the node.
weights: Dictionary from a variable name to the weights with that name in the layer the node represents.
layer_class: Class path of the layer this node represents.
fw_info: A FrameworkInfo object with framework specific information,
reuse: Whether this node was duplicated and represents a reused layer.
reuse_group: Name of group of nodes from the same reused layer.
quantization_attr: Attributes the node holds regarding how it should be quantized.
has_activation: Whether the node has activations that we might want to quantize.
**kwargs: Additional arguments that can be passed but are not used (allows to init the object with an
existing node's __dict__).

fw_info: A FrameworkInfo object with framework specific information.
"""

weights = weights_node.weights
if act_node.weights:
assert fw_info.get_kernel_op_attributes(act_node)[0] is None, \
f'Node {act_node} with kernel cannot be used as activation for VirtualActivationWeightsNode.'
if set(weights.keys()).intersection(set(act_node.weights.keys())):
raise ValueError('Activation and weight nodes are not expected to have the same weight attribute') # pragma: no cover
if any(act_node.is_configurable_weight(attr) for attr in act_node.weights):
raise NotImplementedError('Node with a configurable weight cannot be used as activation for '
'VirtualActivationWeightsNode.') # pragma: no cover
# combine weights from activation and weights
weights.update(act_node.weights)

name = f"{VIRTUAL_ACTIVATION_WEIGHTS_NODE_PREFIX}_{act_node.name}_{weights_node.name}"
super().__init__(name,
framework_attr,
input_shape,
output_shape,
weights,
layer_class,
reuse,
reuse_group,
quantization_attr,
has_activation)
framework_attr=weights_node.framework_attr,
input_shape=act_node.input_shape,
output_shape=act_node.output_shape,
weights=weights,
layer_class=weights_node.layer_class,
reuse=weights_node.reuse,
reuse_group=weights_node.reuse_group,
quantization_attr=weights_node.quantization_attr,
has_activation=False)

self.name = f"{VIRTUAL_ACTIVATION_WEIGHTS_NODE_PREFIX}_{act_node.name}_{weights_node.name}"

self.original_activation_node = act_node
self.original_weights_node = weights_node

v_candidates = []
kernel_attr = fw_info.get_kernel_op_attributes(weights_node.type)[0]
weights_candidates_quantization_cfg = weights_node.get_unique_weights_candidates(kernel_attr)
for c_a in act_node.candidates_quantization_cfg:
for c_w in weights_node.candidates_quantization_cfg:
for c_w in weights_candidates_quantization_cfg:
composed_candidate = CandidateNodeQuantizationConfig(activation_quantization_cfg=c_a.activation_quantization_cfg,
weights_quantization_cfg=c_w.weights_quantization_cfg)
if act_node.weights:
# add non-kernel weights cfg from activation node to the composed node's weights cfg
composed_candidate.weights_quantization_cfg.attributes_config_mapping.update(
c_a.weights_quantization_cfg.attributes_config_mapping
)
composed_candidate.weights_quantization_cfg.pos_attributes_config_mapping.update(
c_a.weights_quantization_cfg.pos_attributes_config_mapping
)
v_candidates.append(composed_candidate)

# sorting the candidates by weights number of bits first and then by activation number of bits (reversed order)
@@ -189,26 +189,3 @@ def __init__(self,
c.activation_quantization_cfg.activation_n_bits), reverse=True)

self.candidates_quantization_cfg = v_candidates
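A minimal sketch of the composition performed above, with plain integers in place of CandidateNodeQuantizationConfig objects, showing the cartesian product of activation and (unique) weights candidates and the descending sort by (weights bits, activation bits):

act_bits = [8, 4, 2]       # activation candidates of the activation node
weight_bits = [8, 4]       # unique kernel candidates of the weights node

v_candidates = [(w, a) for a in act_bits for w in weight_bits]
v_candidates.sort(key=lambda c: (c[0], c[1]), reverse=True)
print(v_candidates)        # [(8, 8), (8, 4), (8, 2), (4, 8), (4, 4), (4, 2)]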

def get_bops_count(self, fw_impl: Any, fw_info: FrameworkInfo, candidate_idx: int) -> float:
"""
Computes the composed node's (edge) bit-operation count.

Args:
fw_impl: A FrameworkImplementation object with framework specific methods.
fw_info: A FrameworkInfo object with framework specific information,
candidate_idx: The index of the node's quantization candidate configuration.

Returns: The BOPS count of the composed node.

"""
kernel_attr = fw_info.get_kernel_op_attributes(self.original_weights_node.type)[0]
node_mac = fw_impl.get_node_mac_operations(self.original_weights_node, fw_info)
candidate = self.candidates_quantization_cfg[candidate_idx]
kernel_attr_cfg = candidate.weights_quantization_cfg.get_attr_config(kernel_attr)
weights_bit = kernel_attr_cfg.weights_n_bits if \
kernel_attr_cfg.enable_weights_quantization else FLOAT_BITWIDTH
activation_bit = candidate.activation_quantization_cfg.activation_n_bits if \
candidate.activation_quantization_cfg.enable_activation_quantization else FLOAT_BITWIDTH
node_bops = weights_bit * activation_bit * node_mac
return node_bops
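For reference, the formula the removed method implemented (and which the ResourceUtilizationCalculator.compute_bops call elsewhere in this PR is expected to cover) is BOPS = weight bits x activation bits x MACs. A small self-contained sketch, with an illustrative conv MAC count:

FLOAT_BITWIDTH = 32

def bops_count(node_mac: int, weights_n_bits=None, activation_n_bits=None) -> int:
    # when quantization is disabled, fall back to the float bit-width, as in the removed method
    w_bits = weights_n_bits if weights_n_bits is not None else FLOAT_BITWIDTH
    a_bits = activation_n_bits if activation_n_bits is not None else FLOAT_BITWIDTH
    return w_bits * a_bits * node_mac

# e.g. a 3x3 conv with 64 input and 128 output channels over a 56x56 output map
macs = 3 * 3 * 64 * 128 * 56 * 56
print(bops_count(macs, weights_n_bits=4, activation_n_bits=8))   # 32 * macs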
@@ -19,7 +19,6 @@
from model_compression_toolkit.core import FrameworkInfo
from model_compression_toolkit.core.common import Graph, BaseNode
from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
from model_compression_toolkit.core.common.graph.virtual_activation_weights_node import VirtualActivationWeightsNode
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
RUTarget
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_calculator import \
@@ -28,9 +27,6 @@
NodeActivationQuantizationConfig


# TODO take into account Virtual nodes. Are candidates defined with respect to virtual or original nodes?
# Can we use the virtual graph only for bops and the original graph for everything else?

class MixedPrecisionRUHelper:
""" Helper class for resource utilization computations for mixed precision optimization. """

@@ -65,7 +61,7 @@ def compute_utilization(self, ru_targets: Set[RUTarget], mp_cfg: Optional[List[i
ru[RUTarget.ACTIVATION] = np.array(list(au.values()))

if RUTarget.BOPS in ru_targets:
ru[RUTarget.BOPS] = self._bops_utilization(mp_cfg)
ru[RUTarget.BOPS] = self._bops_utilization(act_qcs=act_qcs, w_qcs=w_qcs)

if RUTarget.TOTAL in ru_targets:
raise ValueError('Total target should be computed based on weights and activations targets.')
@@ -88,8 +84,8 @@ def get_quantization_candidates(self, mp_cfg) \
"""
mp_nodes = self.graph.get_configurable_sorted_nodes(self.fw_info)
node_qcs = {n: n.candidates_quantization_cfg[mp_cfg[i]] for i, n in enumerate(mp_nodes)}
act_qcs = {n: cfg.activation_quantization_cfg for n, cfg in node_qcs.items()}
w_qcs = {n: cfg.weights_quantization_cfg for n, cfg in node_qcs.items()}
act_qcs = {n.name: cfg.activation_quantization_cfg for n, cfg in node_qcs.items()}
w_qcs = {n.name: cfg.weights_quantization_cfg for n, cfg in node_qcs.items()}
return act_qcs, w_qcs
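A small sketch of the mapping above with made-up node names: each configurable node picks the candidate whose index appears in mp_cfg, and the resulting dictionaries are now keyed by node name (the change in this hunk):

mp_cfg = [0, 2]                               # chosen candidate index per configurable node
mp_nodes = ['conv1', 'conv2']                 # configurable nodes in deterministic sorted order
candidates = {'conv1': ['w8a8', 'w4a8'],
              'conv2': ['w8a8', 'w4a8', 'w2a8']}

node_qcs = {n: candidates[n][mp_cfg[i]] for i, n in enumerate(mp_nodes)}
print(node_qcs)                               # {'conv1': 'w8a8', 'conv2': 'w2a8'}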

def _weights_utilization(self, w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantizationConfig]]) -> Dict[BaseNode, float]:
@@ -137,51 +133,25 @@ def _activation_utilization(self, act_qcs: Optional[Dict[BaseNode, NodeActivatio
cuts_util = {c: u.bytes for c, u in cuts_util.items()}
return cuts_util

def _bops_utilization(self, mp_cfg: List[int]) -> np.ndarray:
def _bops_utilization(self,
act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]],
w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantizationConfig]]) -> np.ndarray:
"""
Computes a resource utilization vector with the respective bit-operations (BOPS) count for each configurable node,
according to the given mixed-precision configuration of a virtual graph with composed nodes.
Computes a resource utilization vector with the respective bit-operations (BOPS) count
according to the given mixed-precision configuration.

Args:
mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
act_qcs: nodes' activation quantization configurations, or None.
w_qcs: nodes' weights quantization configurations, or None.
Either both are provided, or both are None.

Returns:
A vector of node's BOPS count.
"""
# bops is computed for all nodes, so non-configurable memory is already covered by the computation of
# configurable nodes
if not mp_cfg:
assert [act_qcs, w_qcs].count(None) in [0, 2], 'act_qcs and w_qcs should both be provided or both be None.'
if act_qcs is None:
return np.array([])

# TODO keeping old implementation for now
virtual_bops_nodes = [n for n in self.graph.get_topo_sorted_nodes() if isinstance(n, VirtualActivationWeightsNode)]

mp_nodes = self.graph.get_configurable_sorted_nodes_names(self.fw_info)

bops = [n.get_bops_count(self.fw_impl, self.fw_info, candidate_idx=_get_node_cfg_idx(n, mp_cfg, mp_nodes))
for n in virtual_bops_nodes]

return np.array(bops)


def _get_node_cfg_idx(node: BaseNode, mp_cfg: List[int], sorted_configurable_nodes_names: List[str]) -> int:
"""
Returns the index of a node's quantization configuration candidate according to the given
mixed-precision configuration. If the node is not configurable, then it must have a single configuration,
therefore, the index 0 is returned.

Args:
node: A node to get its candidate configuration index.
mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
sorted_configurable_nodes_names: A list of configurable nodes names.

Returns: An index (integer) of a node's quantization configuration candidate.
"""

if node.name in sorted_configurable_nodes_names:
node_idx = sorted_configurable_nodes_names.index(node.name)
return mp_cfg[node_idx]
else: # pragma: no cover
assert len(node.candidates_quantization_cfg) > 0, \
"Any node should have at least one candidate configuration."
return 0
_, detailed_bops = self.ru_calculator.compute_bops(TargetInclusionCriterion.Any, BitwidthMode.QCustom,
act_qcs=act_qcs, w_qcs=w_qcs)
return np.array(list(detailed_bops.values()))
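An illustrative sketch of the new return-value handling: compute_bops (as called above) returns a total plus a per-node breakdown, and the helper only flattens the breakdown into a vector. The dictionary contents below are made up:

import numpy as np

def detailed_bops_to_vector(detailed_bops: dict) -> np.ndarray:
    # detailed_bops maps node name -> BOPS count, as in the second return value of compute_bops
    return np.array(list(detailed_bops.values()))

print(detailed_bops_to_vector({'conv1': 1.2e9, 'conv2': 4.0e8}))   # [1.2e+09 4.0e+08]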
@@ -83,6 +83,7 @@ def search_bit_width(graph_to_search_cfg: Graph,
# Set graph for MP search
graph = copy.deepcopy(graph_to_search_cfg) # Copy graph before searching
if target_resource_utilization.bops_restricted():
# TODO: we only need the virtual graph if both activations and weights are configurable
# Since Bit-operations count target resource utilization is set, we need to reconstruct the graph for the MP search
graph = substitute(graph, fw_impl.get_substitutions_virtual_weights_activation_coupling())
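A hedged sketch of the guard above: bops_restricted comes from the real ResourceUtilization API, but the implementation below is an assumption, treating a finite BOPS budget as "restricted":

import math

class ResourceUtilizationSketch:
    def __init__(self, bops: float = math.inf):
        self.bops = bops

    def bops_restricted(self) -> bool:
        # assumption: restricted means the user requested a finite BOPS budget
        return self.bops < math.inf

assert not ResourceUtilizationSketch().bops_restricted()
assert ResourceUtilizationSketch(bops=2e9).bops_restricted()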

@@ -189,11 +189,9 @@ def compute_resource_utilization_for_config(self, config: List[int]) -> Resource

"""
act_qcs, w_qcs = self.ru_helper.get_quantization_candidates(config)
act_qcs = None if (RUTarget.ACTIVATION not in self.ru_targets_to_compute and RUTarget.TOTAL not in self.ru_targets_to_compute) else act_qcs
w_qcs = None if (RUTarget.WEIGHTS not in self.ru_targets_to_compute and RUTarget.TOTAL not in self.ru_targets_to_compute) else w_qcs
ru = self.ru_helper.ru_calculator.compute_resource_utilization(
target_criterion=TargetInclusionCriterion.AnyQuantized, bitwidth_mode=BitwidthMode.QCustom, act_qcs=act_qcs,
w_qcs=w_qcs, ru_targets=self.ru_targets_to_compute)
w_qcs=w_qcs, ru_targets=self.ru_targets_to_compute, allow_unused_qcs=True)
return ru

def finalize_distance_metric(self, layer_to_metrics_mapping: Dict[int, Dict[int, float]]):