chore: update licenses

zama-ai · May 22, 2024 · 016d323 · 016d323
1 parent 80fc254
commit 016d323
Show file tree

Hide file tree

Showing 5 changed files with 5 additions and 52 deletions.
diff --git a/deps_licenses/licenses_linux_user.txt.md5 b/deps_licenses/licenses_linux_user.txt.md5
@@ -1 +1 @@
-9b8316c2a6c823884676b39f52eb018a
+71ae0b3b4cce88c7c3d477d986f3b234
diff --git a/src/concrete/ml/onnx/onnx_impl_utils.py b/src/concrete/ml/onnx/onnx_impl_utils.py
@@ -230,13 +230,6 @@ def onnx_avgpool_compute_norm_const(
     return norm_const
 
 
-# This function needs to be updated when the truncate feature is released.
-# The following changes should be made:
-# - Remove the `half` term
-# - Replace `rounding_bit_pattern` with `truncate_bit_pattern`
-# - Potentially replace `lsbs_to_remove` with `auto_truncate`
-# - Adjust the typing
-# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4143
 def rounded_comparison(
     x: numpy.ndarray, y: numpy.ndarray, lsbs_to_remove: int, operation: ComparisonOperationType
 ) -> Tuple[bool]:
@@ -261,20 +254,5 @@ def rounded_comparison(
     """
 
     assert isinstance(lsbs_to_remove, int)
-
-    # Workaround: in this context, `round_bit_pattern` is used as a truncate operation.
-    # Consequently, we subtract a term, called `half` that will subsequently be re-added during the
-    # `round_bit_pattern` process.
-    # half = 1 << (lsbs_to_remove - 1)
-
-    # To determine if 'x' 'operation' 'y' (operation being <, >, >=, <=), we evaluate 'x - y'
-    # We cast to int because if half is too high the result might be float
-    # intermediate = ((x - y) - half)
-    # intermediate_as_int = intermediate.astype(numpy.int64)
-    #
-    # if not isinstance(intermediate, Tracer):
-    #     assert (intermediate == intermediate_as_int).all()
-
     rounded_subtraction = truncate_bit_pattern(x - y, lsbs_to_remove=lsbs_to_remove)
-
     return (operation(rounded_subtraction),)
diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py
@@ -1339,7 +1339,6 @@ def __init__(self, n_bits: Union[int, Dict[str, int]]):
 
         BaseEstimator.__init__(self)
 
-    # TODO: FIX EXACT PREDICTION WITH HIGH BIT WIDTH
     # pylint: disable=too-many-locals,too-many-statements,too-many-branches
     @classmethod
     def from_sklearn_model(
@@ -1384,7 +1383,7 @@ def from_sklearn_model(
 
         cls_signature = inspect.signature(cls)
         init_params_keys = list(init_params.keys())
-        for key in init_params_keys:
+        for key in init_params_keys:  # pragma: no cover
             if key not in cls_signature.parameters:
                 init_params.pop(key)
         model = cls(n_bits=n_bits, **init_params)
@@ -1445,13 +1444,11 @@ def from_sklearn_model(
                 max_threshold_value = unique_threshold_for_feature_sorted.max()
                 min_threshold_value = unique_threshold_for_feature_sorted.min()
             else:
-                # TODO: maybe we should pick a random value here ?
                 max_threshold_value = 1.0
                 min_threshold_value = 0.0
 
             # We compute a epsilon such that we have one quantized value on each side of the range
             # This offset will either be a right or left offset according to the framework
-            # TODO: reconsider this
             number_of_need_offset_values = 2
             if num_unique_thresholds == 0:
                 epsilon = 1.0
@@ -1463,7 +1460,6 @@ def from_sklearn_model(
                 )
 
             # Input quantizers based on thresholds
-            # TODO: DOUBLE CHECK THIS PART
             if X is None:
                 if num_unique_thresholds:
                     min_quantization_value = min_threshold_value
@@ -1495,7 +1491,6 @@ def from_sklearn_model(
                 n_bits=n_bits,
                 values=numpy.array([min_quantization_value, max_quantization_value]),
             ).quantizer
-            # TODO: Assert that there is one and only one bit-value above and below the threshold
             input_quantizers.append(input_quantizer)
 
         # Convert thresholds to their quantized equivalent
@@ -1513,14 +1508,6 @@ def from_sklearn_model(
             quantized_thresholds_array[threshold_index, 0] = quantized_threshold_value
             dequantized_thresholds_array[threshold_index, 0] = dequantized_threshold_value
 
-        # TODO: debug
-        if n_bits > 20:
-            diff = dequantized_thresholds_array - bias_1
-            max_diff = numpy.abs(diff).max()
-            if max_diff > 1e-4:
-                print("ERROR")
-                print(max_diff)
-
         onnx_model.graph.initializer[bias_1_index].CopyFrom(
             numpy_helper.from_array(
                 quantized_thresholds_array,

diff --git a/src/concrete/ml/sklearn/tree_to_numpy.py b/src/concrete/ml/sklearn/tree_to_numpy.py
@@ -506,7 +506,7 @@ def get_lsbs_to_remove_for_trees(array: numpy.ndarray) -> int:
         stage_1 = bias_1 - (q_x @ mat_1.transpose(0, 2, 1))
         matrix_q = stage_1 >= 0
 
-    else:
+    else:  # pragma: no cover
         raise ValueError("Couldn't see if the comparison is 'Less' or 'LessOrEqual'")
 
     lsbs_to_remove_for_trees_stage_1 = get_lsbs_to_remove_for_trees(stage_1)

diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py
@@ -1137,18 +1137,7 @@ def check_exposition_structural_methods_decision_trees(model, x, y):
     )
 
 
-# Add a test to match fp32 -> quant -> dequant weights at the ONNX level
-# in the high bit width setting
-# Some snippet to do this:
-# if n_bits > 17:
-#     diff = init_tensor - init_tensor_as_int
-#     max_diff = numpy.abs(diff).max()
-#     if max_diff > 1e-3:
-#         raise ValueError(f"{max_diff=} > 1e-4")
-
-
 # pylint: disable-next=too-many-locals,too-many-statements
-# TODO: make this pass with rounding (high-bit-width create overflow)
 @pytest.mark.parametrize("model_class, parameters", get_sklearn_tree_models_and_datasets())
 @pytest.mark.parametrize("use_rounding", [False, True])
 def test_load_fitted_sklearn_tree_models(
@@ -1185,7 +1174,6 @@ def test_load_fitted_sklearn_tree_models(
     max_n_bits = 18
     reasonable_n_bits = 8
 
-    # TODO: add normal bit-width comparison
     if isinstance(concrete_model, BaseTreeClassifierMixin):
         for n_bits, cml_tolerance, sklearn_tolerance in [
             (max_n_bits, 1e-1, 1e-7),
@@ -1273,7 +1261,7 @@ def test_load_fitted_sklearn_tree_models(
     else:
         for n_bits, cml_tolerance, sklearn_tolerance in [
             (max_n_bits, 0.8, 1e-5),
-            (reasonable_n_bits, 1.4, 1.4),
+            (reasonable_n_bits, 1.8, 1.8),
         ]:
             # Load a Concrete ML model from the fitted scikit-learn one
             loaded_from_threshold = model_class.from_sklearn_model(
@@ -1326,7 +1314,7 @@ def test_load_fitted_sklearn_tree_models(
                     value < sklearn_tolerance
                 ), f"{loaded_mse_from_data_mse=} != {sklearn_mse} ({value=}>={sklearn_tolerance=})"
 
-            # # Compare with Concrete ML
+            # Compare with Concrete ML
             with subtests.test(
                 msg="Regression CML vs Threshold", n_bits=n_bits, tolerance=cml_tolerance
             ):
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		-9b8316c2a6c823884676b39f52eb018a
		+71ae0b3b4cce88c7c3d477d986f3b234