Skip to content

Commit

Permalink
chore: update licenses
Browse files Browse the repository at this point in the history
  • Loading branch information
fd0r committed May 22, 2024
1 parent 80fc254 commit 016d323
Show file tree
Hide file tree
Showing 5 changed files with 5 additions and 52 deletions.
2 changes: 1 addition & 1 deletion deps_licenses/licenses_linux_user.txt.md5
Original file line number Diff line number Diff line change
@@ -1 +1 @@
9b8316c2a6c823884676b39f52eb018a
71ae0b3b4cce88c7c3d477d986f3b234
22 changes: 0 additions & 22 deletions src/concrete/ml/onnx/onnx_impl_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,13 +230,6 @@ def onnx_avgpool_compute_norm_const(
return norm_const


# This function needs to be updated when the truncate feature is released.
# The following changes should be made:
# - Remove the `half` term
# - Replace `rounding_bit_pattern` with `truncate_bit_pattern`
# - Potentially replace `lsbs_to_remove` with `auto_truncate`
# - Adjust the typing
# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4143
def rounded_comparison(
x: numpy.ndarray, y: numpy.ndarray, lsbs_to_remove: int, operation: ComparisonOperationType
) -> Tuple[bool]:
Expand All @@ -261,20 +254,5 @@ def rounded_comparison(
"""

assert isinstance(lsbs_to_remove, int)

# Workaround: in this context, `round_bit_pattern` is used as a truncate operation.
# Consequently, we subtract a term, called `half` that will subsequently be re-added during the
# `round_bit_pattern` process.
# half = 1 << (lsbs_to_remove - 1)

# To determine if 'x' 'operation' 'y' (operation being <, >, >=, <=), we evaluate 'x - y'
# We cast to int because if half is too high the result might be float
# intermediate = ((x - y) - half)
# intermediate_as_int = intermediate.astype(numpy.int64)
#
# if not isinstance(intermediate, Tracer):
# assert (intermediate == intermediate_as_int).all()

rounded_subtraction = truncate_bit_pattern(x - y, lsbs_to_remove=lsbs_to_remove)

return (operation(rounded_subtraction),)
15 changes: 1 addition & 14 deletions src/concrete/ml/sklearn/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1339,7 +1339,6 @@ def __init__(self, n_bits: Union[int, Dict[str, int]]):

BaseEstimator.__init__(self)

# TODO: FIX EXACT PREDICTION WITH HIGH BIT WIDTH
# pylint: disable=too-many-locals,too-many-statements,too-many-branches
@classmethod
def from_sklearn_model(
Expand Down Expand Up @@ -1384,7 +1383,7 @@ def from_sklearn_model(

cls_signature = inspect.signature(cls)
init_params_keys = list(init_params.keys())
for key in init_params_keys:
for key in init_params_keys: # pragma: no cover
if key not in cls_signature.parameters:
init_params.pop(key)
model = cls(n_bits=n_bits, **init_params)
Expand Down Expand Up @@ -1445,13 +1444,11 @@ def from_sklearn_model(
max_threshold_value = unique_threshold_for_feature_sorted.max()
min_threshold_value = unique_threshold_for_feature_sorted.min()
else:
# TODO: maybe we should pick a random value here ?
max_threshold_value = 1.0
min_threshold_value = 0.0

# We compute a epsilon such that we have one quantized value on each side of the range
# This offset will either be a right or left offset according to the framework
# TODO: reconsider this
number_of_need_offset_values = 2
if num_unique_thresholds == 0:
epsilon = 1.0
Expand All @@ -1463,7 +1460,6 @@ def from_sklearn_model(
)

# Input quantizers based on thresholds
# TODO: DOUBLE CHECK THIS PART
if X is None:
if num_unique_thresholds:
min_quantization_value = min_threshold_value
Expand Down Expand Up @@ -1495,7 +1491,6 @@ def from_sklearn_model(
n_bits=n_bits,
values=numpy.array([min_quantization_value, max_quantization_value]),
).quantizer
# TODO: Assert that there is one and only one bit-value above and below the threshold
input_quantizers.append(input_quantizer)

# Convert thresholds to their quantized equivalent
Expand All @@ -1513,14 +1508,6 @@ def from_sklearn_model(
quantized_thresholds_array[threshold_index, 0] = quantized_threshold_value
dequantized_thresholds_array[threshold_index, 0] = dequantized_threshold_value

# TODO: debug
if n_bits > 20:
diff = dequantized_thresholds_array - bias_1
max_diff = numpy.abs(diff).max()
if max_diff > 1e-4:
print("ERROR")
print(max_diff)

onnx_model.graph.initializer[bias_1_index].CopyFrom(
numpy_helper.from_array(
quantized_thresholds_array,
Expand Down
2 changes: 1 addition & 1 deletion src/concrete/ml/sklearn/tree_to_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ def get_lsbs_to_remove_for_trees(array: numpy.ndarray) -> int:
stage_1 = bias_1 - (q_x @ mat_1.transpose(0, 2, 1))
matrix_q = stage_1 >= 0

else:
else: # pragma: no cover
raise ValueError("Couldn't see if the comparison is 'Less' or 'LessOrEqual'")

lsbs_to_remove_for_trees_stage_1 = get_lsbs_to_remove_for_trees(stage_1)
Expand Down
16 changes: 2 additions & 14 deletions tests/sklearn/test_sklearn_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1137,18 +1137,7 @@ def check_exposition_structural_methods_decision_trees(model, x, y):
)


# Add a test to match fp32 -> quant -> dequant weights at the ONNX level
# in the high bit width setting
# Some snippet to do this:
# if n_bits > 17:
# diff = init_tensor - init_tensor_as_int
# max_diff = numpy.abs(diff).max()
# if max_diff > 1e-3:
# raise ValueError(f"{max_diff=} > 1e-4")


# pylint: disable-next=too-many-locals,too-many-statements
# TODO: make this pass with rounding (high-bit-width create overflow)
@pytest.mark.parametrize("model_class, parameters", get_sklearn_tree_models_and_datasets())
@pytest.mark.parametrize("use_rounding", [False, True])
def test_load_fitted_sklearn_tree_models(
Expand Down Expand Up @@ -1185,7 +1174,6 @@ def test_load_fitted_sklearn_tree_models(
max_n_bits = 18
reasonable_n_bits = 8

# TODO: add normal bit-width comparison
if isinstance(concrete_model, BaseTreeClassifierMixin):
for n_bits, cml_tolerance, sklearn_tolerance in [
(max_n_bits, 1e-1, 1e-7),
Expand Down Expand Up @@ -1273,7 +1261,7 @@ def test_load_fitted_sklearn_tree_models(
else:
for n_bits, cml_tolerance, sklearn_tolerance in [
(max_n_bits, 0.8, 1e-5),
(reasonable_n_bits, 1.4, 1.4),
(reasonable_n_bits, 1.8, 1.8),
]:
# Load a Concrete ML model from the fitted scikit-learn one
loaded_from_threshold = model_class.from_sklearn_model(
Expand Down Expand Up @@ -1326,7 +1314,7 @@ def test_load_fitted_sklearn_tree_models(
value < sklearn_tolerance
), f"{loaded_mse_from_data_mse=} != {sklearn_mse} ({value=}>={sklearn_tolerance=})"

# # Compare with Concrete ML
# Compare with Concrete ML
with subtests.test(
msg="Regression CML vs Threshold", n_bits=n_bits, tolerance=cml_tolerance
):
Expand Down

0 comments on commit 016d323

Please sign in to comment.