diff --git a/docs/advanced_examples/LogisticRegressionTraining.ipynb b/docs/advanced_examples/LogisticRegressionTraining.ipynb
index 3e7f816f2..ffe789009 100644
--- a/docs/advanced_examples/LogisticRegressionTraining.ipynb
+++ b/docs/advanced_examples/LogisticRegressionTraining.ipynb
@@ -30,8 +30,8 @@
     "from matplotlib.lines import Line2D\n",
     "from sklearn import datasets\n",
     "from sklearn.linear_model import SGDClassifier as SklearnSGDClassifier\n",
-    "from sklearn.preprocessing import MinMaxScaler\n",
     "from sklearn.metrics import accuracy_score\n",
+    "from sklearn.preprocessing import MinMaxScaler\n",
     "\n",
     "from concrete.ml.sklearn import SGDClassifier\n",
     "\n",
@@ -383,10 +383,10 @@
     "    model_concrete_partial.partial_fit(x_batch, y_batch, fhe=\"simulate\")\n",
     "\n",
     "    model_concrete_partial.compile(x_train)\n",
-    "    \n",
+    "\n",
     "    # Measure accuracy of the model with FHE simulation\n",
     "    y_pred_partial_fhe = model_concrete_partial.predict(x_test, fhe=\"simulate\")\n",
-    "    \n",
+    "\n",
     "    accuracy_partial = accuracy_score(y_test, y_pred_partial_fhe).mean()\n",
     "    accuracy_scores.append(accuracy_partial)\n",
     "\n",
@@ -404,23 +404,6 @@
 "metadata": {
  "execution": {
   "timeout": 10800
-  },
-  "kernelspec": {
-   "display_name": ".venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.11"
  }
 },
 "nbformat": 4,
diff --git a/src/concrete/ml/common/utils.py b/src/concrete/ml/common/utils.py
index a7b32cedf..40a535b09 100644
--- a/src/concrete/ml/common/utils.py
+++ b/src/concrete/ml/common/utils.py
@@ -565,7 +565,9 @@ def all_values_are_floats(*values: Any) -> bool:
     return all(_is_of_dtype(value, SUPPORTED_FLOAT_TYPES) for value in values)
 
 
-def all_values_are_of_dtype(*values: Any, dtypes: Union[str, List[str]], allow_none: bool = False) -> bool:
+def all_values_are_of_dtype(
+    *values: Any, dtypes: Union[str, List[str]], allow_none: bool = False
+) -> bool:
     """Indicate if all unpacked values are of the specified dtype(s).
 
     Args:
@@ -591,10 +593,12 @@ def all_values_are_of_dtype(*values: Any, dtypes: Union[str, List[str]], allow_n
 
         supported_dtypes[dtype] = supported_dtype
 
-    # If the values can be None, only check the other values 
+    # If the values can be None, only check the other values
     if allow_none:
-        return all(_is_of_dtype(value, supported_dtypes) if value is not None else True for value in values)
-
+        return all(
+            _is_of_dtype(value, supported_dtypes) if value is not None else True for value in values
+        )
+
     return all(_is_of_dtype(value, supported_dtypes) for value in values)
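Aside (not part of the patch): the reflowed `all_values_are_of_dtype` helper is easiest to read through a usage sketch. Only the function and its `dtypes`/`allow_none` parameters come from the change above; the sample arrays are made up for illustration.

    import numpy

    from concrete.ml.common.utils import all_values_are_of_dtype

    # Values quantized to int64, as the quantized module produces them
    q_x = numpy.ones((4, 3), dtype=numpy.int64)
    q_y = numpy.zeros((4, 1), dtype=numpy.int64)

    # Every unpacked value matches the requested dtype
    assert all_values_are_of_dtype(q_x, q_y, dtypes="int64")

    # With allow_none=True, None placeholders are skipped instead of failing
    # the check, which is what quantize_input relies on below
    assert all_values_are_of_dtype(q_x, None, q_y, None, dtypes="int64", allow_none=True)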
diff --git a/src/concrete/ml/quantization/quantized_module.py b/src/concrete/ml/quantization/quantized_module.py
index 31708cf9e..4cf8126cb 100644
--- a/src/concrete/ml/quantization/quantized_module.py
+++ b/src/concrete/ml/quantization/quantized_module.py
@@ -559,7 +559,9 @@ def _fhe_forward(
             return q_results[0]
         return q_results
 
-    def quantize_input(self, *x: Optional[numpy.ndarray]) -> Union[numpy.ndarray, Tuple[Optional[numpy.ndarray], ...]]:
+    def quantize_input(
+        self, *x: Optional[numpy.ndarray]
+    ) -> Union[numpy.ndarray, Tuple[Optional[numpy.ndarray], ...]]:
         """Take the inputs in fp32 and quantize it using the learned quantization parameters.
 
         Args:
@@ -571,22 +573,38 @@ def quantize_input(self, *x: Optional[numpy.ndarray]) -> Union[numpy.ndarray, Tu
         """
         n_inputs = len(self.input_quantizers)
         n_values = len(x)
-        
+
         assert_true(
             n_values == n_inputs,
             f"Got {n_values} inputs, expected {n_inputs}. Either the quantized module has not been "
             "properly initialized or the input data has been changed since its initialization.",
             ValueError,
         )
-        
+
         assert not all(x_i is None for x_i in x), "Please provide at least one input to quantize."
 
-        q_x = tuple(self.input_quantizers[idx].quant(x[idx]) if x[idx] is not None else None for idx in range(len(x)))
+        # Ignore [arg-type] check from mypy as it is not able to see that the input to `quant`
+        # cannot be None
+        q_x = tuple(
+            (
+                self.input_quantizers[idx].quant(x[idx])  # type: ignore[arg-type]
+                if x[idx] is not None
+                else None
+            )
+            for idx in range(len(x))
+        )
 
         # Make sure all inputs are quantized to int64
-        assert all_values_are_of_dtype(*q_x, dtypes="int64", allow_none=True), "Inputs were not quantized to int64"
+        assert all_values_are_of_dtype(
+            *q_x, dtypes="int64", allow_none=True
+        ), "Inputs were not quantized to int64"
 
-        return q_x[0] if len(q_x) == 1 else q_x
+        if len(q_x) == 1:
+            assert q_x[0] is not None
+
+            return q_x[0]
+
+        return q_x
 
     def dequantize_output(
         self, *q_y_preds: numpy.ndarray
@@ -732,8 +750,15 @@ def compile(
 
         # Quantize the inputs
         q_inputs = self.quantize_input(*inputs)
 
+        # Make sure all inputs are quantized to int64 and are not None
+        assert all_values_are_of_dtype(
+            *to_tuple(q_inputs), dtypes="int64", allow_none=False
+        ), "Inputs were not quantized to int64"
+
         # Generate the input-set with proper dimensions
-        inputset = _get_inputset_generator(q_inputs)
+        # Ignore [arg-type] check from mypy as it is not able to see that no value in `q_inputs`
+        # is None
+        inputset = _get_inputset_generator(q_inputs)  # type: ignore[arg-type]
 
         # Check that p_error or global_p_error is not set in both the configuration and in the
         # direct parameters
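Aside (not part of the patch): the None-preserving contract that `quantize_input` now spells out can be sketched standalone. The toy quantizer below is hypothetical; only the tuple-with-None calling convention mirrors the real method.

    from typing import Optional, Tuple

    import numpy

    def toy_quant(x: numpy.ndarray, scale: float = 0.05) -> numpy.ndarray:
        # Quantize floats to int64, matching the "quantized to int64" assertion
        return numpy.rint(x / scale).astype(numpy.int64)

    def quantize_inputs(*x: Optional[numpy.ndarray]) -> Tuple[Optional[numpy.ndarray], ...]:
        # None placeholders pass through so each positional slot keeps its meaning
        return tuple(toy_quant(x_i) if x_i is not None else None for x_i in x)

    X_batch = numpy.random.uniform(-1, 1, size=(1, 8, 3))
    y_batch = numpy.random.randint(0, 2, size=(1, 8, 1)).astype(float)

    # Only the (X, y) slots are quantized; the (weight, bias) slots stay None,
    # which is how _fit_encrypted calls the real quantize_input below
    q_X, q_y, q_w, q_b = quantize_inputs(X_batch, y_batch, None, None)
    assert q_X.dtype == numpy.int64 and q_w is None and q_b is None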
diff --git a/src/concrete/ml/sklearn/linear_model.py b/src/concrete/ml/sklearn/linear_model.py
index 8d2a6aa19..37f349e56 100644
--- a/src/concrete/ml/sklearn/linear_model.py
+++ b/src/concrete/ml/sklearn/linear_model.py
@@ -353,10 +353,10 @@ def _get_training_quantized_module(
             iterations=1,
             fit_bias=self.fit_intercept,
         )
-        
+
         # Enable the underlying FHE circuit to be composed with itself
         # This feature is used in order to be able to iterate in the clear n times without having
-        # to encrypt/decrypt the weight/bias values between each loop 
+        # to encrypt/decrypt the weight/bias values between each loop
         configuration = Configuration(composable=True)
 
         # Compile the model using the compile set
@@ -434,11 +434,11 @@ def _fit_encrypted(
                 "Target values must be 1D, with a shape of (n_samples,), when FHE training is "
                 f"enabled. Got {y.shape}"
             )
-        
+
         n_samples, n_features = X.shape
         weight_shape = (1, n_features, 1)
-        bias_shape = (1,1,1)
-        
+        bias_shape = (1, 1, 1)
+
         # Build the quantized module
         # In case of a partial fit, only do so if it has not been done already (which indicates
         # that this is the partial fit's first call)
@@ -472,9 +472,11 @@ def _fit_encrypted(
 
             y = self.label_encoder.transform(y)
 
+        # Mypy
+        assert self.training_quantized_module.fhe_circuit is not None
+
         # Key generation
         if fhe == "execute":  # pragma: no cover
-            assert self.training_quantized_module.fhe_circuit is not None
 
             # Generate the keys only if necessary. This is already done using the `force=False`
             # parameter, but here we also avoid printing too much verbose if activated
@@ -541,8 +543,9 @@ def _fit_encrypted(
             # A partial fit is similar to running a fit with a single iteration
             max_iter = 1 if is_partial_fit else self.max_iter
 
+        # Quantize and encrypt the batches
         X_batches_enc, y_batches_enc = [], []
-        for iteration_step in range(max_iter):
+        for _ in range(max_iter):
 
             # Sample the batches from X and y in the clear
             batch_indexes = self.random_number_generator.choice(
@@ -555,54 +558,66 @@ def _fit_encrypted(
 
             # Build the batches
             X_batch = X[batch_indexes].astype(float).reshape((1, self.batch_size, n_features))
             y_batch = y[batch_indexes].reshape((1, self.batch_size, 1)).astype(float)
-
-            # The underlying quantized module expects (X, y, weight, bias) as inputs. We thus only 
+
+            # The underlying quantized module expects (X, y, weight, bias) as inputs. We thus only
             # quantize the input and target values using the first and second positional parameter
-            q_X_batch, q_y_batch, _, _ = self.training_quantized_module.quantize_input(X_batch, y_batch, None, None)
-
+            q_X_batch, q_y_batch, _, _ = self.training_quantized_module.quantize_input(
+                X_batch, y_batch, None, None
+            )
+
             # If the training is done in FHE, encrypt the input and target values
             if fhe == "execute":
-
+
                 # Similarly, the underlying FHE circuit expects (X, y, weight, bias) as inputs, and
                 # so does the encrypt method
-                X_batch_enc, y_batch_enc, _, _ = self.training_quantized_module.fhe_circuit.encrypt(q_X_batch, q_y_batch, None, None)
-
+                X_batch_enc, y_batch_enc, _, _ = self.training_quantized_module.fhe_circuit.encrypt(
+                    q_X_batch, q_y_batch, None, None
+                )
+
             else:
                 X_batch_enc, y_batch_enc = q_X_batch, q_y_batch
-
+
             X_batches_enc.append(X_batch_enc)
             y_batches_enc.append(y_batch_enc)
 
-        # Similarly, we only quantize the weight and bias values using the third and fourth 
+        # Similarly, we only quantize the weight and bias values using the third and fourth
         # position parameter
-        _, _, q_weights, q_bias = self.training_quantized_module.quantize_input(None, None, weights, bias)
+        _, _, q_weights, q_bias = self.training_quantized_module.quantize_input(
+            None, None, weights, bias
+        )
 
         # If the training is done in FHE, encrypt the weight and bias values
         if fhe == "execute":
-
+
             # Similarly, we only encrypt using the third and fourth position parameter
             _, _, weights_enc, bias_enc = self.training_quantized_module.fhe_circuit.encrypt(
                 None, None, q_weights, q_bias
             )
-
+
         else:
             weights_enc, bias_enc = q_weights, q_bias
 
         # Iterate on the training quantized module in the clear
         for iteration_step in range(max_iter):
-            X_batch_enc_i, y_batch_enc_i = X_batches_enc[iteration_step], y_batches_enc[iteration_step]
-
+            X_batch_enc_i, y_batch_enc_i = (
+                X_batches_enc[iteration_step],
+                y_batches_enc[iteration_step],
+            )
+
             # Train the model over one iteration
             inference_start = time.time()
-
+
             # If the training is done in FHE, execute the underlying FHE circuit directly on the
             # encrypted values
             if fhe == "execute":
                 weights_enc, bias_enc = self.training_quantized_module.fhe_circuit.run(
-                    X_batch_enc_i, y_batch_enc_i, weights_enc, bias_enc,
+                    X_batch_enc_i,
+                    y_batch_enc_i,
+                    weights_enc,
+                    bias_enc,
                 )
-
-            # Else, use the quantized module on the quantized values (works for both quantized 
+
+            # Else, use the quantized module on the quantized values (works for both quantized
             # clear and FHE simulation modes)
             else:
                 weights_enc, bias_enc = self.training_quantized_module.quantized_forward(
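Aside (not part of the patch): the loop above only works because the training circuit is compiled with `Configuration(composable=True)`, so each `run` output can feed the next `run` input without a decrypt/encrypt round-trip. A minimal standalone sketch of that feature with concrete-python, using a made-up counter circuit:

    from concrete import fhe

    @fhe.compiler({"counter": "encrypted"})
    def step(counter):
        return (counter + 1) % 16

    # composable=True is the same flag Configuration(composable=True) sets above:
    # the circuit's encrypted output can be fed back as its next encrypted input
    circuit = step.compile(range(16), composable=True)
    circuit.keygen()

    counter_enc = circuit.encrypt(0)
    for _ in range(5):
        # No decrypt/encrypt round-trip between iterations
        counter_enc = circuit.run(counter_enc)

    assert circuit.decrypt(counter_enc) == 5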
@@ -616,12 +631,16 @@ def _fit_encrypted(
 
         # If the training is done in FHE, decrypt the weight and bias values
         if fhe == "execute":
-            q_weights, q_bias = self.training_quantized_module.fhe_circuit.decrypt(weights_enc, bias_enc)
-
+            q_weights, q_bias = self.training_quantized_module.fhe_circuit.decrypt(
+                weights_enc, bias_enc
+            )
+
         else:
             q_weights, q_bias = weights_enc, bias_enc
 
-        fitted_weights, fitted_bias = self.training_quantized_module.dequantize_output(q_weights, q_bias)
+        fitted_weights, fitted_bias = self.training_quantized_module.dequantize_output(
+            q_weights, q_bias
+        )
 
         # Reshape parameters to fit what scikit-learn expects
         fitted_weights, fitted_bias = fitted_weights.squeeze(0), fitted_bias.squeeze(0)
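Aside (not part of the patch): end to end, `_fit_encrypted` implements the quantize, encrypt, run, decrypt, dequantize loop that the notebook drives. A short usage sketch in simulation mode; the dataset and hyper-parameters are made up, while the `fit_encrypted`, `parameters_range` and `fhe` arguments come from the notebook itself:

    from sklearn.datasets import make_classification
    from sklearn.preprocessing import MinMaxScaler

    from concrete.ml.sklearn import SGDClassifier

    X, y = make_classification(n_samples=200, n_features=4, random_state=0)
    X = MinMaxScaler(feature_range=(-1.0, 1.0)).fit_transform(X)

    # fit_encrypted=True routes fit() through the _fit_encrypted loop above
    model = SGDClassifier(fit_encrypted=True, parameters_range=(-1.0, 1.0), max_iter=10)

    # fhe="simulate" exercises the same quantized training loop without key generation
    model.fit(X, y, fhe="simulate")

    # Compile and evaluate the trained model, as the notebook does
    model.compile(X)
    y_pred = model.predict(X, fhe="simulate")
    print(f"Accuracy: {(y_pred == y).mean():.2%}")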