From 720bf366d792e7dd020e26b19cf7a3034e88f686 Mon Sep 17 00:00:00 2001
From: Andrei Stoian
Date: Mon, 9 Dec 2024 21:29:11 +0100
Subject: [PATCH 1/4] fix: faster rounding test in weekly

---
 tests/sklearn/test_sklearn_models.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py
index 5f500d4de..5bb2027ce 100644
--- a/tests/sklearn/test_sklearn_models.py
+++ b/tests/sklearn/test_sklearn_models.py
@@ -1350,7 +1350,7 @@ def check_rounding_consistency(
 
     # Run the test with more samples during weekly CIs
     if is_weekly_option:
-        fhe_test = get_random_samples(x, n_sample=5)
+        fhe_test = get_random_samples(x, n_sample=3)
 
     # Check that rounding is enabled
     assert os.environ.get("TREES_USE_ROUNDING") == "1", "'TREES_USE_ROUNDING' is not enabled"
@@ -2076,7 +2076,7 @@ def test_linear_models_have_no_tlu(
 # Additional tests for this purpose should be added in future updates
 # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4179
 @pytest.mark.parametrize("model_class, parameters", get_sklearn_tree_models_and_datasets())
-@pytest.mark.parametrize("n_bits", [2, 5, 10])
+@pytest.mark.parametrize("n_bits", [2, 5, 8])
 def test_rounding_consistency_for_regular_models(
     model_class,
     parameters,

From 14af92f7aea58488d1c9156080ba9371cabe1505 Mon Sep 17 00:00:00 2001
From: jfrery
Date: Tue, 10 Dec 2024 13:10:24 +0100
Subject: [PATCH 2/4] fix: notebook for python>3.8 deps

---
 .../DecisionTreeClassifier.ipynb     |  2 +-
 .../ExperimentPrivacyTreePaper.ipynb | 31 ++++++++++++++++++-----
 .../LogisticRegressionTraining.ipynb |  4 ++--
 3 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/docs/advanced_examples/DecisionTreeClassifier.ipynb b/docs/advanced_examples/DecisionTreeClassifier.ipynb
index da5714ffb..8b94a616b 100644
--- a/docs/advanced_examples/DecisionTreeClassifier.ipynb
+++ b/docs/advanced_examples/DecisionTreeClassifier.ipynb
@@ -78,7 +78,7 @@
     "\n",
     "# List of hyper parameters to tune\n",
     "param_grid = {\n",
-    "    \"max_features\": [None, \"auto\", \"sqrt\", \"log2\"],\n",
+    "    \"max_features\": [None, \"sqrt\", \"log2\"],\n",
     "    \"min_samples_leaf\": [1, 10, 100],\n",
     "    \"min_samples_split\": [2, 10, 100],\n",
     "    \"max_depth\": [None, 2, 4, 6, 8],\n",
diff --git a/docs/advanced_examples/ExperimentPrivacyTreePaper.ipynb b/docs/advanced_examples/ExperimentPrivacyTreePaper.ipynb
index 0b5b2c28b..5bf349784 100644
--- a/docs/advanced_examples/ExperimentPrivacyTreePaper.ipynb
+++ b/docs/advanced_examples/ExperimentPrivacyTreePaper.ipynb
@@ -130,28 +130,45 @@
     "        y (np.array): Target labels of the dataset.\n",
     "    \"\"\"\n",
     "    if data_id is not None:\n",
-    "        X, y = fetch_openml(data_id=data_id, as_frame=False, cache=True, return_X_y=True)\n",
+    "        X, y = fetch_openml(data_id=data_id, as_frame=True, cache=True, return_X_y=True)\n",
     "    else:\n",
-    "        X, y = fetch_openml(name=name, as_frame=False, cache=True, return_X_y=True)\n",
+    "        X, y = fetch_openml(name=name, as_frame=True, cache=True, return_X_y=True)\n",
     "    return X, y\n",
     "\n",
     "\n",
+    "def preprocess_features(X):\n",
+    "    \"\"\"Convert categorical columns to numerical.\"\"\"\n",
+    "    X_processed = X.copy()\n",
+    "\n",
+    "    for column in X_processed.columns:\n",
+    "        if X_processed[column].dtype == \"object\" or X_processed[column].dtype.name == \"category\":\n",
+    "            # Convert categorical columns to numeric using label encoding\n",
+    "            X_processed[column] = X_processed[column].astype(\"category\").cat.codes\n",
+    "\n",
+    "    return X_processed.astype(np.float32)\n",
+    "\n",
+    "\n",
     "for ds_name, ds_id in dataset_names.items():\n",
     "    print(f\"Loading {ds_name}\")\n",
     "\n",
     "    X, y = load_dataset(ds_name, ds_id)\n",
     "\n",
+    "    # Preprocess features (handle categorical data)\n",
+    "    X = preprocess_features(X)\n",
+    "\n",
     "    # Remove rows with NaN values\n",
-    "    not_nan_idx = np.where(~np.isnan(X).any(axis=1))\n",
-    "    X = X[not_nan_idx]\n",
-    "    y = y[not_nan_idx]\n",
+    "    not_nan_mask = ~np.isnan(X).any(axis=1)\n",
+    "    X = X[not_nan_mask]\n",
+    "    y = y[not_nan_mask]\n",
     "\n",
     "    # Convert non-integer target labels to integers\n",
     "    if not y.dtype == np.int64:\n",
     "        encoder = OrdinalEncoder()\n",
-    "        y = encoder.fit_transform(y.reshape(-1, 1)).astype(np.int32).squeeze()\n",
+    "        # Convert pandas Series to numpy array before reshaping\n",
+    "        y = encoder.fit_transform(np.array(y).reshape(-1, 1)).astype(np.int32).squeeze()\n",
     "\n",
-    "    datasets[ds_name] = {\"X\": X, \"y\": y}"
+    "    # Ensure both X and y are numpy arrays before storing\n",
+    "    datasets[ds_name] = {\"X\": np.array(X), \"y\": np.array(y)}"
    ]
   },
   {
diff --git a/docs/advanced_examples/LogisticRegressionTraining.ipynb b/docs/advanced_examples/LogisticRegressionTraining.ipynb
index ca9d7023b..0ad87b51b 100644
--- a/docs/advanced_examples/LogisticRegressionTraining.ipynb
+++ b/docs/advanced_examples/LogisticRegressionTraining.ipynb
@@ -111,7 +111,7 @@
     "\n",
     "# Load the Iris dataset\n",
     "X_full, y_full = datasets.load_iris(return_X_y=True)\n",
-    "X_full = MinMaxScaler(feature_range=[-1, 1]).fit_transform(X_full)\n",
+    "X_full = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X_full)\n",
     "\n",
     "# Select petal length and petal width for visualization\n",
     "X = X_full[:, 2:4]  # Petal length and petal width\n",
@@ -384,7 +384,7 @@
     "X, y = datasets.load_breast_cancer(return_X_y=True)\n",
     "x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y)\n",
     "\n",
-    "scaler = MinMaxScaler(feature_range=[-1, 1])\n",
+    "scaler = MinMaxScaler(feature_range=(-1, 1))\n",
     "x_train = scaler.fit_transform(x_train)\n",
     "x_test = scaler.transform(x_test)\n",
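Note on PATCH 2/4: with as_frame=True, fetch_openml returns pandas objects, so categorical
columns must be label-encoded before the NaN filter and the float32 cast; that is what the
added preprocess_features cell does. The "auto" option for max_features and the list-valued
feature_range were likewise updated because newer scikit-learn releases reject them. Below is
a minimal sketch of the encoding step on a toy frame (the toy data is illustrative, not from
the notebook); note that .cat.codes maps missing categories to -1, so such rows are not
caught by the later NaN filter:

    import numpy as np
    import pandas as pd

    # Toy frame standing in for a fetch_openml(..., as_frame=True) result
    X = pd.DataFrame({"color": ["red", "green", None], "size": [1.0, 2.0, np.nan]})

    X_processed = X.copy()
    for column in X_processed.columns:
        if X_processed[column].dtype == "object" or X_processed[column].dtype.name == "category":
            # Label-encode the column; missing categories become -1, not NaN
            X_processed[column] = X_processed[column].astype("category").cat.codes
    X_processed = X_processed.astype(np.float32)

    # color -> [1.0, 0.0, -1.0], size -> [1.0, 2.0, nan]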
"\n", "for ds_name, ds_id in dataset_names.items():\n", " print(f\"Loading {ds_name}\")\n", "\n", " X, y = load_dataset(ds_name, ds_id)\n", "\n", + " # Preprocess features (handle categorical data)\n", + " X = preprocess_features(X)\n", + "\n", " # Remove rows with NaN values\n", - " not_nan_idx = np.where(~np.isnan(X).any(axis=1))\n", - " X = X[not_nan_idx]\n", - " y = y[not_nan_idx]\n", + " not_nan_mask = ~np.isnan(X).any(axis=1)\n", + " X = X[not_nan_mask]\n", + " y = y[not_nan_mask]\n", "\n", " # Convert non-integer target labels to integers\n", " if not y.dtype == np.int64:\n", " encoder = OrdinalEncoder()\n", - " y = encoder.fit_transform(y.reshape(-1, 1)).astype(np.int32).squeeze()\n", + " # Convert pandas Series to numpy array before reshaping\n", + " y = encoder.fit_transform(np.array(y).reshape(-1, 1)).astype(np.int32).squeeze()\n", "\n", - " datasets[ds_name] = {\"X\": X, \"y\": y}" + " # Ensure both X and y are numpy arrays before storing\n", + " datasets[ds_name] = {\"X\": np.array(X), \"y\": np.array(y)}" ] }, { diff --git a/docs/advanced_examples/LogisticRegressionTraining.ipynb b/docs/advanced_examples/LogisticRegressionTraining.ipynb index ca9d7023b..0ad87b51b 100644 --- a/docs/advanced_examples/LogisticRegressionTraining.ipynb +++ b/docs/advanced_examples/LogisticRegressionTraining.ipynb @@ -111,7 +111,7 @@ "\n", "# Load the Iris dataset\n", "X_full, y_full = datasets.load_iris(return_X_y=True)\n", - "X_full = MinMaxScaler(feature_range=[-1, 1]).fit_transform(X_full)\n", + "X_full = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X_full)\n", "\n", "# Select petal length and petal width for visualization\n", "X = X_full[:, 2:4] # Petal length and petal width\n", @@ -384,7 +384,7 @@ "X, y = datasets.load_breast_cancer(return_X_y=True)\n", "x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y)\n", "\n", - "scaler = MinMaxScaler(feature_range=[-1, 1])\n", + "scaler = MinMaxScaler(feature_range=(-1, 1))\n", "x_train = scaler.fit_transform(x_train)\n", "x_test = scaler.transform(x_test)\n", "\n", From 380da6635992dbf010825bcc813a398e6da0dbdf Mon Sep 17 00:00:00 2001 From: jfrery Date: Tue, 10 Dec 2024 13:15:30 +0100 Subject: [PATCH 3/4] chore: dont run pip installation check with py12 --- .github/workflows/continuous-integration.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/continuous-integration.yaml b/.github/workflows/continuous-integration.yaml index 9e37001b0..0f2227dec 100644 --- a/.github/workflows/continuous-integration.yaml +++ b/.github/workflows/continuous-integration.yaml @@ -980,10 +980,12 @@ jobs: run: | ./script/make_utils/check_installation_with_all_python.sh --version ${{ matrix.python_version }} --sync_env + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4679 # Check installation with pip - name: Check installation with pip and python ${{ matrix.python_version }} (weekly) if: | (fromJSON(env.IS_WEEKLY)) + && matrix.python_version != '3.12' && steps.conformance.outcome == 'success' && !cancelled() run: | From b7bc5b34c3be2b5c4a2c7aee4183125ce724a22b Mon Sep 17 00:00:00 2001 From: jfrery Date: Wed, 11 Dec 2024 09:55:23 +0100 Subject: [PATCH 4/4] chore: remove fhe vs simulation to speed up weekly in rounding consistency --- tests/sklearn/test_sklearn_models.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 5bb2027ce..f12531869 100644 --- a/tests/sklearn/test_sklearn_models.py +++ 
From b7bc5b34c3be2b5c4a2c7aee4183125ce724a22b Mon Sep 17 00:00:00 2001
From: jfrery
Date: Wed, 11 Dec 2024 09:55:23 +0100
Subject: [PATCH 4/4] chore: remove fhe vs simulation to speed up weekly in rounding consistency

---
 tests/sklearn/test_sklearn_models.py | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py
index 5bb2027ce..f12531869 100644
--- a/tests/sklearn/test_sklearn_models.py
+++ b/tests/sklearn/test_sklearn_models.py
@@ -1344,14 +1344,9 @@ def check_rounding_consistency(
     y,
     predict_method,
     metric,
-    is_weekly_option,
 ):
     """Test that Concrete ML without and with rounding are 'equivalent'."""
 
-    # Run the test with more samples during weekly CIs
-    if is_weekly_option:
-        fhe_test = get_random_samples(x, n_sample=3)
-
     # Check that rounding is enabled
     assert os.environ.get("TREES_USE_ROUNDING") == "1", "'TREES_USE_ROUNDING' is not enabled"
 
@@ -1361,10 +1356,6 @@ def check_rounding_consistency(
     rounded_predict_quantized = predict_method(x, fhe="disable")
     rounded_predict_simulate = predict_method(x, fhe="simulate")
 
-    # Compute the FHE predictions only during weekly CIs
-    if is_weekly_option:
-        rounded_predict_fhe = predict_method(fhe_test, fhe="execute")
-
     with pytest.MonkeyPatch.context() as mp_context:
 
         # Disable rounding
@@ -1389,11 +1380,6 @@ def check_rounding_consistency(
         metric(rounded_predict_quantized, not_rounded_predict_quantized)
         metric(rounded_predict_simulate, not_rounded_predict_simulate)
 
-        # Compute the FHE predictions only during weekly CIs
-        if is_weekly_option:
-            not_rounded_predict_fhe = predict_method(fhe_test, fhe="execute")
-            metric(rounded_predict_fhe, not_rounded_predict_fhe)
-
     # Check that the maximum bit-width of the circuit with rounding is at most:
     # maximum bit-width (of the circuit without rounding) + 2
     # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4178
@@ -2110,7 +2096,6 @@ def test_rounding_consistency_for_regular_models(
         y,
         predict_method,
         metric,
-        is_weekly_option,
     )
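Note on PATCH 4/4: after this patch, check_rounding_consistency only compares quantized
(fhe="disable") and simulated (fhe="simulate") predictions with rounding on and off; the
slow fhe="execute" runs, and the is_weekly_option plumbing that guarded them, are removed.
A minimal sketch of the rounding toggle the test keeps using (the helper below is
hypothetical; the environment variable and the fhe argument come from the diff):

    import pytest

    def predict_without_rounding(predict_method, x):
        # Temporarily disable tree rounding for one set of predictions,
        # as the test does inside pytest.MonkeyPatch.context()
        with pytest.MonkeyPatch.context() as mp_context:
            mp_context.setenv("TREES_USE_ROUNDING", "0")
            return predict_method(x, fhe="simulate")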