Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: faster rounding test in weekly #957

Merged
merged 4 commits into from
Dec 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/continuous-integration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -980,10 +980,12 @@ jobs:
run: |
./script/make_utils/check_installation_with_all_python.sh --version ${{ matrix.python_version }} --sync_env

# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4679
# Check installation with pip
- name: Check installation with pip and python ${{ matrix.python_version }} (weekly)
if: |
(fromJSON(env.IS_WEEKLY))
&& matrix.python_version != '3.12'
&& steps.conformance.outcome == 'success'
&& !cancelled()
run: |
Expand Down
2 changes: 1 addition & 1 deletion docs/advanced_examples/DecisionTreeClassifier.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@
"\n",
"# List of hyper parameters to tune\n",
"param_grid = {\n",
" \"max_features\": [None, \"auto\", \"sqrt\", \"log2\"],\n",
" \"max_features\": [None, \"sqrt\", \"log2\"],\n",
" \"min_samples_leaf\": [1, 10, 100],\n",
" \"min_samples_split\": [2, 10, 100],\n",
" \"max_depth\": [None, 2, 4, 6, 8],\n",
Expand Down
31 changes: 24 additions & 7 deletions docs/advanced_examples/ExperimentPrivacyTreePaper.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -130,28 +130,45 @@
" y (np.array): Target labels of the dataset.\n",
" \"\"\"\n",
" if data_id is not None:\n",
" X, y = fetch_openml(data_id=data_id, as_frame=False, cache=True, return_X_y=True)\n",
" X, y = fetch_openml(data_id=data_id, as_frame=True, cache=True, return_X_y=True)\n",
" else:\n",
" X, y = fetch_openml(name=name, as_frame=False, cache=True, return_X_y=True)\n",
" X, y = fetch_openml(name=name, as_frame=True, cache=True, return_X_y=True)\n",
" return X, y\n",
"\n",
"\n",
"def preprocess_features(X):\n",
" \"\"\"Convert categorical columns to numerical.\"\"\"\n",
" X_processed = X.copy()\n",
"\n",
" for column in X_processed.columns:\n",
" if X_processed[column].dtype == \"object\" or X_processed[column].dtype.name == \"category\":\n",
" # Convert categorical columns to numeric using label encoding\n",
" X_processed[column] = X_processed[column].astype(\"category\").cat.codes\n",
"\n",
" return X_processed.astype(np.float32)\n",
"\n",
"\n",
"for ds_name, ds_id in dataset_names.items():\n",
" print(f\"Loading {ds_name}\")\n",
"\n",
" X, y = load_dataset(ds_name, ds_id)\n",
"\n",
" # Preprocess features (handle categorical data)\n",
" X = preprocess_features(X)\n",
"\n",
" # Remove rows with NaN values\n",
" not_nan_idx = np.where(~np.isnan(X).any(axis=1))\n",
" X = X[not_nan_idx]\n",
" y = y[not_nan_idx]\n",
" not_nan_mask = ~np.isnan(X).any(axis=1)\n",
" X = X[not_nan_mask]\n",
" y = y[not_nan_mask]\n",
"\n",
" # Convert non-integer target labels to integers\n",
" if not y.dtype == np.int64:\n",
" encoder = OrdinalEncoder()\n",
" y = encoder.fit_transform(y.reshape(-1, 1)).astype(np.int32).squeeze()\n",
" # Convert pandas Series to numpy array before reshaping\n",
" y = encoder.fit_transform(np.array(y).reshape(-1, 1)).astype(np.int32).squeeze()\n",
"\n",
" datasets[ds_name] = {\"X\": X, \"y\": y}"
" # Ensure both X and y are numpy arrays before storing\n",
" datasets[ds_name] = {\"X\": np.array(X), \"y\": np.array(y)}"
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions docs/advanced_examples/LogisticRegressionTraining.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@
"\n",
"# Load the Iris dataset\n",
"X_full, y_full = datasets.load_iris(return_X_y=True)\n",
"X_full = MinMaxScaler(feature_range=[-1, 1]).fit_transform(X_full)\n",
"X_full = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X_full)\n",
"\n",
"# Select petal length and petal width for visualization\n",
"X = X_full[:, 2:4] # Petal length and petal width\n",
Expand Down Expand Up @@ -384,7 +384,7 @@
"X, y = datasets.load_breast_cancer(return_X_y=True)\n",
"x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y)\n",
"\n",
"scaler = MinMaxScaler(feature_range=[-1, 1])\n",
"scaler = MinMaxScaler(feature_range=(-1, 1))\n",
"x_train = scaler.fit_transform(x_train)\n",
"x_test = scaler.transform(x_test)\n",
"\n",
Expand Down
17 changes: 1 addition & 16 deletions tests/sklearn/test_sklearn_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1344,14 +1344,9 @@ def check_rounding_consistency(
y,
predict_method,
metric,
is_weekly_option,
):
"""Test that Concrete ML without and with rounding are 'equivalent'."""

# Run the test with more samples during weekly CIs
if is_weekly_option:
fhe_test = get_random_samples(x, n_sample=5)

# Check that rounding is enabled
assert os.environ.get("TREES_USE_ROUNDING") == "1", "'TREES_USE_ROUNDING' is not enabled"

Expand All @@ -1361,10 +1356,6 @@ def check_rounding_consistency(
rounded_predict_quantized = predict_method(x, fhe="disable")
rounded_predict_simulate = predict_method(x, fhe="simulate")

# Compute the FHE predictions only during weekly CIs
if is_weekly_option:
rounded_predict_fhe = predict_method(fhe_test, fhe="execute")

with pytest.MonkeyPatch.context() as mp_context:

# Disable rounding
Expand All @@ -1389,11 +1380,6 @@ def check_rounding_consistency(
metric(rounded_predict_quantized, not_rounded_predict_quantized)
metric(rounded_predict_simulate, not_rounded_predict_simulate)

# Compute the FHE predictions only during weekly CIs
if is_weekly_option:
not_rounded_predict_fhe = predict_method(fhe_test, fhe="execute")
metric(rounded_predict_fhe, not_rounded_predict_fhe)

# Check that the maximum bit-width of the circuit with rounding is at most:
# maximum bit-width (of the circuit without rounding) + 2
# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4178
Expand Down Expand Up @@ -2076,7 +2062,7 @@ def test_linear_models_have_no_tlu(
# Additional tests for this purpose should be added in future updates
# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4179
@pytest.mark.parametrize("model_class, parameters", get_sklearn_tree_models_and_datasets())
@pytest.mark.parametrize("n_bits", [2, 5, 10])
@pytest.mark.parametrize("n_bits", [2, 5, 8])
def test_rounding_consistency_for_regular_models(
model_class,
parameters,
Expand Down Expand Up @@ -2110,7 +2096,6 @@ def test_rounding_consistency_for_regular_models(
y,
predict_method,
metric,
is_weekly_option,
)


Expand Down
Loading