chore: make long notebooks much faster
RomanBredehoft committed Apr 25, 2024
1 parent 1234259 commit ec0ff23
Showing 8 changed files with 155 additions and 255 deletions.
16 changes: 9 additions & 7 deletions docs/advanced_examples/ClassifierComparison.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/advanced_examples/DecisionTreeClassifier.ipynb
@@ -90,7 +90,7 @@
" cv=10,\n",
" scoring=\"average_precision\",\n",
" error_score=\"raise\",\n",
" n_jobs=-1,\n",
" n_jobs=10,\n",
")\n",
"\n",
"gs_results = grid_search.fit(x_train, y_train)\n",
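For context, a minimal sketch of the grid search this hunk touches, now capped at ten parallel workers instead of `n_jobs=-1` (all cores). The estimator and parameter grid below are assumptions — a plain scikit-learn stand-in for the notebook's Concrete ML model — and only `cv=10`, `scoring="average_precision"`, `error_score="raise"`, and `n_jobs=10` come from the diff.

```python
# Hypothetical reconstruction of the surrounding grid search.
# The estimator and param_grid are assumptions, not taken from the notebook.
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=200, n_features=10, random_state=0)
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0)

param_grid = {"max_depth": [2, 4, 6]}  # assumed grid, for illustration only

grid_search = GridSearchCV(
    DecisionTreeClassifier(),
    param_grid,
    cv=10,
    scoring="average_precision",
    error_score="raise",
    n_jobs=10,  # fixed worker count instead of n_jobs=-1 (all available cores)
)

gs_results = grid_search.fit(x_train, y_train)
print(gs_results.best_params_, gs_results.best_score_)
```

A fixed worker count is easier to budget on shared runners than `-1`, which grabs every available core.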
33 changes: 10 additions & 23 deletions docs/advanced_examples/LogisticRegression.ipynb

Large diffs are not rendered by default.

221 changes: 65 additions & 156 deletions docs/advanced_examples/QuantizationAwareTraining.ipynb

Large diffs are not rendered by default.

79 changes: 46 additions & 33 deletions docs/advanced_examples/XGBRegressor.ipynb

Large diffs are not rendered by default.

31 changes: 13 additions & 18 deletions docs/advanced_examples/utils/classifier_comparison_utils.py
@@ -25,9 +25,7 @@
ALWAYS_USE_SIM = False

# pylint: disable=too-many-locals,too-many-statements,too-many-branches,invalid-name
def make_classifier_comparison(title, classifiers, decision_level, verbose=False, save_plot=False):

h = 0.04 # Step size in the mesh
def make_classifier_comparison(title, classifiers, decision_level, verbose=False, save_plot=False, simulate=False, h=0.04):
n_samples = 200

X, y = make_classification(
@@ -140,13 +138,16 @@ def make_classifier_comparison(title, classifiers, decision_level, verbose=False
if verbose:
print(f"Key generation time: {time.time() - time_begin:.4f} seconds")

# Compute the predictions in FHE using the Concrete ML model
fhe = "simulate" if simulate else "execute"

# Compute the predictions in FHE (with simulation or not) using the Concrete ML model
time_begin = time.time()
concrete_y_pred = concrete_model.predict(X_test, fhe="execute")
concrete_y_pred = concrete_model.predict(X_test, fhe=fhe)

if verbose:
print(
f"FHE Execution time: {(time.time() - time_begin) / len(X_test):.4f} "
"FHE " + "(simulation) " * simulate
+ f"Execution time: {(time.time() - time_begin) / len(X_test):.4f} "
"seconds per sample\n"
)

@@ -169,23 +170,17 @@ def make_classifier_comparison(title, classifiers, decision_level, verbose=False
if not is_a_tree_based_model:
bitwidth = circuit.graph.maximum_integer_bit_width()

raveled_input = np.c_[xx.ravel(), yy.ravel()]

# Plot the decision boundaries.
# For that, a color is assigned to each point in the mesh, which is obtained as a
# cartesian product of [x_min, x_max] with [y_min, y_max].
if hasattr(sklearn_model, "decision_function"):
sklearn_Z = sklearn_model.decision_function(np.c_[xx.ravel(), yy.ravel()])
concrete_Z = concrete_model.decision_function(
np.c_[xx.ravel(), yy.ravel()],
fhe="simulate",
)
sklearn_Z = sklearn_model.decision_function(raveled_input)
concrete_Z = concrete_model.decision_function(raveled_input, fhe="simulate")
else:
sklearn_Z = sklearn_model.predict_proba(
np.c_[xx.ravel(), yy.ravel()].astype(np.float32)
)[:, 1]
concrete_Z = concrete_model.predict_proba(
np.c_[xx.ravel(), yy.ravel()],
fhe="simulate",
)[:, 1]
sklearn_Z = sklearn_model.predict_proba(raveled_input.astype(np.float32))[:, 1]
concrete_Z = concrete_model.predict_proba(raveled_input, fhe="simulate")[:, 1]

for k, (framework, score, Z) in enumerate(
zip(
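To isolate what the new `simulate` flag does, here is a minimal sketch of the `fhe = "simulate" if simulate else "execute"` switch outside the plotting code. The dataset, the choice of `LogisticRegression`, and `n_bits=8` are assumptions for illustration; the timing print mirrors the diff, and the estimator calls follow the usual Concrete ML pattern (`fit`, `compile`, then `predict` with an `fhe` mode).

```python
# Sketch of the simulate/execute switch added to make_classifier_comparison.
# The dataset, model choice, and bit-width are assumptions for illustration.
import time

from concrete.ml.sklearn import LogisticRegression
from sklearn.datasets import make_classification


def timed_predict(concrete_model, X_test, simulate=False, verbose=True):
    """Predict with FHE simulation (fast) or real FHE execution (slow)."""
    fhe = "simulate" if simulate else "execute"
    time_begin = time.time()
    y_pred = concrete_model.predict(X_test, fhe=fhe)
    if verbose:
        print(
            "FHE " + "(simulation) " * simulate
            + f"Execution time: {(time.time() - time_begin) / len(X_test):.4f} "
            "seconds per sample"
        )
    return y_pred


X, y = make_classification(n_samples=200, n_features=2, n_redundant=0, random_state=0)
model = LogisticRegression(n_bits=8)
model.fit(X, y)
model.compile(X)
timed_predict(model, X[:10], simulate=True)  # simulation: no encrypted execution
```

Simulation evaluates the compiled circuit without any encrypted computation, which is why switching these notebooks over to it makes them so much faster than `fhe="execute"`.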
24 changes: 9 additions & 15 deletions docs/advanced_examples/utils/scaling_comparison_utils.py
Expand Up @@ -4,7 +4,7 @@
from sklearn.metrics import accuracy_score

# pylint: disable=too-many-locals
def plot_data(axs, X_train, y_train, X_test, y_test, model, name, h = 0.04, font_size_text = 20):
def plot_data(axs, X_train, y_train, X_test, y_test, model, name, h=0.04, font_size_text=20):
# Train the model and retrieve both the Concrete ML model and its equivalent one from
# scikit-learn
concrete_model, sklearn_model = model.fit_benchmark(X_train, y_train)
@@ -29,24 +29,18 @@ def plot_data(axs, X_train, y_train, X_test, y_test, model, name, h = 0.04, font
# pylint: disable-next=no-member
cm = plt.cm.RdBu
cm_bright = ListedColormap(["#FF0000", "#0000FF"])


raveled_input = np.c_[xx.ravel(), yy.ravel()]

# Plot the decision boundaries.
# For that, a color is assigned to each point in the mesh, which is obtained as a
# cartesian product of [x_min, x_max] with [y_min, y_max].
if hasattr(sklearn_model, "decision_function"):
sklearn_Z = sklearn_model.decision_function(np.c_[xx.ravel(), yy.ravel()])
concrete_Z = concrete_model.decision_function(
np.c_[xx.ravel(), yy.ravel()],
fhe="simulate",
)
sklearn_Z = sklearn_model.decision_function(raveled_input)
concrete_Z = concrete_model.decision_function(raveled_input, fhe="simulate")
else:
sklearn_Z = sklearn_model.predict_proba(
np.c_[xx.ravel(), yy.ravel()].astype(np.float32)
)[:, 1]
concrete_Z = concrete_model.predict_proba(
np.c_[xx.ravel(), yy.ravel()],
fhe="simulate",
)[:, 1]
sklearn_Z = sklearn_model.predict_proba(raveled_input.astype(np.float32))[:, 1]
concrete_Z = concrete_model.predict_proba(raveled_input, fhe="simulate")[:, 1]

for _, (ax, framework, score, Z) in enumerate(
zip(
@@ -107,4 +101,4 @@ def plot_data(axs, X_train, y_train, X_test, y_test, model, name, h = 0.04, font
f"bit-width={bitwidth}",
size=font_size_text,
horizontalalignment="right",
)
)
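Both utility files now build the prediction grid once as `raveled_input` instead of recreating `np.c_[xx.ravel(), yy.ravel()]` for every call. Below is a small sketch of that mesh-and-ravel pattern; the classifier is an assumption (a plain scikit-learn stand-in), while the step size `h=0.04` and the grid construction match the diff.

```python
# Sketch of the mesh-grid raveling now factored out as `raveled_input`.
# The model is a plain scikit-learn classifier used as a stand-in; the
# Concrete ML path in the utilities only adds fhe="simulate" to the call.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=200, n_features=2, n_redundant=0, random_state=0)
model = LogisticRegression().fit(X, y)

h = 0.04  # step size in the mesh, as in the utilities' defaults
x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# Build the (n_points, 2) grid once and reuse it for every predictor.
raveled_input = np.c_[xx.ravel(), yy.ravel()]
Z = model.decision_function(raveled_input).reshape(xx.shape)  # ready for contourf
```

Computing the raveled grid once keeps the scikit-learn and Concrete ML predictions on exactly the same points and avoids rebuilding the same array for each predictor.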
4 changes: 2 additions & 2 deletions script/make_utils/jupyter.sh
@@ -15,7 +15,7 @@ WHAT_TO_DO="open"

# Create a list of notebooks with long execution times in order not to consider them when refreshing
# all notebooks at the same time.
LONG_EXECUTION_TIMES_NOTEBOOKS=("docs/advanced_examples/LogisticRegression.ipynb" "docs/advanced_examples/ClassifierComparison.ipynb" "docs/advanced_examples/QuantizationAwareTraining.ipynb" "docs/advanced_examples/ExperimentPrivacyTreePaper.ipynb")
LONG_EXECUTION_TIMES_NOTEBOOKS=("docs/advanced_examples/ExperimentPrivacyTreePaper.ipynb")

while [ -n "$1" ]
do
@@ -63,7 +63,7 @@
echo "" > "${FAILED_NOTEBOOKS}"

# shellcheck disable=SC2207
LIST_OF_NOTEBOOKS=($(find ./docs/ -type f -name "*.ipynb" | grep -v ".nbconvert" | grep -v "_build" | grep -v "ipynb_checkpoints"))
LIST_OF_NOTEBOOKS=($(find ./docs -type f -name "*.ipynb" | grep -v ".nbconvert" | grep -v "_build" | grep -v "ipynb_checkpoints"))

# Remove notebooks with long execution times
for NOTEBOOK_TO_REMOVE in "${LONG_EXECUTION_TIMES_NOTEBOOKS[@]}"
