From 82ec66684a168925fb5c83dacca6db106928d156 Mon Sep 17 00:00:00 2001 From: Andrei Stoian Date: Tue, 21 May 2024 15:16:08 +0200 Subject: [PATCH] chore: cleanup notebook --- .../WhitePaperExperiments.ipynb | 86 +-- .../WhitePaperExperiments.py | 501 ++++++++++++++++++ 2 files changed, 556 insertions(+), 31 deletions(-) create mode 100644 use_case_examples/white_paper_experiment/WhitePaperExperiments.py diff --git a/use_case_examples/white_paper_experiment/WhitePaperExperiments.ipynb b/use_case_examples/white_paper_experiment/WhitePaperExperiments.ipynb index be87ee7a6..e34fd4d78 100644 --- a/use_case_examples/white_paper_experiment/WhitePaperExperiments.ipynb +++ b/use_case_examples/white_paper_experiment/WhitePaperExperiments.ipynb @@ -20,10 +20,10 @@ "metadata": {}, "outputs": [], "source": [ + "import os\n", "import random\n", "import time\n", "import warnings\n", - "import os\n", "\n", "import numpy as np\n", "import torch\n", @@ -68,9 +68,9 @@ "\n", "# The timing and the accuracy recorded in the article\n", "if os.cpu_count() > 48:\n", - " PAPER_NOTES = { 20: [21.17, 0.97], 50: [43.91, 0.94]}\n", + " PAPER_NOTES = {20: [21.17, 0.971], 50: [43.91, 0.947]}\n", "else:\n", - " PAPER_NOTES = { 20: [115.52, 0.97], 50: [233.55, 0.94]}" + " PAPER_NOTES = {20: [115.52, 0.971], 50: [233.55, 0.947]}" ] }, { @@ -91,7 +91,7 @@ " # in_channel=1, out_channels=1, kernel_size=3, stride=1, padding_mode='replicate'\n", " (\"C\", 1, 1, 3, 1, \"replicate\"),\n", " (\"R\",),\n", - " (\"B\", 1, 30), # 2d batch-norm for 1 channel\n", + " (\"B\", 1, 30), # 2d batch-norm for 1 channel\n", "]\n", "\n", "\n", @@ -102,19 +102,18 @@ " [\n", " (\"L\", INPUT_IMG_SIZE * INPUT_IMG_SIZE, 92),\n", " (\"R\",),\n", - " (\"B\", 92), # 1d batch norm\n", + " (\"B\", 92), # 1d batch norm\n", " ] # noqa: W503\n", " + [ # noqa: W503\n", " (\"L\", 92, 92),\n", " (\"R\",),\n", - " (\"B\", 92), # 1d batch norm\n", + " (\"B\", 92), # 1d batch norm\n", " ]\n", " * (nb_layers - 3) # noqa: W503\n", - " + [\n", 
- " (\"L\", 92, output_size)\n", - " ] # noqa: W503\n", + " + [(\"L\", 92, output_size)] # noqa: W503\n", " )\n", - " \n", + "\n", + "\n", "class Fp32MNIST(torch.nn.Module):\n", " \"\"\"MNIST Torch model.\"\"\"\n", "\n", @@ -143,7 +142,7 @@ " return torch.nn.Linear(in_features=t[1], out_features=t[2])\n", " if t[0] == \"R\":\n", " return torch.nn.ReLU()\n", - " if t[0] == 'B':\n", + " if t[0] == \"B\":\n", " if len(t) == 2:\n", " return torch.nn.BatchNorm1d(t[1])\n", " elif len(t) == 3:\n", @@ -167,9 +166,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Load MNIST data-set\n", - "\n", - "At the time of writing this notebook, `padding=1` is not yet supported by Concrete ML ; as a workaround, padding is added during the data loading transformation process." + "## Load and pre-process the MNIST data-set\n" ] }, { @@ -418,7 +415,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In the compilation step, the compiler requires an exhaustive set of data, here noted `data_calibration` to evaluate the maximum integer bit-width within the graph." + "In the compilation step, the compiler requires an exhaustive set of data, named `data_calibration` below, to evaluate the maximum integer bit-width within the graph." 
] }, { @@ -487,15 +484,14 @@ " q_module.forward(data[0, None], fhe=\"execute\")\n", " fhe_timing.append((time.time() - start_time))\n", "\n", - " results_cml[nb_layers] = [ acc_test, np.mean(y_predictions), np.min(fhe_timing)]\n", - " \n", + " results_cml[nb_layers] = [acc_test, np.mean(y_predictions), np.min(fhe_timing)]\n", + "\n", " print(\n", " f\"Running NN-{nb_layers} on a {MACHINE} machine:\"\n", " f\"Accuracy in fp32 : {results_cml[nb_layers][0]:.3%} for the test set\\n\"\n", " f\"Accuracy in FHE simulation mode : {results_cml[nb_layers][1]:.3%} for the test set\\n\"\n", " f\"Timing in FHE: {results_cml[nb_layers][2]:.3f}s per sample.\"\n", - " )\n", - "\n" + " )" ] }, { @@ -566,18 +562,46 @@ "source": [ "import pandas as pd\n", "\n", - "pd.DataFrame([\n", - " [20, PAPER_NOTES[20][1], PAPER_NOTES[20][0], results_cml[20][0], results_cml[20][1], results_cml[20][2], PAPER_NOTES[20][0]/results_cml[20][2]], \n", - " [50, PAPER_NOTES[50][1], PAPER_NOTES[50][0], results_cml[50][0], results_cml[50][1], results_cml[50][2], PAPER_NOTES[50][0]/results_cml[50][2]]\n", - " ], columns=[\"Num Layers\", \"Accuracy [1]\", \"FHE Latency [1]\", \"Our Accuracy fp32\", \"Our Accuracy FHE\", \"Our FHE Latency\", \"Speedup\"]\n", - ").style.format({\n", - " 'Accuracy [1]': '{:,.2%}'.format,\n", - " 'FHE Latency [1]':'{:,.2f}s'.format,\n", - " 'Our Accuracy fp32': '{:,.2%}'.format,\n", - " 'Our Accuracy FHE': '{:,.2%}'.format,\n", - " 'Our FHE Latency': '{:,.2f}s'.format,\n", - " 'Speedup': '{:,.1f}x'.format\n", - "})" + "pd.DataFrame(\n", + " [\n", + " [\n", + " 20,\n", + " PAPER_NOTES[20][1],\n", + " PAPER_NOTES[20][0],\n", + " results_cml[20][0],\n", + " results_cml[20][1],\n", + " results_cml[20][2],\n", + " PAPER_NOTES[20][0] / results_cml[20][2],\n", + " ],\n", + " [\n", + " 50,\n", + " PAPER_NOTES[50][1],\n", + " PAPER_NOTES[50][0],\n", + " results_cml[50][0],\n", + " results_cml[50][1],\n", + " results_cml[50][2],\n", + " PAPER_NOTES[50][0] / results_cml[50][2],\n", + " 
],\n", + " ],\n", + " columns=[\n", + " \"Num Layers\",\n", + " \"Accuracy [1]\",\n", + " \"FHE Latency [1]\",\n", + " \"Our Accuracy fp32\",\n", + " \"Our Accuracy FHE\",\n", + " \"Our FHE Latency\",\n", + " \"Speedup\",\n", + " ],\n", + ").style.format(\n", + " {\n", + " \"Accuracy [1]\": \"{:,.1%}\".format,\n", + " \"FHE Latency [1]\": \"{:,.2f}s\".format,\n", + " \"Our Accuracy fp32\": \"{:,.1%}\".format,\n", + " \"Our Accuracy FHE\": \"{:,.1%}\".format,\n", + " \"Our FHE Latency\": \"{:,.2f}s\".format,\n", + " \"Speedup\": \"{:,.1f}x\".format,\n", + " }\n", + ")" ] } ], diff --git a/use_case_examples/white_paper_experiment/WhitePaperExperiments.py b/use_case_examples/white_paper_experiment/WhitePaperExperiments.py new file mode 100644 index 000000000..e128c7408 --- /dev/null +++ b/use_case_examples/white_paper_experiment/WhitePaperExperiments.py @@ -0,0 +1,501 @@ +#!/usr/bin/env python +# coding: utf-8 + +# # Deep MNIST classifiers +# +# ## Reproducing the results from **Programmable Bootstrapping Enables Efficient Homomorphic Inference of Deep Neural Networks** +# +# This notebook replicates experiments from the paper [_Programmable Bootstrapping Enables Efficient Homomorphic Inference of Deep Neural Networks_](https://whitepaper.zama.ai/), published in 2021. +# It provides an in-depth analysis of the deep neural network architectures NN-20 and NN-50, along with their training processes using floating point precision and their [quantization](https://docs.zama.ai/concrete-ml/explanations/quantization) using Post Training Quantization (PTQ). +# +# We compare the original paper's findings with the results from the latest version of [Concrete ML](https://pypi.org/project/concrete-ml/). This comparison highlights the significant advancements made by Concrete ML, particularly in execution speed while preserving model accuracy. 
# In[1]:


import os
import pickle as pkl
import random
import sys
import time
import warnings
from collections import OrderedDict
from pathlib import Path
from typing import Dict

import matplotlib.pyplot as plt
import numpy as np
import torch
from brevitas import config
from torch.utils.data.dataloader import DataLoader
from torchvision import datasets, transforms
from torchvision.utils import make_grid
from tqdm import tqdm

from concrete.ml.torch.compile import compile_torch_model

warnings.filterwarnings("ignore", category=UserWarning)


# Normalization parameters for MNIST data
MEAN = STD = 0.5


def mapping_keys(pre_trained_weights: Dict, model: torch.nn.Module, device: str) -> torch.nn.Module:
    """Initialize the quantized model with pre-trained fp32 weights.

    Keys are matched by position, not by name: the i-th tensor of
    `pre_trained_weights` is stored under the i-th key of `model.state_dict()`.
    If the two dictionaries have different lengths, the surplus entries of the
    longer one are ignored.

    Args:
        pre_trained_weights (Dict): The state_dict of the pre-trained fp32 model.
        model (nn.Module): The Brevitas model.
        device (str): Device type.

    Returns:
        nn.Module: The quantized model, loaded with the pre-trained weights and moved
            to `device`.
    """

    # Brevitas requirement to ignore missing keys
    config.IGNORE_MISSING_KEYS = True

    new_state_dict = OrderedDict(
        (new_key, pre_trained_weights[old_key])
        for old_key, new_key in zip(pre_trained_weights, model.state_dict())
    )

    model.load_state_dict(new_state_dict)

    return model.to(device)


def train(
    model: torch.nn.Module,
    train_loader: DataLoader,
    test_loader: DataLoader,
    param: Dict,
    step: int = 1,
    device: str = "cpu",
) -> torch.nn.Module:
    """Train the model and save its weights and training history.

    The per-epoch losses and accuracies are appended to the lists stored in `param` under
    "accuracy_train", "accuracy_test", "loss_train_history" and "loss_test_history"
    (created when missing), so `param` is modified in place.

    Args:
        model (nn.Module): A PyTorch or Brevitas network.
        train_loader (DataLoader): The training set.
        test_loader (DataLoader): The test set.
        param (Dict): Hyper-parameters and bookkeeping. The keys "lr", "milestones", "gamma",
            "epochs", "dir", "training" and "dataset_name" are required. "seed" (torch and
            `random` are seeded when it is present and not None) and "criterion" (defaults
            to `torch.nn.CrossEntropyLoss()`) are optional.
        step (int): Display the loss and accuracy every `step` epochs.
        device (str): Device type.

    Returns:
        nn.Module: the trained model.
    """

    for history_key in (
        "accuracy_test",
        "accuracy_train",
        "loss_test_history",
        "loss_train_history",
    ):
        param.setdefault(history_key, [])
    criterion = param.setdefault("criterion", torch.nn.CrossEntropyLoss())

    # `is not None` so that a missing seed is tolerated and seed 0 is honoured
    seed = param.get("seed")
    if seed is not None:
        torch.manual_seed(seed)
        random.seed(seed)

    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=param["lr"])
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=param["milestones"], gamma=param["gamma"]
    )

    # Directory receiving the state_dict and the training history
    output_dir = Path(".") / param["dir"] / param["training"]
    output_dir.mkdir(parents=True, exist_ok=True)

    # To avoid breaking up the tqdm bar
    with tqdm(total=param["epochs"], file=sys.stdout) as pbar:

        for epoch in range(param["epochs"]):
            # Train the model
            model.train()
            loss_batch_train, accuracy_batch_train = [], []

            for x, y in train_loader:
                x, y = x.to(device), y.to(device)

                optimizer.zero_grad()
                yhat = model(x)
                loss_train = criterion(yhat, y)
                loss_train.backward()
                optimizer.step()

                loss_batch_train.append(loss_train.item())
                accuracy_batch_train.extend((yhat.argmax(1) == y).cpu().float().tolist())

            scheduler.step()

            param["accuracy_train"].append(np.mean(accuracy_batch_train))
            param["loss_train_history"].append(np.mean(loss_batch_train))

            # Evaluation during training:
            # Disable autograd engine (no backpropagation)
            # To reduce memory usage and to speed up computations
            with torch.no_grad():
                # Notify batchnormalization & dropout layers to work in eval mode
                model.eval()
                loss_batch_test, accuracy_batch_test = [], []
                for x, y in test_loader:
                    x, y = x.to(device), y.to(device)
                    yhat = model(x)
                    loss_batch_test.append(criterion(yhat, y).item())
                    accuracy_batch_test.extend((yhat.argmax(1) == y).cpu().float().tolist())

            param["accuracy_test"].append(np.mean(accuracy_batch_test))
            param["loss_test_history"].append(np.mean(loss_batch_test))

            if epoch % step == 0:
                pbar.write(
                    f"Epoch {epoch:2}: Train loss = {param['loss_train_history'][-1]:.4f} "
                    f"VS Test loss = {param['loss_test_history'][-1]:.4f} - "
                    f"Accuracy train: {param['accuracy_train'][-1]:.4f} "
                    f"VS Accuracy test: {param['accuracy_test'][-1]:.4f}"
                )
            # One tick per epoch, so the bar ends exactly at `total` whatever `step` is
            pbar.update(1)

    state_dict_path = output_dir / f"{param['dataset_name']}_{param['training']}_state_dict.pt"
    print("Save in:", state_dict_path)
    torch.save(model.state_dict(), state_dict_path)

    # NOTE: the whole `param` dictionary, criterion included, is pickled
    with open(output_dir / f"{param['dataset_name']}_history.pkl", "wb") as f:
        pkl.dump(param, f)

    torch.cuda.empty_cache()

    return model


def torch_inference(
    model: torch.nn.Module,
    data: DataLoader,
    device: str = "cpu",
    verbose: bool = False,
) -> float:
    """Return the top-1 accuracy of the model over a data loader.

    Correct predictions are counted batch by batch, so the batches may have different
    sizes (for instance a smaller last batch).

    Args:
        model (torch.nn.Module): A PyTorch or Brevitas network.
        data (DataLoader): The test or evaluation set.
        device (str): Device type.
        verbose (bool): Whether to display a progress bar.

    Returns:
        float: The fraction of samples whose arg-max prediction equals the label.

    Raises:
        ValueError: If `data` yields no sample.
    """
    n_correct = 0
    n_total = 0
    model = model.to(device)

    with torch.no_grad():
        model.eval()
        for x, y in tqdm(data, disable=not verbose):
            yhat = model(x.to(device)).cpu()
            n_correct += int((yhat.argmax(1) == y.cpu()).sum())
            n_total += len(y)

    if n_total == 0:
        raise ValueError("Cannot compute the accuracy of an empty data loader.")

    return n_correct / n_total


# ## Architecture
#
# All networks begin with a convolutional layer configured with `in_channel=1, out_channels=1, kernel_size=3, stride=1, padding_mode='replicate'`.
#
# This is followed by 20 linear layers of 92 neurones with ReLU activation for NN-20, and 50 layers for NN-50.
# In[2]:


DEVICE = "cpu"

# Input size, 28x28 pixels, a standard size for MNIST images
INPUT_IMG_SIZE = 28

# Batch size
BATCH_SIZE = 64

# Seed to ensure reproducibility
SEED = 42

# `os.cpu_count()` returns None when the number of CPUs cannot be determined
N_CPUS = os.cpu_count() or 1

# Whether the experiments are run on PC or other machines, like HP7C on AWS
MACHINE = f"{N_CPUS}-core machine"

# The timing and the accuracy recorded in the article
if N_CPUS > 48:
    PAPER_NOTES = {20: [21.17, 0.971], 50: [43.91, 0.947]}
else:
    PAPER_NOTES = {20: [115.52, 0.971], 50: [233.55, 0.947]}


# ## FP32 MNIST Neural Network

# In[3]:


FEATURES_MAPS = [
    # Convolution layer, with:
    # in_channel=1, out_channels=1, kernel_size=3, stride=1, padding_mode='replicate'
    ("C", 1, 1, 3, 1, "replicate"),
    ("R",),
    ("B", 1, 30),  # 2d batch-norm for 1 channel
]


# The article presents 3 neural network depths. In this notebook, we focus on the NN-20 and
# NN-50 architectures. The parameter `nb_layers` controls the depth of the NN.
def LINEAR_LAYERS(nb_layers: int, output_size: int):
    """Return the layer descriptors of the fully-connected part of the network.

    The list is made of one input block, `nb_layers - 3` hidden blocks and one output
    linear layer, where a block is a linear layer, a ReLU and a 1d batch-norm.

    Args:
        nb_layers (int): Depth of the network, as named in the article (20 for NN-20).
        output_size (int): Number of output features of the last linear layer.

    Returns:
        list: Tuples understood by `Fp32MNIST`: ("L", in, out), ("R",) or ("B", features).
    """
    input_block = [
        ("L", INPUT_IMG_SIZE * INPUT_IMG_SIZE, 92),
        ("R",),
        ("B", 92),  # 1d batch norm
    ]
    hidden_block = [
        ("L", 92, 92),
        ("R",),
        ("B", 92),  # 1d batch norm
    ]
    return input_block + hidden_block * (nb_layers - 3) + [("L", 92, output_size)]


class Fp32MNIST(torch.nn.Module):
    """MNIST Torch model."""

    def __init__(self, nb_layers: int, output_size: int = 10):
        """MNIST Torch model.

        Args:
            nb_layers (int): Number of layers.
            output_size (int): Number of classes.
        """
        super().__init__()

        self.nb_layers = nb_layers
        self.output_size = output_size

        def make_layers(t):
            """Build the torch module described by the tuple `t`."""
            kind = t[0]
            if kind == "C":
                # Workaround: stride=1, padding_mode='replicate' is replaced by
                # transforms.Pad(1, padding_mode="edge"), so t[4] and t[5] are not used
                return torch.nn.Conv2d(
                    in_channels=t[1],
                    out_channels=t[2],
                    kernel_size=t[3],
                )
            if kind == "L":
                return torch.nn.Linear(in_features=t[1], out_features=t[2])
            if kind == "R":
                return torch.nn.ReLU()
            if kind == "B":
                # The length of the descriptor selects the batch-norm flavour
                if len(t) == 2:
                    return torch.nn.BatchNorm1d(t[1])
                if len(t) == 3:
                    return torch.nn.BatchNorm2d(t[1])

            raise NameError(f"'{t}' not defined")

        # The attribute names and the layer order define the state_dict keys
        self.features_maps = torch.nn.Sequential(*[make_layers(t) for t in FEATURES_MAPS])
        self.linears = torch.nn.Sequential(
            *[make_layers(t) for t in LINEAR_LAYERS(self.nb_layers, self.output_size)]
        )

    def forward(self, x):
        """Apply the convolutional block, flatten all but the first dim, apply the linears."""
        x = self.features_maps(x)
        x = torch.flatten(x, 1)
        x = self.linears(x)
        return x


# ## Load and pre-process the MNIST data-set
#

# In[4]:


g = torch.Generator()
g.manual_seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
random.seed(SEED)

train_transform = transforms.Compose(
    [  # Workaround: stride=1, padding_mode='replicate' is replaced by
        # transforms.Pad(1, padding_mode="edge")
        transforms.Pad(1, padding_mode="edge"),
        transforms.ToTensor(),
        transforms.Normalize((MEAN,), (STD,)),
        transforms.GaussianBlur(kernel_size=(3, 3)),
    ]
)
test_transform = transforms.Compose(
    [  # Workaround: stride=1, padding_mode='replicate' is replaced by
        # transforms.Pad(1, padding_mode="edge")
        transforms.Pad(1, padding_mode="edge"),
        transforms.ToTensor(),
        transforms.Normalize((MEAN,), (STD,)),
    ]
)

train_dataset = datasets.MNIST(download=True, root="./data", train=True, transform=train_transform)

test_dataset = datasets.MNIST(download=True, root="./data", train=False, transform=test_transform)

train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True,
    generator=g,
)

# NOTE(review): the test loader is shuffled and drops its last incomplete batch, so the
# reported accuracies do not cover every test sample - confirm this is intended.
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True,
    generator=g,
)


# ## Benchmark the models
#
# In the next sections of this notebook, we apply the same experimental protocol to both NN-20 and NN-50 models.
#
# To make a custom neural network FHE-compatible, it's necessary to quantize both the network and its inputs. Post-Training Quantization (PTQ) quantizes a pre-trained floating-point model directly, and does not require re-training. In Concrete ML, the compilation of the custom NNs is performed through the `compile_torch_model` function.
#

# In the compilation step, the compiler requires an exhaustive set of data, named `data_calibration` below, to evaluate the maximum integer bit-width within the graph.

# In[5]:


data_calibration = next(iter(train_loader))[0]

results_cml = {}

for nb_layers in [20, 50]:

    fp32_mnist = Fp32MNIST(nb_layers=nb_layers).to(DEVICE)

    checkpoint = torch.load(
        f"./checkpoints/MNIST/MLP_{nb_layers}/fp32/MNIST_fp32_state_dict.pt", map_location=DEVICE
    )
    fp32_mnist.load_state_dict(checkpoint)

    acc_test = torch_inference(fp32_mnist, test_loader, device=DEVICE)

    # The model is compiled through 'compile_torch_model' method
    q_module = compile_torch_model(
        fp32_mnist.to(DEVICE),
        torch_inputset=data_calibration,
        n_bits=6,
        rounding_threshold_bits={"n_bits": 6, "method": "APPROXIMATE"},
        p_error=0.1,
    )

    fhe_timing = []
    y_predictions = []
    fhe_samples = 3

    # The model is evaluated in 'simulation' mode on the first batches of the test
    # data-set: the loop stops as soon as the FHE latency samples have been collected,
    # i.e. after `fhe_samples + 1` simulated batches.
    for i, (data, labels) in enumerate(test_loader):

        data, labels = data.detach().cpu().numpy(), labels.detach().cpu().numpy()
        simulate_predictions = q_module.forward(data, fhe="simulate")
        y_predictions.extend(simulate_predictions.argmax(1) == labels)

        # Measure FHE latency on exactly `fhe_samples` samples and take the minimum
        if i >= fhe_samples:
            break

        start_time = time.time()
        q_module.forward(data[0, None], fhe="execute")
        fhe_timing.append(time.time() - start_time)

    results_cml[nb_layers] = [acc_test, np.mean(y_predictions), np.min(fhe_timing)]

    print(
        f"Running NN-{nb_layers} on a {MACHINE} machine:\n"
        f"Accuracy in fp32 : {results_cml[nb_layers][0]:.3%} for the test set\n"
        f"Accuracy in FHE simulation mode : {results_cml[nb_layers][1]:.3%} for the test set\n"
        f"Timing in FHE: {results_cml[nb_layers][2]:.3f}s per sample."
    )


# ## Conclusion
#
# Here is a recap of the results obtained by running this notebook, compared to the results in the whitepaper [1].

# In[6]:


import pandas as pd

# One row per network depth: paper accuracy and latency, our fp32 accuracy, our simulated
# FHE accuracy, our FHE latency, and the resulting speed-up over the paper's latency.
pd.DataFrame(
    [
        [
            depth,
            PAPER_NOTES[depth][1],
            PAPER_NOTES[depth][0],
            results_cml[depth][0],
            results_cml[depth][1],
            results_cml[depth][2],
            PAPER_NOTES[depth][0] / results_cml[depth][2],
        ]
        for depth in (20, 50)
    ],
    columns=[
        "Num Layers",
        "Accuracy [1]",
        "FHE Latency [1]",
        "Our Accuracy fp32",
        "Our Accuracy FHE",
        "Our FHE Latency",
        "Speedup",
    ],
).style.format(
    {
        "Accuracy [1]": "{:,.1%}".format,
        "FHE Latency [1]": "{:,.2f}s".format,
        "Our Accuracy fp32": "{:,.1%}".format,
        "Our Accuracy FHE": "{:,.1%}".format,
        "Our FHE Latency": "{:,.2f}s".format,
        "Speedup": "{:,.1f}x".format,
    }
)