diff --git a/README.md b/README.md index e650f0973..9cd488eee 100644 --- a/README.md +++ b/README.md @@ -207,7 +207,7 @@ Concrete ML built-in models have APIs that are almost identical to their scikit- - [Encrypted Large Language Model](use_case_examples/llm/): converting a user-defined part of a Large Language Model for encrypted text generation. This demo shows the trade-off between quantization and accuracy for text generation and shows how to run the model in FHE. - [Private inference for federated learned models](use_case_examples/federated_learning/): private training of a Logistic Regression model and then importing the model into Concrete ML and performing encrypted prediction. -- [Titanic](use_case_examples/titanic/KaggleTitanic.ipynb): solving the [Kaggle Titanic competition](https://www.kaggle.com/c/titanic/). Implemented with XGBoost from Concrete ML, this example comes as a companion of the [Kaggle notebook](https://www.kaggle.com/code/concretemlteam/titanic-with-privacy-preserving-machine-learning), and was the subject of a blogpost in [KDnuggets](https://www.kdnuggets.com/2022/08/machine-learning-encrypted-data.html). +- [Titanic](use_case_examples/titanic/KaggleTitanic.ipynb): solving the [Kaggle Titanic competition](https://www.kaggle.com/c/titanic/). Implemented with XGBoost from Concrete ML, this example comes as a companion of the [Kaggle notebook](https://www.kaggle.com/code/concretemlteam/titanic-with-privacy-preserving-machine-learning). - [CIFAR10 FHE-friendly model with Brevitas](use_case_examples/cifar/cifar_brevitas_training): training a VGG9 FHE-compatible neural network using Brevitas, and a script to run the neural network in FHE. Execution in FHE takes ~4 minutes per image and shows an accuracy of 88.7%. - [CIFAR10 / CIFAR100 FHE-friendly models with Transfer Learning approach](use_case_examples/cifar/cifar_brevitas_finetuning): series of three notebooks, that convert a pre-trained FP32 VGG11 neural network into a quantized model using Brevitas. The model is fine-tuned on the CIFAR data-sets, converted for FHE execution with Concrete ML and evaluated using FHE simulation. For CIFAR10 and CIFAR100, respectively, our simulations show an accuracy of 90.2% and 68.2%. 
diff --git a/script/make_utils/nbqa.sh b/script/make_utils/nbqa.sh index 16056c2aa..cc7699cf9 100755 --- a/script/make_utils/nbqa.sh +++ b/script/make_utils/nbqa.sh @@ -28,7 +28,7 @@ function nbqa_ize() # %matplotlib inline # --extend-ignore=DAR is because we don't want to run darglint poetry run nbqa flake8 "${NB}" --max-line-length 100 --per-file-ignores="__init__.py:F401" \ - --ignore=E402 --extend-ignore=DAR + --ignore=E402,W503 --extend-ignore=DAR # With some ignored errors, since we don't care: # that the notebook filename is capitalized (invalid-name) @@ -46,9 +46,10 @@ function nbqa_ize() --disable=missing-module-docstring --disable=missing-class-docstring \ --disable=missing-function-docstring \ --disable=wrong-import-position --disable=ungrouped-imports \ - --disable=wrong-import-order\ + --disable=wrong-import-order \ --extension-pkg-whitelist=numpy --disable=redefined-outer-name \ - $PYLINT_EXTRA_OPTIONS + --disable=line-too-long \ + ${PYLINT_EXTRA_OPTIONS} fi } @@ -99,6 +100,4 @@ then echo "Running nbqa on ${NOTEBOOK}" PYLINT_EXTRA_OPTIONS="" nbqa_ize "${NOTEBOOK}" "${PYLINT_EXTRA_OPTIONS}" -fi - - +fi \ No newline at end of file diff --git a/src/concrete/ml/torch/hybrid_model.py b/src/concrete/ml/torch/hybrid_model.py index fcbc6287b..a8d4b27e6 100644 --- a/src/concrete/ml/torch/hybrid_model.py +++ b/src/concrete/ml/torch/hybrid_model.py @@ -492,6 +492,8 @@ def compile_model( """ # We do a forward pass where we accumulate inputs to use for compilation self.set_fhe_mode(HybridFHEMode.CALIBRATE) + + # Run the model to get the calibration data self.model(x) self.configuration = configuration diff --git a/src/concrete/ml/torch/lora.py b/src/concrete/ml/torch/lora.py index 0a05e577b..5a069737a 100644 --- a/src/concrete/ml/torch/lora.py +++ b/src/concrete/ml/torch/lora.py @@ -1,6 +1,6 @@ """This module contains classes for LoRA (Low-Rank Adaptation) training and custom layers.""" -from typing import List +from typing import List, Tuple, Union import torch @@ -32,15 +32,16 @@ class LoraTraining(torch.nn.Module): Args: inference_model (torch.nn.Module): The base model to be fine-tuned. - + n_layers_to_skip (int): Number of layers to skip. Linear layers that do not require + gradient to be propagated are skipped. Defaults to 1. """ - def __init__(self, inference_model) -> None: + def __init__(self, inference_model, n_layers_to_skip: int = 1) -> None: super().__init__() self.inference_model = inference_model - self.replace_layers_with_custom(self.inference_model) + self.replace_layers_with_custom(self.inference_model, n_layers_to_skip) self.optimizer = None self.lr_scheduler = None @@ -52,7 +53,7 @@ def __init__(self, inference_model) -> None: self.run_optimizer = False @staticmethod - def replace_layers_with_custom(model: torch.nn.Module, skip_first: bool = True): + def replace_layers_with_custom(model: torch.nn.Module, n_layers_to_skip: int): """Replace linear layers with custom ones. This method replaces eligible linear layers in the model with custom layers @@ -60,21 +61,19 @@ def replace_layers_with_custom(model: torch.nn.Module, skip_first: bool = True): Args: model (torch.nn.Module): The model to replace layers in. - skip_first (bool): Whether to skip the first eligible layer. + n_layers_to_skip (int): Number of layers to skip. 
""" - # Flag to track if the first layer has been skipped - skipped = False def _replace(module: torch.nn.Module): - nonlocal skipped + nonlocal n_layers_to_skip for name, child in list(module.named_children()): # Skip modules containing "lora" in their name if "lora" in name: continue if isinstance(child, LINEAR_LAYERS): - if skip_first and not skipped: - skipped = True + if n_layers_to_skip > 0: + n_layers_to_skip -= 1 # Skip the first eligible layer continue @@ -129,11 +128,15 @@ def update_training_parameters( self.gradient_accumulation_steps = 1 self.max_grad_norm = None - def forward(self, inputs): + def forward( + self, inputs: Tuple[torch.Tensor, ...] + ) -> Tuple[torch.Tensor, Union[torch.Tensor, None]]: """Forward pass of the LoRA training module. Args: - inputs: A tuple containing input tensors and labels. + inputs (tuple): A tuple containing the input tensors. The first two elements should be + the features and the labels. Additional elements will be passed + to the model as needed. Returns: A tuple containing the loss and gradient norm. @@ -141,26 +144,41 @@ def forward(self, inputs): Raises: ValueError: If the model does not return a loss when `self.loss_fn` is None. """ + assert ( + len(inputs) >= 2 + ), "Expected at least two inputs in the tuple: inputs (x) and targets (y)" + # Remove this once hybrid model supports multiple inputs # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4568 - x, y = inputs + # Extract x (input features) and y (labels) + x, y = inputs[0], inputs[1] - # Forward pass - if self.loss_fn is None: + # Additional inputs, if any (e.g., attention_mask) + additional_inputs = inputs[2:] - # Assume model computes loss internally - outputs = self.inference_model(x, labels=y) + # If no loss function is provided, we assume the model can compute the loss internally + if self.loss_fn is None: + # Forward pass through the inference model with labels + outputs = self.inference_model(x, labels=y, *additional_inputs) - # Use getattr to safely access the loss attribute + # Use getattr to safely access the loss attribute from the outputs loss = getattr(outputs, "loss", None) if loss is None: raise ValueError( "The model did not return a loss. Ensure that 'labels' are correctly provided." ) else: - outputs = self.inference_model(x) + # Forward pass through the inference model without labels + outputs = self.inference_model(x, *additional_inputs) + + # If the outputs contain several keys, extract the logits + if isinstance(outputs, dict) and "logits" in outputs: + outputs = outputs["logits"] + + # Compute the loss using the provided loss function loss = self.loss_fn(outputs, y) + # Scale the loss based on gradient accumulation loss = loss / self.gradient_accumulation_steps # Update gradients @@ -188,7 +206,7 @@ def forward(self, inputs): elif self.calibrate: self.inference_model.zero_grad() - return (loss, grad_norm) + return loss, grad_norm def toggle_calibrate(self, enable: bool = True): """Toggle calibration mode. 
diff --git a/tests/torch/test_lora.py b/tests/torch/test_lora.py index b1d30bfb0..a3ee1a03e 100644 --- a/tests/torch/test_lora.py +++ b/tests/torch/test_lora.py @@ -74,7 +74,7 @@ def forward(self, x, labels=None): loss = ((logits - labels) ** 2).mean() Output = namedtuple("Output", ["loss"]) return Output(loss=loss) - return logits + return {"logits": logits, "something_else": torch.tensor(1.0)} @pytest.fixture @@ -89,20 +89,20 @@ def base_lora_training(base_inference_model): return LoraTraining(base_inference_model) -@pytest.mark.parametrize("skip_first", [True, False]) -def test_lora_training_replace_layers(base_lora_training, skip_first): +@pytest.mark.parametrize("n_layers_to_skip", [0, 1, 2]) +def test_lora_training_replace_layers(base_lora_training, n_layers_to_skip): """Test that LoraTraining replaces layers correctly.""" original_linear1 = base_lora_training.inference_model.linear1 original_lora_layer = base_lora_training.inference_model.lora_layer # Replace layers with custom layers base_lora_training.replace_layers_with_custom( - base_lora_training.inference_model, skip_first=skip_first + base_lora_training.inference_model, n_layers_to_skip=n_layers_to_skip ) inference_model = base_lora_training.inference_model - if skip_first: + if n_layers_to_skip > 0: # First eligible layer should be skipped assert inference_model.linear1 is original_linear1 else: @@ -169,7 +169,7 @@ def test_lora_training_forward_with_loss_fn(base_lora_training): y = torch.tensor([[0.5, 1.5]]) outputs = base_lora_training.inference_model(x) - expected_loss = loss_fn(outputs, y) / base_lora_training.gradient_accumulation_steps + expected_loss = loss_fn(outputs["logits"], y) / base_lora_training.gradient_accumulation_steps loss, _ = base_lora_training((x, y)) @@ -225,7 +225,7 @@ def test_lora_training_forward_with_optimizer(base_lora_training): SimpleNamespace(gradient_accumulation_steps=1, max_grad_norm=1.0), ) base_lora_training.replace_layers_with_custom( - base_lora_training.inference_model, skip_first=False + base_lora_training.inference_model, n_layers_to_skip=0 ) base_lora_training.toggle_run_optimizer(True) diff --git a/use_case_examples/lora_finetuning/GPT2FineTuneHybrid.ipynb b/use_case_examples/lora_finetuning/GPT2FineTuneHybrid.ipynb index 90df84e33..c9eada04d 100644 --- a/use_case_examples/lora_finetuning/GPT2FineTuneHybrid.ipynb +++ b/use_case_examples/lora_finetuning/GPT2FineTuneHybrid.ipynb @@ -7,9 +7,7 @@ "source": [ "# Fine-Tuning GPT-2 on Encrypted Data with LoRA and Concrete-ML\n", "\n", - "In this notebook, we peform fine-tuning of a GPT-2 model using LoRA and Concrete-ML. This allows us to fine-tune a model in a privacy-preserving manner.\n", - "\n", - "LoRA weight can be used " + "In this notebook, we perform fine-tuning of a GPT-2 model using LoRA and Concrete-ML." 
] }, { @@ -26,17 +24,11 @@ "\n", "import matplotlib.pyplot as plt\n", "import torch\n", - "from peft import LoraConfig, TaskType, get_peft_model\n", + "from datasets import Dataset\n", + "from peft import LoraConfig, get_peft_model\n", "from tqdm import tqdm\n", - "from transformers import (\n", - " AutoModelForCausalLM,\n", - " AutoTokenizer,\n", - " DataCollatorForLanguageModeling,\n", - " TextDataset,\n", - " Trainer,\n", - " TrainingArguments,\n", - ")\n", - "from utils_lora import generate_text, print_weights_and_size\n", + "from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments\n", + "from utils_lora import generate_and_print, print_weights_and_size\n", "\n", "from concrete.ml.torch.hybrid_model import HybridFHEModel\n", "from concrete.ml.torch.lora import LoraTraining, get_remote_names\n", @@ -62,6 +54,7 @@ "# Ensure tokenizer has a pad token\n", "if tokenizer.pad_token is None:\n", " tokenizer.pad_token = tokenizer.eos_token\n", + "model.config.pad_token_id = model.config.eos_token_id\n", "\n", "# Freeze model weights\n", "for param in model.parameters():\n", @@ -79,17 +72,17 @@ "output_type": "stream", "text": [ "What is FHE?\n", - "\n", "FHE is a new type of energy storage that is designed to be used in a variety of applications. It is used to store energy in\n" ] } ], "source": [ - "# Example usage of the pre-trained model\n", - "torch.manual_seed(SEED)\n", - "prompt = \"What is FHE?\"\n", - "generated_text = generate_text(prompt, model, tokenizer)\n", - "print(generated_text)" + "generate_and_print(\n", + " prompt=\"What is FHE?\",\n", + " model=model,\n", + " tokenizer=tokenizer,\n", + " seed=SEED,\n", + ")" ] }, { @@ -99,16 +92,17 @@ "metadata": {}, "outputs": [], "source": [ - "# Configure LoRA\n", + "# Apply LoRA to the model\n", + "# target_modules can be set to \"all-linear\"\n", + "# to target all modules. 
By default only the\n", + "# c_attn projection are fine-tuned with lora.\n", "peft_config = LoraConfig(\n", - " task_type=TaskType.CAUSAL_LM,\n", " r=8,\n", " lora_alpha=32,\n", - " lora_dropout=0.05,\n", - " fan_in_fan_out=True,\n", + " lora_dropout=0.1,\n", + " bias=\"none\",\n", + " task_type=\"CAUSAL_LM\",\n", ")\n", - "\n", - "# Apply LoRA to the model\n", "peft_model = get_peft_model(model, peft_config)" ] }, @@ -128,17 +122,115 @@ "execution_count": 6, "id": "d10d71e8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "656e3f624a7f4c879b46129e841e4db1", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Map: 0%| | 0/34 [00:00 0:\n", + " input_ids += [tokenizer.pad_token_id] * padding_length\n", + " labels += [-100] * padding_length\n", + " attention_mask += [0] * padding_length\n", + " else:\n", + " input_ids = input_ids[:BLOCK_SIZE]\n", + " labels = labels[:BLOCK_SIZE]\n", + " attention_mask = attention_mask[:BLOCK_SIZE]\n", + "\n", + " input_ids_list.append(input_ids)\n", + " labels_list.append(labels)\n", + " attention_masks_list.append(attention_mask)\n", + "\n", + " return {\n", + " \"input_ids\": input_ids_list,\n", + " \"labels\": labels_list,\n", + " \"attention_mask\": attention_masks_list,\n", + " }\n", + "\n", + "\n", + "# Apply the tokenization\n", + "tokenized_datasets = dataset.map(\n", + " tokenize_function, batched=True, remove_columns=[\"question\", \"answer\"]\n", + ")\n", + "\n", + "# Since we've already handled padding and labels, we can use a custom data collator\n", + "\n", + "\n", + "def data_collator(features):\n", + " batch = {}\n", + " batch[\"input_ids\"] = torch.tensor([f[\"input_ids\"] for f in features], dtype=torch.long)\n", + " batch[\"labels\"] = torch.tensor([f[\"labels\"] for f in features], dtype=torch.long)\n", + " batch[\"attention_mask\"] = torch.tensor(\n", + " [f[\"attention_mask\"] for f in features], dtype=torch.long\n", + " )\n", + " return batch" ] }, { @@ -148,29 +240,42 @@ "metadata": {}, "outputs": [], "source": [ - "# Set up data collator for language modeling\n", - "data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)\n", - "\n", "# Define training arguments\n", - "EPOCHS = 50\n", + "EPOCHS = 20\n", "PER_DEVICE_TRAIN_BATCH_SIZE = 4\n", "\n", "training_args = TrainingArguments(\n", " output_dir=\"./checkpoints\",\n", " num_train_epochs=EPOCHS,\n", - " per_device_train_batch_size=8,\n", - " gradient_accumulation_steps=2,\n", + " per_device_train_batch_size=PER_DEVICE_TRAIN_BATCH_SIZE,\n", + " gradient_accumulation_steps=1,\n", " save_total_limit=1,\n", " use_cpu=True,\n", - " learning_rate=5e-3,\n", - " logging_strategy=\"epoch\",\n", - " optim=\"adamw_torch\",\n", + " learning_rate=2e-3,\n", + " lr_scheduler_type=\"linear\",\n", " seed=SEED,\n", " data_seed=SEED,\n", - " weight_decay=0.0,\n", - " warmup_steps=0,\n", - " max_grad_norm=1.0,\n", - ")" + " warmup_steps=10,\n", + " weight_decay=0.01,\n", + " prediction_loss_only=True,\n", + ")\n", + "\n", + "\n", + "def causal_lm_loss(logits, labels, ignore_index=-100):\n", + " # Shift logits and labels for next-token prediction\n", + " shift_logits = logits[..., :-1, :].contiguous()\n", + " shift_labels = labels[..., 1:].contiguous()\n", + "\n", + " # Flatten the tensors\n", + " shift_logits = shift_logits.view(-1, shift_logits.size(-1))\n", + " shift_labels = shift_labels.view(-1)\n", + "\n", + " # Compute the loss, ignoring padding tokens\n", + " loss = 
torch.nn.functional.cross_entropy(\n", + " shift_logits, shift_labels, ignore_index=ignore_index, reduction=\"mean\"\n", + " )\n", + "\n", + " return loss" ] }, { @@ -184,8 +289,8 @@ "trainer = Trainer(\n", " model=peft_model,\n", " args=training_args,\n", + " train_dataset=tokenized_datasets,\n", " data_collator=data_collator,\n", - " train_dataset=train_dataset,\n", ")\n", "\n", "# Prepare for training\n", @@ -199,7 +304,7 @@ "trainer.create_optimizer_and_scheduler(num_training_steps=max_steps)\n", "\n", "lora_training.update_training_parameters(\n", - " trainer.optimizer, trainer.lr_scheduler, None, training_args\n", + " trainer.optimizer, trainer.lr_scheduler, causal_lm_loss, training_args\n", ")" ] }, @@ -233,14 +338,11 @@ "outputs": [], "source": [ "# Prepare input data for calibration\n", - "input_tensor = torch.randint(0, 2, (PER_DEVICE_TRAIN_BATCH_SIZE, BLOCK_SIZE)) * (\n", - " tokenizer.vocab_size - 1\n", - ")\n", - "label_tensor = torch.randint(0, 2, (PER_DEVICE_TRAIN_BATCH_SIZE, BLOCK_SIZE)) * (\n", - " tokenizer.vocab_size - 1\n", - ")\n", + "input_tensor = torch.randint(0, tokenizer.vocab_size, (PER_DEVICE_TRAIN_BATCH_SIZE, BLOCK_SIZE))\n", + "label_tensor = torch.randint(0, tokenizer.vocab_size, (PER_DEVICE_TRAIN_BATCH_SIZE, BLOCK_SIZE))\n", + "attention_mask = torch.ones((PER_DEVICE_TRAIN_BATCH_SIZE, BLOCK_SIZE))\n", "\n", - "inputset = (input_tensor, label_tensor)" + "inputset = (input_tensor, label_tensor, attention_mask)" ] }, { @@ -251,9 +353,9 @@ "outputs": [], "source": [ "# Calibrate and compile the model\n", - "hybrid_model.model.toggle_calibrate(enable=True)\n", + "lora_training.toggle_calibrate(enable=True)\n", "hybrid_model.compile_model(inputset, n_bits=16)\n", - "hybrid_model.model.toggle_calibrate(enable=False)" + "lora_training.toggle_calibrate(enable=False)" ] }, { @@ -264,30 +366,40 @@ "outputs": [], "source": [ "def train_custom_model(\n", - " hybrid_model, train_dataloader, training_args, fhe=\"disable\"\n", + " hybrid_model, train_dataloader, training_args, tokenizer, fhe=\"disable\"\n", "): # pylint: disable=too-many-locals\n", " device = \"cpu\"\n", " hybrid_model.model.to(device)\n", "\n", " # Training loop\n", - " hybrid_model.model.inference_model.train()\n", - " hybrid_model.model.run_optimizer = True\n", + " peft_model.train()\n", + " lora_training.run_optimizer = True\n", " total_epochs = int(training_args.num_train_epochs)\n", " epoch_pbar = tqdm(total=total_epochs, desc=\"Training Progress\", position=0)\n", "\n", " total_batched_samples = 0\n", " epoch_losses = [] # List to store the loss for each epoch\n", "\n", + " # Generate text before the first epoch\n", + " print(\"Generating text before the first epoch:\\n\")\n", + " prompt = \"What is FHE?\"\n", + " hybrid_model.set_fhe_mode(\"disable\")\n", + " generate_and_print(prompt, peft_model, tokenizer, SEED)\n", + " hybrid_model.set_fhe_mode(fhe)\n", + "\n", " for epoch in range(total_epochs):\n", " total_loss = 0\n", " grad_norms = []\n", "\n", " for _, batch in enumerate(train_dataloader):\n", + "\n", " total_batched_samples += 1\n", "\n", " batch = {k: v.to(device) for k, v in batch.items()}\n", "\n", - " loss, grad_norm = hybrid_model((batch[\"input_ids\"], batch[\"labels\"]), fhe=fhe)\n", + " loss, grad_norm = hybrid_model(\n", + " (batch[\"input_ids\"], batch[\"labels\"], batch[\"attention_mask\"]), fhe=fhe\n", + " )\n", "\n", " total_loss += loss.item()\n", "\n", @@ -295,7 +407,7 @@ " grad_norms.append(grad_norm)\n", "\n", " # Get current learning rate\n", - " current_lr = 
hybrid_model.model.lr_scheduler.get_last_lr()[0]\n", + "            current_lr = lora_training.lr_scheduler.get_last_lr()[0]\n", "\n", "            # Get last grad norm\n", "            current_grad_norm = grad_norms[-1] if grad_norms else None\n", @@ -309,12 +421,19 @@ "                f\"Loss: {total_loss:.4f}, grad norm: {current_grad_norm}, lr: {current_lr}\"\n", "            )\n", "\n", + "        # Generate text after each epoch\n", + "        prompt = \"What is FHE?\"\n", + "        hybrid_model.set_fhe_mode(\"disable\")\n", + "        generate_and_print(prompt, peft_model, tokenizer, SEED)\n", + "        hybrid_model.set_fhe_mode(fhe)\n", + "\n", + "        print(\"\\n\" + \"-\" * 50)  # Separator for readability\n", "        epoch_pbar.update(1)\n", "\n", "        # Save model checkpoint\n", "        if training_args.output_dir is not None:\n", "            save_path = f\"{training_args.output_dir}/checkpoint-{epoch + 1}\"\n", - "            hybrid_model.model.inference_model.save_pretrained(save_path)\n", + "            peft_model.save_pretrained(save_path)\n", "\n", "    epoch_pbar.close()\n", "\n", @@ -338,712 +457,382 @@ "name": "stderr", "output_type": "stream", "text": [ - "Training Progress: 2%|▏ | 1/50 [03:51<3:09:00, 231.45s/it]" + "Training Progress: 0%| | 0/100 [00:00" ] @@ -1057,12 +846,20 @@ "tokenizer.parallelism = False\n", "\n", "# Train the model using FHE simulation\n", - "train_custom_model(hybrid_model, train_dataloader, training_args, fhe=\"simulate\")" + "train_custom_model(hybrid_model, train_dataloader, training_args, tokenizer, fhe=\"simulate\")" ] }, { "cell_type": "markdown", "id": "65d448c8", "metadata": {}, "source": [ "Note that our goal is to showcase the use of FHE for fine-tuning a model. The dataset used contains 68 examples for a total of 2386 tokens. This is a very small dataset, which does not allow the model to learn a lot of information or produce very interesting results." ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "id": "bd666f38", "metadata": {}, "outputs": [], @@ -1076,7 +873,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 18, "id": "3e91ad0b", "metadata": {}, "outputs": [ @@ -1084,9 +881,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "What is FHE??\n", - "\n", - "Fully Homomorphic Encryption (FHE) is a groundbreaking cryptographic technique that allows computations to be performed directly on encrypted data without\n" + "who invented FHE?\n", + "FHE was first proposed by Craig Gentry in 2009, making it easier to use plaintexts on encrypted data. His breakthrough demonstrated ability to\n" ] } ], @@ -1095,9 +891,9 @@ "# Seed for reproducibility\n", "torch.manual_seed(SEED)\n", "\n", - "prompt = \"What is FHE?\"\n", - "generated_text = generate_text(prompt, fine_tuned_model, tokenizer)\n", - "print(generated_text)" + "fine_tuned_model.enable_adapter_layers()\n", + "prompt = \"who invented FHE?\"\n", + "generate_and_print(prompt, fine_tuned_model, tokenizer, SEED)" ] }, { @@ -1111,9 +907,7 @@ "output_type": "stream", "text": [ "What is FHE?\n", - "\n", - "FHE is a term that refers to the ability to generate a number of numbers from a given number.\n", - ". The number is the number\n" + "FHE is a new type of energy storage that is designed to be used in a variety of applications. 
It is used to store energy in\n" ] } ], @@ -1125,8 +919,7 @@ "peft_model.disable_adapter_layers()\n", "\n", "prompt = \"What is FHE?\"\n", - "generated_text = generate_text(prompt, fine_tuned_model, tokenizer)\n", - "print(generated_text)" + "generate_and_print(prompt, peft_model, tokenizer, SEED)" ] }, { diff --git a/use_case_examples/lora_finetuning/data_finetune/what_is_fhe.txt b/use_case_examples/lora_finetuning/data_finetune/what_is_fhe.txt index c30719d71..b15cbe26c 100644 --- a/use_case_examples/lora_finetuning/data_finetune/what_is_fhe.txt +++ b/use_case_examples/lora_finetuning/data_finetune/what_is_fhe.txt @@ -1,47 +1,68 @@ -Title: An In-depth Look at Fully Homomorphic Encryption (FHE) and Its Applications - -Introduction - -Fully Homomorphic Encryption (FHE) is a groundbreaking cryptographic technique that allows computations to be performed directly on encrypted data without the need for decryption. This revolutionary technology has the potential to significantly enhance privacy and security in various fields, including cloud computing, healthcare, finance, and more. This document aims to provide a comprehensive understanding of FHE, its working principles, use cases, and its potential impact on the future of data privacy and security. - -Understanding Fully Homomorphic Encryption - -To understand FHE, it is essential first to grasp the concept of homomorphic encryption. Homomorphic encryption is a method that allows computations on ciphertexts, generating an encrypted result that, when decrypted, matches the result of operations performed on the plaintext. There are different types of homomorphic encryption schemes, such as partially homomorphic encryption (PHE) and somewhat homomorphic encryption (SHE). However, FHE is the most advanced and versatile form as it supports an arbitrary number of operations on encrypted data without any degradation of the ciphertext. - -The concept of FHE was first introduced by Rivest, Adleman, and Dertouzos in 1978. However, it was not until 2009 that Craig Gentry, a computer scientist, proposed a practical and viable FHE scheme. Gentry's breakthrough involved creating a bootstrapping technique that allows the evaluation of arbitrary circuits on encrypted data. - -Working Principles of FHE - -FHE involves three primary steps: encryption, evaluation, and decryption. - -1. Encryption: In FHE, the data owner encrypts the data using a public key generated by the FHE scheme. The encrypted data is then sent to the data processor or cloud server for computation. - -2. Evaluation: The data processor performs computations directly on the encrypted data without decrypting it. The FHE scheme ensures that the result remains encrypted even after the computation. - -3. Decryption: The encrypted result is sent back to the data owner, who can then decrypt it using the secret key. The decrypted output matches the result that would have been obtained if the operations had been performed on the plaintext. - -Use Cases and Applications of FHE - -FHE has immense potential in various fields where data privacy and security are paramount. Some of the most promising use cases include: - -1. Cloud Computing: FHE can enable secure outsourcing of computations to cloud service providers. By using FHE, organizations can store and process sensitive data in the cloud without revealing the actual data to the service provider. - -2. Healthcare: FHE can facilitate the secure sharing and analysis of electronic health records (EHRs) without compromising patient privacy. 
It can enable medical researchers to perform statistical analysis and develop new treatments based on encrypted patient data. - -3. Finance: Financial institutions can leverage FHE to securely process sensitive financial transactions and perform risk analysis without revealing the underlying data. - -4. Artificial Intelligence and Machine Learning: FHE can enable privacy-preserving machine learning by allowing computations on encrypted data sets. This can help protect intellectual property and maintain data privacy while still benefiting from collaborative learning models. - -Challenges and Future Directions - -Despite the significant potential of FHE, there are several challenges that need to be addressed before it can be widely adopted. Some of these challenges include: - -1. Performance: FHE schemes are currently computationally intensive and require significant processing power and storage. Researchers are working on optimizing FHE algorithms and developing hardware accelerators to improve performance. - -2. Implementation Complexity: Implementing FHE schemes requires advanced cryptographic expertise, which can be a barrier for many organizations. Developing user-friendly FHE libraries and tools can help address this challenge. - -3. Standardization: There is a need for standardization of FHE schemes to ensure interoperability and facilitate widespread adoption. - -Conclusion - -Fully Homomorphic Encryption is a transformative technology that holds the promise of revolutionizing data privacy and security. By enabling computations on encrypted data, FHE can help protect sensitive information in various domains, including cloud computing, healthcare, finance, and artificial intelligence. While there are challenges to be addressed, ongoing research and development efforts are paving the way for FHE to become a practical and widely adopted solution for enhancing privacy and security in the digital age. \ No newline at end of file +What is the primary advantage of FHE? +The primary advantage of Fully Homomorphic Encryption (FHE) is that it enables computations on encrypted data without exposing the underlying plaintext. This ensures data privacy even while processing, making it ideal for use cases where sensitive data, like financial or healthcare information, needs to be analyzed without being decrypted. +How does FHE differ from traditional encryption? +Traditional encryption methods require data to be decrypted before any meaningful computation can be performed on it. In contrast, Fully Homomorphic Encryption (FHE) allows computations to be performed directly on encrypted data, ensuring that the data remains secure throughout the entire process. +What are the practical applications of FHE? +Fully Homomorphic Encryption (FHE) can be applied in various fields such as secure cloud computing, privacy-preserving machine learning, encrypted databases, healthcare data analysis, and secure voting systems. Its ability to maintain encryption while processing data is particularly useful in industries where data privacy is critical. +How does FHE impact cloud computing? +FHE transforms cloud computing by allowing sensitive data to be processed by third-party cloud providers without ever exposing the data. This means companies can outsource data storage and processing without compromising privacy, as the data remains encrypted even during computation. +What are the key challenges with FHE? 
+Some key challenges with Fully Homomorphic Encryption (FHE) include computational efficiency and resource intensity. FHE algorithms require significantly more processing power and time compared to traditional encryption methods, making them less practical for real-time applications until further optimizations are made. +Can FHE be used in healthcare? +Yes, Fully Homomorphic Encryption (FHE) has promising applications in healthcare, where it can be used to securely analyze patient data, perform diagnostics, and enable research on sensitive medical information—all while ensuring that the data remains encrypted and private. +What is the relationship between FHE and privacy-preserving machine learning? +FHE plays a crucial role in privacy-preserving machine learning by allowing models to train and make predictions on encrypted data. This ensures that neither the data owner nor the model owner needs to expose their sensitive information, creating a secure environment for collaboration and data sharing. +How does FHE ensure data privacy? +FHE ensures data privacy by allowing computations on encrypted data without the need for decryption. As the data never exists in an unencrypted state during processing, unauthorized access is prevented, significantly reducing the risk of data breaches or exposure. +What are the security implications of FHE? +Fully Homomorphic Encryption (FHE) dramatically improves security by keeping data encrypted throughout its lifecycle, even during computations. This makes it far more difficult for malicious actors to access sensitive information, thus enhancing both data confidentiality and integrity. +What are the different types of homomorphic encryption? +There are three main types of homomorphic encryption: Partially Homomorphic Encryption (PHE), which supports a single type of operation (addition or multiplication); Somewhat Homomorphic Encryption (SHE), which supports limited operations on ciphertexts; and Fully Homomorphic Encryption (FHE), which supports arbitrary computations on encrypted data. +Is FHE currently practical for everyday use? +While Fully Homomorphic Encryption (FHE) has enormous potential, it is not yet practical for all everyday use cases due to its high computational overhead. However, ongoing research and optimization efforts are steadily making FHE more efficient, bringing it closer to broader adoption. +How is FHE related to public key encryption? +FHE is a type of public key encryption where computations can be performed on ciphertexts. Like public key systems, FHE uses a pair of keys—one for encryption and one for decryption. However, FHE goes beyond traditional public key encryption by enabling computations directly on the encrypted data. +What industries can benefit most from FHE? +Industries dealing with sensitive or confidential information, such as finance, healthcare, government, and cybersecurity, can benefit the most from Fully Homomorphic Encryption (FHE). These industries require robust privacy measures during data processing, making FHE an ideal solution for secure computations. +What is the future outlook for FHE? +The future of Fully Homomorphic Encryption (FHE) is promising, with ongoing advancements aimed at making it more efficient and accessible. As technology improves, FHE has the potential to become a standard tool for ensuring data privacy in a wide range of applications, from cloud computing to artificial intelligence. +What are some real-world use cases of FHE? 
+Real-world use cases of Fully Homomorphic Encryption (FHE) include secure financial transactions, privacy-preserving healthcare data analysis, encrypted cloud computing, secure machine learning, and even anonymous voting systems. FHE allows for the processing of sensitive data in all these fields without compromising privacy or security. +Who invented Fully Homomorphic Encryption (FHE)? +Fully Homomorphic Encryption (FHE) was first proposed by Craig Gentry in 2009. His breakthrough demonstrated how to perform arbitrary computations on encrypted data without needing to decrypt it, laying the foundation for what is now a growing field of research and application in secure computation. +What are the different schemes in FHE? +The different schemes in Fully Homomorphic Encryption (FHE) include Gentry’s original scheme, based on lattice-based cryptography, as well as more recent schemes such as BGV (Brakerski-Gentry-Vaikuntanathan), CKKS (Cheon-Kim-Kim-Song), and TFHE (Fast Torus FHE). Each of these schemes has different efficiency and functionality trade-offs, with some optimized for integer arithmetic, and others for real numbers or high-speed performance. +How does the CKKS scheme work in FHE? +The CKKS (Cheon-Kim-Kim-Song) scheme in FHE is designed to support approximate arithmetic on encrypted data, making it ideal for use cases such as privacy-preserving machine learning and signal processing. It allows for the encoding of real numbers and can handle a variety of computations, although with some trade-offs in precision. +What is the TFHE scheme, and why is it important? +TFHE (Torus Fully Homomorphic Encryption) is a fast FHE scheme that supports binary gates, making it highly efficient for low-latency applications. It’s particularly important for use cases like secure real-time decision-making, and it offers some of the best performance for practical implementations of FHE today. +How does FHE improve data privacy in finance? +In the financial sector, FHE enables secure computations on sensitive data such as transactions, credit scores, or risk assessments, without exposing the raw data to intermediaries or third parties. This ensures confidentiality and compliance with data privacy regulations like GDPR while still allowing valuable insights and analytics. +Can FHE be used for secure voting systems? +Yes, FHE can be used for secure voting systems by enabling the counting and tallying of votes on encrypted ballots. This ensures that the votes remain confidential throughout the process, preventing tampering or breaches while maintaining the integrity of the election results. +What is bootstrapping in FHE, and why is it important? +Bootstrapping in FHE is a technique that refreshes a ciphertext to reduce the noise that accumulates during computations. Without bootstrapping, the number of operations that can be performed on encrypted data is limited. Bootstrapping is essential for performing an unlimited number of computations in FHE schemes, although it is computationally expensive. +How is noise handled in FHE computations? +In FHE, noise is an inherent part of the encryption process and grows with each computation. If the noise becomes too large, it can cause the ciphertext to become unusable. Different FHE schemes manage this through bootstrapping or by limiting the depth of computations that can be performed before decryption. +How does FHE enhance privacy in machine learning? 
+FHE allows machine learning models to train on encrypted datasets, enabling privacy-preserving AI applications. This ensures that both the data and the model parameters remain confidential, allowing sensitive data to be used without exposing it to the model owner or external parties. +What is the BGV scheme in FHE? +The BGV (Brakerski-Gentry-Vaikuntanathan) scheme is an FHE scheme that supports both addition and multiplication operations over encrypted data, making it suitable for more complex computations. It is widely used in applications requiring arbitrary computations on encrypted integers, such as secure data analytics. +How does FHE contribute to secure cloud storage? +FHE contributes to secure cloud storage by allowing encrypted data to be processed directly in the cloud without needing to decrypt it. This enables users to leverage cloud computing resources for data analysis or machine learning while ensuring that their sensitive information remains private and secure from cloud service providers. +How is FHE applied in healthcare for privacy-preserving diagnostics? +FHE allows healthcare providers to securely analyze patient data, perform diagnostics, or even develop treatment plans without ever decrypting the sensitive information. This preserves the privacy of medical records and enables collaboration between healthcare institutions while complying with stringent privacy laws. +How does FHE handle real-time data processing? +While FHE has traditionally been too slow for real-time data processing due to its computational overhead, recent advancements in schemes like TFHE are making real-time applications more feasible. These schemes are optimized for low-latency computations, such as real-time decision-making in encrypted environments. +Why is FHE important for compliance with data protection laws? +FHE is critical for compliance with data protection laws like GDPR and HIPAA because it allows sensitive data to be processed without ever exposing the underlying information. This ensures that organizations can securely handle private data while adhering to strict privacy regulations. +What role does lattice-based cryptography play in FHE? +Lattice-based cryptography is the foundation of many FHE schemes, including the original scheme proposed by Craig Gentry. Lattices provide a mathematical structure that allows for secure, efficient encryption and supports the complex operations required for performing computations on encrypted data. +Is FHE compatible with blockchain technology? +FHE can be integrated with blockchain technology to enable privacy-preserving smart contracts and secure data transactions on encrypted data. This ensures that sensitive information stored or processed within blockchain networks remains confidential, even during computations or transactions. +How is FHE being optimized for future use? +FHE is being optimized through algorithmic improvements that reduce its computational overhead and increase efficiency. Newer schemes, better hardware acceleration, and techniques like batching and bootstrapping optimization are making FHE more practical for everyday applications. +What are you? +I am a GPT-2 model fine-tuned by Zama using Fully Homomorphic Encryption (FHE) principles. My purpose is to help users understand and interact with FHE concepts, allowing secure and private data computations. By integrating FHE into my training, I offer insights while preserving the confidentiality of sensitive information. +What are you? 
+I am a GPT-2 model fine-tuned by Zama using Fully Homomorphic Encryption (FHE) principles. My purpose is to help users understand and interact with FHE concepts, allowing secure and private data computations. By integrating FHE into my training, I offer insights while preserving the confidentiality of sensitive information. diff --git a/use_case_examples/lora_finetuning/requirements.txt b/use_case_examples/lora_finetuning/requirements.txt index 9c97da539..7ea93063a 100644 --- a/use_case_examples/lora_finetuning/requirements.txt +++ b/use_case_examples/lora_finetuning/requirements.txt @@ -1,4 +1,8 @@ +-e ../../. transformers==4.41.2 peft==0.11.1 Jinja2==3.1.4 matplotlib==3.7.5 +datasets==3.0.1 +jupyter==1.0.0 +tqdm==4.66.5 \ No newline at end of file diff --git a/use_case_examples/lora_finetuning/utils_lora.py b/use_case_examples/lora_finetuning/utils_lora.py index 36135a489..1cad80804 100644 --- a/use_case_examples/lora_finetuning/utils_lora.py +++ b/use_case_examples/lora_finetuning/utils_lora.py @@ -1,17 +1,42 @@ # Utility functions for LoRA finetuning notebook -from torch.nn import Embedding -from transformers import Conv1D +import os +import random +import numpy as np +import torch +import torch.backends.cudnn as cudnn + + +def generate_and_print(prompt, model, tokenizer, seed=None, max_new_tokens=30): + """ + Generates text based on the provided prompt and prints both the prompt and the generated text. + + Args: + prompt (str): The input prompt to generate text from. + model: The pre-trained language model. + tokenizer: The tokenizer associated with the model. + seed (int, optional): Seed for random number generators to ensure reproducibility. + max_new_tokens (int, optional): Maximum number of tokens to generate. Defaults to 30. + """ + # Set the environment variable for CuBLAS deterministic behavior + os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" + + # Set the random seed for reproducibility + if seed is not None: + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(seed) -def generate_text(prompt, model, tokenizer, max_new_tokens=30): # Encode the input prompt inputs = tokenizer.encode_plus(prompt, return_tensors="pt") # Generate text output = model.generate( - input_ids=inputs["input_ids"], - attention_mask=inputs["attention_mask"], + input_ids=inputs["input_ids"].to(model.device), + attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=max_new_tokens, num_return_sequences=1, no_repeat_ngram_size=2, @@ -24,7 +49,13 @@ def generate_text(prompt, model, tokenizer, max_new_tokens=30): # Decode the generated text generated_text = tokenizer.decode(output[0], skip_special_tokens=True) - return generated_text + + # Remove the prompt from the generated text if it is included + if generated_text.startswith(prompt): + generated_text = generated_text[len(prompt) :].strip() + + # Print the user prompt and the generated text separated by a newline + print(f"{prompt}\n{generated_text}") def print_weights_and_size(model, print_detail=False):
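Illustrative usage sketch (not part of this patch): the snippet below shows one way the updated LoraTraining API from src/concrete/ml/torch/lora.py could be driven in isolation, assuming this version of concrete-ml and torch are installed. The TinyModel class, tensor shapes, and hyper-parameters are hypothetical and only mirror the patterns exercised in tests/torch/test_lora.py; the GPT-2 notebook above remains the reference end-to-end flow.

# Hypothetical example -- not taken from the repository. It demonstrates the new
# n_layers_to_skip argument and the tuple-based forward signature of LoraTraining.
from types import SimpleNamespace

import torch

from concrete.ml.torch.lora import LoraTraining


class TinyModel(torch.nn.Module):
    """Toy model returning a dict with "logits", the format the new forward handles."""

    def __init__(self):
        super().__init__()
        self.linear1 = torch.nn.Linear(4, 4)
        self.linear2 = torch.nn.Linear(4, 2)

    def forward(self, x):
        return {"logits": self.linear2(torch.relu(self.linear1(x)))}


model = TinyModel()

# n_layers_to_skip=1 leaves the first eligible linear layer (linear1) untouched
lora_training = LoraTraining(model, n_layers_to_skip=1)

# Provide an explicit loss function; gradient accumulation and clipping settings
# are read from the training-arguments object (a SimpleNamespace is enough here)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1)
lora_training.update_training_parameters(
    optimizer,
    lr_scheduler,
    torch.nn.functional.mse_loss,
    SimpleNamespace(gradient_accumulation_steps=1, max_grad_norm=1.0),
)

x = torch.randn(2, 4)
y = torch.randn(2, 2)

# forward takes a tuple: (inputs, labels, *additional_inputs such as an attention mask)
loss, grad_norm = lora_training((x, y))
print(loss.item(), grad_norm)

In the notebook above, the same object is wrapped in a HybridFHEModel, run_optimizer is switched on inside train_custom_model, and the input tuple gains a third element (the attention mask), which the new forward passes to the model as an additional input.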