chore/improve_gpt2_use_case (#915)
jfrery authored Oct 10, 2024
1 parent cf7be06 commit fefc19d
Showing 9 changed files with 488 additions and 620 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -207,7 +207,7 @@ Concrete ML built-in models have APIs that are almost identical to their scikit-

- [Encrypted Large Language Model](use_case_examples/llm/): converting a user-defined part of a Large Language Model for encrypted text generation. This demo shows the trade-off between quantization and accuracy for text generation and shows how to run the model in FHE.
- [Private inference for federated learned models](use_case_examples/federated_learning/): private training of a Logistic Regression model and then importing the model into Concrete ML and performing encrypted prediction.
- [Titanic](use_case_examples/titanic/KaggleTitanic.ipynb): solving the [Kaggle Titanic competition](https://www.kaggle.com/c/titanic/). Implemented with XGBoost from Concrete ML, this example comes as a companion of the [Kaggle notebook](https://www.kaggle.com/code/concretemlteam/titanic-with-privacy-preserving-machine-learning), and was the subject of a blogpost in [KDnuggets](https://www.kdnuggets.com/2022/08/machine-learning-encrypted-data.html).
- [Titanic](use_case_examples/titanic/KaggleTitanic.ipynb): solving the [Kaggle Titanic competition](https://www.kaggle.com/c/titanic/). Implemented with XGBoost from Concrete ML, this example comes as a companion of the [Kaggle notebook](https://www.kaggle.com/code/concretemlteam/titanic-with-privacy-preserving-machine-learning).
- [CIFAR10 FHE-friendly model with Brevitas](use_case_examples/cifar/cifar_brevitas_training): training a VGG9 FHE-compatible neural network using Brevitas, and a script to run the neural network in FHE. Execution in FHE takes ~4 minutes per image and shows an accuracy of 88.7%.
- [CIFAR10 / CIFAR100 FHE-friendly models with Transfer Learning approach](use_case_examples/cifar/cifar_brevitas_finetuning): series of three notebooks, that convert a pre-trained FP32 VGG11 neural network into a quantized model using Brevitas. The model is fine-tuned on the CIFAR data-sets, converted for FHE execution with Concrete ML and evaluated using FHE simulation. For CIFAR10 and CIFAR100, respectively, our simulations show an accuracy of 90.2% and 68.2%.

11 changes: 5 additions & 6 deletions script/make_utils/nbqa.sh
@@ -28,7 +28,7 @@ function nbqa_ize()
# %matplotlib inline
# --extend-ignore=DAR is because we don't want to run darglint
poetry run nbqa flake8 "${NB}" --max-line-length 100 --per-file-ignores="__init__.py:F401" \
--ignore=E402 --extend-ignore=DAR
--ignore=E402,W503 --extend-ignore=DAR

# With some ignored errors, since we don't care:
# that the notebook filename is capitalized (invalid-name)
@@ -46,9 +46,10 @@ function nbqa_ize()
--disable=missing-module-docstring --disable=missing-class-docstring \
--disable=missing-function-docstring \
--disable=wrong-import-position --disable=ungrouped-imports \
--disable=wrong-import-order\
--disable=wrong-import-order \
--extension-pkg-whitelist=numpy --disable=redefined-outer-name \
$PYLINT_EXTRA_OPTIONS
--disable=line-too-long \
${PYLINT_EXTRA_OPTIONS}
fi
}

@@ -99,6 +100,4 @@ then
echo "Running nbqa on ${NOTEBOOK}"
PYLINT_EXTRA_OPTIONS=""
nbqa_ize "${NOTEBOOK}" "${PYLINT_EXTRA_OPTIONS}"
fi


fi
2 changes: 2 additions & 0 deletions src/concrete/ml/torch/hybrid_model.py
@@ -492,6 +492,8 @@ def compile_model(
"""
# We do a forward pass where we accumulate inputs to use for compilation
self.set_fhe_mode(HybridFHEMode.CALIBRATE)

# Run the model to get the calibration data
self.model(x)

self.configuration = configuration
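For context, here is a minimal sketch of the calibrate-then-compile flow that compile_model drives, assuming HybridFHEModel wraps a torch module together with the name of the submodule to execute remotely; the toy model, input shape, and module name below are illustrative assumptions, not part of this commit:

import torch

from concrete.ml.torch.hybrid_model import HybridFHEModel

# Hypothetical model and calibration batch (shapes are illustrative)
model = torch.nn.Sequential(torch.nn.Linear(10, 16), torch.nn.ReLU(), torch.nn.Linear(16, 2))
x = torch.randn(32, 10)

# Wrap the model; "2" names the last Linear layer, assumed to be the part run under FHE
hybrid_model = HybridFHEModel(model, "2")

# compile_model() switches to HybridFHEMode.CALIBRATE and runs self.model(x)
# (the forward pass added above) to collect calibration data before compiling
hybrid_model.compile_model(x)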
60 changes: 39 additions & 21 deletions src/concrete/ml/torch/lora.py
@@ -1,6 +1,6 @@
"""This module contains classes for LoRA (Low-Rank Adaptation) training and custom layers."""

from typing import List
from typing import List, Tuple, Union

import torch

@@ -32,15 +32,16 @@ class LoraTraining(torch.nn.Module):
Args:
inference_model (torch.nn.Module): The base model to be fine-tuned.
n_layers_to_skip (int): Number of layers to skip. Linear layers that do not require
gradient to be propagated are skipped. Defaults to 1.
"""

def __init__(self, inference_model) -> None:
def __init__(self, inference_model, n_layers_to_skip: int = 1) -> None:
super().__init__()

self.inference_model = inference_model

self.replace_layers_with_custom(self.inference_model)
self.replace_layers_with_custom(self.inference_model, n_layers_to_skip)

self.optimizer = None
self.lr_scheduler = None
@@ -52,29 +53,27 @@ def __init__(self, inference_model) -> None:
self.run_optimizer = False

@staticmethod
def replace_layers_with_custom(model: torch.nn.Module, skip_first: bool = True):
def replace_layers_with_custom(model: torch.nn.Module, n_layers_to_skip: int):
"""Replace linear layers with custom ones.
This method replaces eligible linear layers in the model with custom layers
that are compatible with the LoRA training procedure.
Args:
model (torch.nn.Module): The model to replace layers in.
skip_first (bool): Whether to skip the first eligible layer.
n_layers_to_skip (int): Number of layers to skip.
"""
# Flag to track if the first layer has been skipped
skipped = False

def _replace(module: torch.nn.Module):
nonlocal skipped
nonlocal n_layers_to_skip
for name, child in list(module.named_children()):
# Skip modules containing "lora" in their name
if "lora" in name:
continue

if isinstance(child, LINEAR_LAYERS):
if skip_first and not skipped:
skipped = True
if n_layers_to_skip > 0:
n_layers_to_skip -= 1

# Skip the first eligible layer
continue
@@ -129,38 +128,57 @@ def update_training_parameters(
self.gradient_accumulation_steps = 1
self.max_grad_norm = None

def forward(self, inputs):
def forward(
self, inputs: Tuple[torch.Tensor, ...]
) -> Tuple[torch.Tensor, Union[torch.Tensor, None]]:
"""Forward pass of the LoRA training module.
Args:
inputs: A tuple containing input tensors and labels.
inputs (tuple): A tuple containing the input tensors. The first two elements should be
the features and the labels. Additional elements will be passed
to the model as needed.
Returns:
A tuple containing the loss and gradient norm.
Raises:
ValueError: If the model does not return a loss when `self.loss_fn` is None.
"""
assert (
len(inputs) >= 2
), "Expected at least two inputs in the tuple: inputs (x) and targets (y)"

# Remove this once hybrid model supports multiple inputs
# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4568
x, y = inputs
# Extract x (input features) and y (labels)
x, y = inputs[0], inputs[1]

# Forward pass
if self.loss_fn is None:
# Additional inputs, if any (e.g., attention_mask)
additional_inputs = inputs[2:]

# Assume model computes loss internally
outputs = self.inference_model(x, labels=y)
# If no loss function is provided, we assume the model can compute the loss internally
if self.loss_fn is None:
# Forward pass through the inference model with labels
outputs = self.inference_model(x, labels=y, *additional_inputs)

# Use getattr to safely access the loss attribute
# Use getattr to safely access the loss attribute from the outputs
loss = getattr(outputs, "loss", None)
if loss is None:
raise ValueError(
"The model did not return a loss. Ensure that 'labels' are correctly provided."
)
else:
outputs = self.inference_model(x)
# Forward pass through the inference model without labels
outputs = self.inference_model(x, *additional_inputs)

# If the outputs contain several keys, extract the logits
if isinstance(outputs, dict) and "logits" in outputs:
outputs = outputs["logits"]

# Compute the loss using the provided loss function
loss = self.loss_fn(outputs, y)

# Scale the loss based on gradient accumulation
loss = loss / self.gradient_accumulation_steps

# Update gradients
@@ -188,7 +206,7 @@ def forward(self, inputs):
elif self.calibrate:
self.inference_model.zero_grad()

return (loss, grad_norm)
return loss, grad_norm

def toggle_calibrate(self, enable: bool = True):
"""Toggle calibration mode.
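Based on the new signatures above, a minimal sketch of how LoraTraining might be exercised, modeled on the test fixtures in this commit; the toy model and batch below are assumptions rather than the GPT-2 use case itself:

from collections import namedtuple

import torch

from concrete.ml.torch.lora import LoraTraining

Output = namedtuple("Output", ["loss"])


class ToyModel(torch.nn.Module):
    """Toy stand-in for a LoRA-augmented model that computes its loss internally."""

    def __init__(self):
        super().__init__()
        self.linear1 = torch.nn.Linear(8, 8)
        self.linear2 = torch.nn.Linear(8, 8)

    def forward(self, x, labels=None):
        logits = self.linear2(self.linear1(x))
        if labels is not None:
            # Mirror the HuggingFace-style output object with a .loss attribute
            return Output(loss=((logits - labels) ** 2).mean())
        return {"logits": logits}


# Skip the first eligible linear layer; remaining linear layers get custom replacements
lora_training = LoraTraining(ToyModel(), n_layers_to_skip=1)

x = torch.randn(4, 8)
y = torch.randn(4, 8)

# forward() takes a tuple (features, labels, *additional_inputs); any extra
# elements (for example an attention mask) are forwarded to the wrapped model
loss, grad_norm = lora_training((x, y))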
14 changes: 7 additions & 7 deletions tests/torch/test_lora.py
@@ -74,7 +74,7 @@ def forward(self, x, labels=None):
loss = ((logits - labels) ** 2).mean()
Output = namedtuple("Output", ["loss"])
return Output(loss=loss)
return logits
return {"logits": logits, "something_else": torch.tensor(1.0)}


@pytest.fixture
@@ -89,20 +89,20 @@ def base_lora_training(base_inference_model):
return LoraTraining(base_inference_model)


@pytest.mark.parametrize("skip_first", [True, False])
def test_lora_training_replace_layers(base_lora_training, skip_first):
@pytest.mark.parametrize("n_layers_to_skip", [0, 1, 2])
def test_lora_training_replace_layers(base_lora_training, n_layers_to_skip):
"""Test that LoraTraining replaces layers correctly."""
original_linear1 = base_lora_training.inference_model.linear1
original_lora_layer = base_lora_training.inference_model.lora_layer

# Replace layers with custom layers
base_lora_training.replace_layers_with_custom(
base_lora_training.inference_model, skip_first=skip_first
base_lora_training.inference_model, n_layers_to_skip=n_layers_to_skip
)

inference_model = base_lora_training.inference_model

if skip_first:
if n_layers_to_skip > 0:
# First eligible layer should be skipped
assert inference_model.linear1 is original_linear1
else:
@@ -169,7 +169,7 @@ def test_lora_training_forward_with_loss_fn(base_lora_training):
y = torch.tensor([[0.5, 1.5]])

outputs = base_lora_training.inference_model(x)
expected_loss = loss_fn(outputs, y) / base_lora_training.gradient_accumulation_steps
expected_loss = loss_fn(outputs["logits"], y) / base_lora_training.gradient_accumulation_steps

loss, _ = base_lora_training((x, y))

@@ -225,7 +225,7 @@ def test_lora_training_forward_with_optimizer(base_lora_training):
SimpleNamespace(gradient_accumulation_steps=1, max_grad_norm=1.0),
)
base_lora_training.replace_layers_with_custom(
base_lora_training.inference_model, skip_first=False
base_lora_training.inference_model, n_layers_to_skip=0
)
base_lora_training.toggle_run_optimizer(True)

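As a small self-contained illustration of the external-loss path the updated test exercises: when a loss_fn is provided and the model returns a dict, LoraTraining.forward extracts the "logits" entry before computing and scaling the loss. The tensors below are made-up values:

import torch

loss_fn = torch.nn.MSELoss()
gradient_accumulation_steps = 2

# Dict-style model output, as returned by the toy model in the test above
outputs = {"logits": torch.randn(1, 2), "something_else": torch.tensor(1.0)}
y = torch.tensor([[0.5, 1.5]])

# Same extraction and scaling as in LoraTraining.forward
logits = outputs["logits"] if isinstance(outputs, dict) and "logits" in outputs else outputs
loss = loss_fn(logits, y) / gradient_accumulation_steps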