
feat: add logs + evaluation llama lora #962

Merged: 5 commits, Feb 20, 2025
458 changes: 393 additions & 65 deletions src/concrete/ml/quantization/linear_op_glwe_backend.py

Large diffs are not rendered by default.
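The rendered page does not show this file's diff, but the flag it introduces, use_dynamic_quantization, is consumed below in hybrid_model.py. As a rough illustration only (the scale computation, rounding, and bit-widths here are assumptions, not the GLWE executor's actual implementation), dynamic quantization derives the activation scale from the live inputs at each call instead of from calibration data:

# Illustrative sketch, not the backend implementation.
import numpy

def dynamic_quantize(x: numpy.ndarray, n_bits: int = 8):
    # Compute the activation scale from the current values ("dynamic"),
    # then round and clip to a signed n_bits integer range.
    q_max = 2 ** (n_bits - 1) - 1
    scale = max(float(numpy.abs(x).max()) / q_max, 1e-12)
    q_x = numpy.clip(numpy.round(x / scale), -q_max - 1, q_max).astype(numpy.int16)
    return q_x, scale

def quantized_linear(x: numpy.ndarray, q_weight: numpy.ndarray, weight_scale: float):
    # Weights stay statically quantized; only activations are quantized per call.
    q_x, x_scale = dynamic_quantize(x)
    acc = q_x.astype(numpy.int64) @ q_weight.astype(numpy.int64).T
    return acc * (x_scale * weight_scale)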

32 changes: 30 additions & 2 deletions src/concrete/ml/torch/hybrid_model.py
@@ -223,6 +223,20 @@ def _apply(self, fn, recurse=True):
"""
return self

def _ensure_module_on_device(self, x: torch.Tensor) -> None:
"""Ensure the private module is on the same device as the input tensor.

Args:
x (torch.Tensor): The input tensor to match device with.
"""
assert self.private_module is not None

# Check if any parameter is not on the same device as the input tensor
if any(
param.device != x.device for param in self.private_module.parameters()
): # pragma: no cover
self.private_module = self.private_module.to(x.device) # pragma: no cover

def forward(self, x: torch.Tensor) -> Union[torch.Tensor, QuantTensor]:
"""Forward pass of the remote module.

@@ -272,6 +286,7 @@ def forward(self, x: torch.Tensor) -> Union[torch.Tensor, QuantTensor]:
assert self.private_module is not None
assert self.calibration_data is not None
self.calibration_data.append(x.detach())
self._ensure_module_on_device(x)
y = self.private_module(x)
assert isinstance(y, (QuantTensor, torch.Tensor))

@@ -284,6 +299,8 @@ def forward(self, x: torch.Tensor) -> Union[torch.Tensor, QuantTensor]:
elif self.fhe_local_mode == HybridFHEMode.TORCH:
# Using torch layers
assert self.private_module is not None
# Move private module parameters to same device as input if needed
self._ensure_module_on_device(x)
y = self.private_module(x)
else: # pragma:no cover
# Shouldn't happen
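Both forward branches above now call the same helper before running the private module. A minimal standalone sketch of that device-sync pattern, using a toy nn.Linear in place of the hybrid model's private module:

# Sketch of the device-sync pattern used by _ensure_module_on_device.
import torch

module = torch.nn.Linear(4, 2)
x = torch.randn(3, 4)

# Move the module only when at least one parameter lives on a different
# device than the input tensor.
if any(param.device != x.device for param in module.parameters()):
    module = module.to(x.device)

y = module(x)  # module and input are now guaranteed to share a device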
@@ -555,6 +572,7 @@ def compile_model(
p_error: Optional[float] = None,
device: str = "cpu",
configuration: Optional[Configuration] = None,
use_dynamic_quantization: bool = False,
):
"""Compiles the specific layers to FHE.

@@ -569,7 +587,13 @@ def compile_model(
device: FHE compilation device, can be either 'cpu' or 'cuda'.
configuration (Configuration): A concrete Configuration object specifying the FHE
encryption parameters. If not specified, a default configuration is used.
use_dynamic_quantization (bool): If True, use dynamic quantization;
otherwise, use static quantization. (only for GLWE backend)
"""
assert (
has_glwe_backend() or not use_dynamic_quantization
), "Dynamic quantization requires GLWE backend"

# We do a forward pass where we accumulate inputs to use for compilation
self.set_fhe_mode(HybridFHEMode.CALIBRATE)

@@ -600,7 +624,9 @@ def compile_model(
# then simply quantize the model without compiling with
# Concrete Python.
if self._has_only_large_linear_layers and has_glwe_backend():
self.executor = GLWELinearLayerExecutor()
self.executor = GLWELinearLayerExecutor(
use_dynamic_quantization=use_dynamic_quantization
)
self.private_q_modules[name] = build_quantized_module(
self.private_modules[name],
calibration_data_tensor,
@@ -612,7 +638,9 @@ def compile_model(
vals = self.private_q_modules[name].quant_layers_dict.values()
_, q_op = next(iter(vals))
const_inp = q_op.constant_inputs[1] # Get the weights, the bias is in [2]
const_inp.values = const_inp.qvalues.astype(numpy.float32)

if not use_dynamic_quantization:
const_inp.values = const_inp.qvalues.astype(numpy.float32)
const_inp.qvalues = const_inp.qvalues.astype(numpy.int16)
else:
self.private_q_modules[name] = compile_torch_model(
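For reference, a hypothetical end-to-end usage sketch of the new flag. Everything except use_dynamic_quantization (the toy model, the module name, and the exact HybridFHEModel / compile_model argument order) is assumed rather than taken from this diff:

# Hypothetical usage sketch; constructor and argument names are assumptions.
import torch
from concrete.ml.torch.hybrid_model import HybridFHEModel

# Toy model; "0" names the Sequential submodule that runs remotely.
model = torch.nn.Sequential(torch.nn.Linear(64, 64), torch.nn.ReLU())
hybrid_model = HybridFHEModel(model, module_names=["0"])

x = torch.randn(8, 64)
hybrid_model.compile_model(
    x,
    use_dynamic_quantization=True,  # new flag from this PR; requires the GLWE backend
)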