diff --git a/src/concrete/ml/common/utils.py b/src/concrete/ml/common/utils.py
index 3af1800b5..4414056fb 100644
--- a/src/concrete/ml/common/utils.py
+++ b/src/concrete/ml/common/utils.py
@@ -42,6 +42,14 @@
 # Indicate if the old simulation method should be used when simulating FHE executions
 USE_OLD_VL = True
 
+# Debug option for testing the round PBS optimization
+# Setting this option to True makes the quantizers "round half up"
+# For example: 0.5 -> 1, 2.5 -> 3, instead of "round half to even"
+# When the option is set to False, Concrete ML uses numpy.rint,
+# which has the same behavior as torch.round, so Brevitas nets
+# should be exact compared to their Concrete ML QuantizedModule
+QUANT_ROUND_LIKE_ROUND_PBS = False
+
 
 class FheMode(str, enum.Enum):
     """Enum representing the execution mode.
diff --git a/src/concrete/ml/onnx/ops_impl.py b/src/concrete/ml/onnx/ops_impl.py
index f1d64f712..d06489a49 100644
--- a/src/concrete/ml/onnx/ops_impl.py
+++ b/src/concrete/ml/onnx/ops_impl.py
@@ -14,6 +14,7 @@
 from scipy import special
 from typing_extensions import SupportsIndex
 
+from ..common import utils
 from ..common.debugging import assert_false, assert_true
 from .onnx_impl_utils import (
     compute_onnx_pool_padding,
@@ -1653,7 +1654,10 @@ def numpy_brevitas_quant(
     y = numpy.clip(y, min_int_val, max_int_val)
 
     # Quantize to produce integers representing the float quantized values
-    y = numpy.rint(y)
+    if utils.QUANT_ROUND_LIKE_ROUND_PBS:
+        y = numpy.floor(y + 0.5)
+    else:
+        y = numpy.rint(y)
 
     # Compute quantized floating point values
     y = (y - zero_point) * scale
diff --git a/src/concrete/ml/quantization/quantized_module_passes.py b/src/concrete/ml/quantization/quantized_module_passes.py
index 35a51eb78..bda911c27 100644
--- a/src/concrete/ml/quantization/quantized_module_passes.py
+++ b/src/concrete/ml/quantization/quantized_module_passes.py
@@ -295,11 +295,9 @@ def integer_log2(value: float) -> Tuple[int, bool]:
                 # number of lsbs to round is the negative of the sum of log2
                 # of the scale factors
                 lsbs_to_round = -(log2_input + log2_weights - log2_output)
-                # log2_output - log2_input - log2_weights
-                # TODO: check this part with Andrei
-                # How is it possible to have like that?
-                path_start_node.rounding_threshold_bits = lsbs_to_round
-                path_start_node.lsbs_to_remove = lsbs_to_round
+                if lsbs_to_round > 0:
+                    path_start_node.rounding_threshold_bits = lsbs_to_round
+                    path_start_node.lsbs_to_remove = lsbs_to_round
             else:
                 invalid_paths.append(path_start_node)
 
diff --git a/src/concrete/ml/quantization/quantizers.py b/src/concrete/ml/quantization/quantizers.py
index 73e9d145d..c3317cea4 100644
--- a/src/concrete/ml/quantization/quantizers.py
+++ b/src/concrete/ml/quantization/quantizers.py
@@ -7,6 +7,7 @@
 
 import numpy
 
+from ..common import utils
 from ..common.debugging import assert_true
 from ..common.serialization.dumpers import dump, dumps
 
@@ -745,7 +746,10 @@ def quant(self, values: numpy.ndarray) -> numpy.ndarray:
         assert self.offset is not None
         assert self.scale is not None
 
-        qvalues = numpy.rint(values / self.scale + self.zero_point)
+        if utils.QUANT_ROUND_LIKE_ROUND_PBS:
+            qvalues = numpy.floor(values / self.scale + self.zero_point + 0.5)
+        else:
+            qvalues = numpy.rint(values / self.scale + self.zero_point)
 
         # Clipping can be performed for PTQ and for precomputed (for now only Brevitas) QAT
         # (where quantizer parameters are available in ONNX layers).
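Note (not part of the patch): a minimal, standalone sketch of the two rounding modes toggled by QUANT_ROUND_LIKE_ROUND_PBS above. numpy.rint rounds ties to the nearest even integer (the same tie-breaking as torch.round), while floor(x + 0.5) rounds ties up toward +infinity, which is the behavior the flag enables for testing the round PBS optimization.

```python
import numpy

ties = numpy.array([0.5, 1.5, 2.5, -0.5, -1.5])

# numpy.rint: round half to even (same tie-breaking as torch.round)
print(numpy.rint(ties))         # [ 0.  2.  2. -0. -2.]

# floor(x + 0.5): round half up, i.e. ties go toward +infinity
print(numpy.floor(ties + 0.5))  # [ 1.  2.  3.  0. -1.]
```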
diff --git a/src/concrete/ml/sklearn/qnn_module.py b/src/concrete/ml/sklearn/qnn_module.py
index 8fa46de76..92d415e6d 100644
--- a/src/concrete/ml/sklearn/qnn_module.py
+++ b/src/concrete/ml/sklearn/qnn_module.py
@@ -9,7 +9,6 @@
 from torch import nn
 
 from ..common.debugging import assert_true
-from ..common.utils import MAX_BITWIDTH_BACKWARD_COMPATIBLE
 from ..quantization.qat_quantizers import Int8ActPerTensorPoT, Int8WeightPerTensorPoT
 
 
@@ -29,14 +28,15 @@ def __init__(
         self,
         n_layers: int,
         n_outputs: int,
         n_hidden_neurons_multiplier: int = 4,
-        n_w_bits: int = 3,
-        n_a_bits: int = 3,
-        n_accum_bits: int = MAX_BITWIDTH_BACKWARD_COMPATIBLE,
+        n_w_bits: int = 4,
+        n_a_bits: int = 4,
+        # No pruning by default as roundPBS keeps the PBS precision low
+        n_accum_bits: int = 32,
         n_prune_neurons_percentage: float = 0.0,
         activation_function: Type = nn.ReLU,
         quant_narrow: bool = False,
         quant_signed: bool = True,
-        power_of_two_scaling: bool = False,
+        power_of_two_scaling: bool = True,  # Default to True: use roundPBS to speed up the NNs
     ):
         """Sparse Quantized Neural Network constructor.
diff --git a/tests/torch/test_brevitas_qat.py b/tests/torch/test_brevitas_qat.py
index b9b8c9751..216148106 100644
--- a/tests/torch/test_brevitas_qat.py
+++ b/tests/torch/test_brevitas_qat.py
@@ -15,6 +15,7 @@
 from torch import nn
 from torch.utils.data import DataLoader, TensorDataset
 
+from concrete.ml.common import utils
 from concrete.ml.common.utils import (
     is_classifier_or_partial_classifier,
     is_regressor_or_partial_regressor,
@@ -514,6 +515,8 @@ def test_brevitas_power_of_two(
 
     net, x_all, _ = train_brevitas_network_tinymnist(is_cnn, n_bits, True, False, power_of_two)
 
+    utils.QUANT_ROUND_LIKE_ROUND_PBS = True
+
     # If rounding threshold is set -> nothing happens
     # If Quantizer is not setup -> nothing happens
     quantized_module = compile_brevitas_qat_model(
@@ -590,11 +593,7 @@ def test_brevitas_power_of_two(
     )
 
     # # Compare the result with the optimized network and without
-    # # they should be equal (allow 3 non-matching value out of 100)
-    # TODO: actually verify correctness here, this is just a placeholder
-    # https://github.com/zama-ai/concrete-ml-internal/issues/3946
-    assert y_pred_sim_round.shape == y_pred_clear_round.shape
-    assert y_pred_clear_round.shape == y_pred_clear_no_round.shape
-
-    # assert numpy.sum(y_pred_sim_round != y_pred_clear_round) <= 3
-    # assert numpy.sum(y_pred_clear_round != y_pred_clear_no_round) <= 3
+    # # they should be equal
+
+    assert numpy.sum(y_pred_sim_round != y_pred_clear_round) == 0
+    assert numpy.sum(y_pred_clear_round != y_pred_clear_no_round) == 0
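Note (not part of the patch): the test above flips the module-level flag and leaves it set for the rest of the process. If isolation between tests matters, a pytest fixture along these lines could toggle the flag and restore it afterwards; the fixture name is hypothetical and this is only a sketch, not part of the proposed change.

```python
import pytest

from concrete.ml.common import utils


@pytest.fixture
def quant_round_like_round_pbs():
    """Temporarily enable 'round half up' quantization, then restore the previous value."""
    previous = utils.QUANT_ROUND_LIKE_ROUND_PBS
    utils.QUANT_ROUND_LIKE_ROUND_PBS = True
    yield
    utils.QUANT_ROUND_LIKE_ROUND_PBS = previous
```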