Skip to content

Commit

Permalink
config migration: int*
Browse files: browse the repository at this point in the history
Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:

ghstack-source-id: aee7edc748ec6d829a953caa7bd3395c5e6c5d3d
ghstack-comment-id: 2649752838
Pull Request resolved: #1696
  • Loading branch information
vkuzo committed Feb 14, 2025
1 parent: da35915; commit: fb9c3ff
Show file tree
Hide file tree
Showing 4 changed files with 173 additions and 119 deletions.
1 change: 1 addition & 0 deletions test/dtypes/test_affine_quantized.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -218,6 +218,7 @@ def test_flatten_unflatten(self, device, dtype):
linear = torch.nn.Linear(128, 256, dtype=dtype, device=device)
if isinstance(apply_quant, AOBaseConfig):
quantize_(linear, apply_quant)
ql = linear
else:
# TODO(#1690): delete this once config migration is done
ql = apply_quant(linear)
Expand Down
13 changes: 12 additions & 1 deletion test/quantization/test_quant_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
float8_dynamic_activation_float8_weight,
float8_static_activation_float8_weight,
float8_weight_only,
int4_dynamic_activation_int4_weight,
int4_weight_only,
int8_dynamic_activation_int4_weight,
int8_dynamic_activation_int8_weight,
Expand All @@ -50,6 +51,7 @@
TORCH_VERSION_AT_LEAST_2_5,
TORCH_VERSION_AT_LEAST_2_6,
is_sm_at_least_89,
is_sm_at_least_90,
unwrap_tensor_subclass,
)

Expand Down Expand Up @@ -798,6 +800,10 @@ def test_int4wo_cpu(self, dtype, x_dim):
float8_weight_only(),
float8_dynamic_activation_float8_weight(),
float8_static_activation_float8_weight(scale=torch.tensor([1.0])),
int4_dynamic_activation_int4_weight(),
int8_dynamic_activation_int8_weight(),
int8_dynamic_activation_int4_weight(),
int8_weight_only(),
],
)
def test_workflow_e2e_numerics(self, config):
Expand All @@ -816,6 +822,11 @@ def test_workflow_e2e_numerics(self, config):
and not is_sm_at_least_89()
):
return unittest.skip("requires CUDA capability 8.9 or greater")
elif (
isinstance(config, int4_dynamic_activation_int4_weight)
and is_sm_at_least_90()
):
return unittest.skip("only supported on CUDA capability 8.9, not greater")

# scale has to be moved to cuda here because the parametrization init
# code happens before gating for cuda availability
Expand All @@ -837,7 +848,7 @@ def test_workflow_e2e_numerics(self, config):
y_q = m_q(x)

sqnr = compute_error(y_ref, y_q)
assert sqnr >= 20, f"SQNR {sqnr} is too low"
assert sqnr >= 16.5, f"SQNR {sqnr} is too low"


class TestMultiTensorFlow(TestCase):
Expand Down
8 changes: 8 additions & 0 deletions torchao/quantization/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,11 @@
Float8DynamicActivationFloat8WeightConfig,
Float8StaticActivationFloat8WeightConfig,
Float8WeightOnlyConfig,
Int4DynamicActivationInt4WeightConfig,
Int4WeightOnlyConfig,
Int8DynamicActivationInt4WeightConfig,
Int8DynamicActivationInt8WeightConfig,
Int8WeightOnlyConfig,
float8_dynamic_activation_float8_weight,
float8_static_activation_float8_weight,
float8_weight_only,
Expand Down Expand Up @@ -123,7 +127,11 @@
"fpx_weight_only",
"gemlite_uintx_weight_only",
"swap_conv2d_1x1_to_linear",
"Int4DynamicActivationInt4WeightConfig",
"Int8DynamicActivationInt4WeightConfig",
"Int8DynamicActivationInt8WeightConfig",
"Int4WeightOnlyConfig",
"Int8WeightOnlyConfig",
"Float8WeightOnlyConfig",
"Float8DynamicActivationFloat8WeightConfig",
"Float8StaticActivationFloat8WeightConfig",
Expand Down
Loading

0 comments on commit fb9c3ff

Please sign in to comment.