g_idx to fakequantize
horheynm committed Jun 26, 2024
1 parent 2525f69 commit c6b5b28
Showing 1 changed file with 9 additions and 7 deletions.
src/sparseml/modifiers/quantization/gptq/utils/gptq_wrapper.py (16 changes: 9 additions & 7 deletions)
@@ -192,10 +192,10 @@ def fasterprune(
requires_grad=False,
)
else:
g_idx = torch.Tensor(
g_idx = torch.tensor(
[j // group_size for j in range(self.columns)],

device=W.device,
dtype=torch.int32,
device=W.device
)

from compressed_tensors.quantization import QuantizationStrategy
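
For context, `g_idx` maps each weight column to its quantization group; the switch from the legacy `torch.Tensor` constructor to the `torch.tensor` factory is what makes the explicit `dtype=torch.int32` possible. A minimal sketch of the resulting tensor, with assumed toy values for `self.columns` and `group_size` (not taken from this file):

```python
import torch

# Assumed toy values; the real ones come from the layer shape and the
# quantization config in gptq_wrapper.py.
columns, group_size = 8, 4

# One group index per weight column: columns 0-3 -> group 0, 4-7 -> group 1.
g_idx = torch.tensor(
    [j // group_size for j in range(columns)],
    dtype=torch.int32,
)
print(g_idx)  # tensor([0, 0, 0, 0, 1, 1, 1, 1], dtype=torch.int32)
```
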
@@ -204,14 +204,12 @@ def fasterprune(
)

strategy = quant_scheme.weights.strategy
breakpoint()
if strategy == QuantizationStrategy.TENSOR:
q = fake_quantize(
q,
scale,
zero_point,
self.layer.quantization_scheme.weights,
g_idx,
)
elif strategy == QuantizationStrategy.CHANNEL:
# TODO: for channelwise why isn't this just a 1d tensor?
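
Conceptually, passing `g_idx` into `fake_quantize` lets group-wise scales and zero points be expanded to per-column values by index lookup, as the commented-out `scale[g_idx]` lines below also hint. A hypothetical sketch of that expansion (the function name, signature, and shapes here are assumptions, not the actual `compressed_tensors` API):

```python
import torch

def fake_quantize_grouped(w, scale, zero_point, g_idx, qmin=-8, qmax=7):
    # scale / zero_point hold one column per group: shape [rows, n_groups].
    # Indexing with g_idx (shape [cols]) expands them to one value per
    # weight column: shape [rows, cols].
    col_scale = scale[:, g_idx.long()]
    col_zp = zero_point[:, g_idx.long()]
    q = torch.clamp(torch.round(w / col_scale + col_zp), qmin, qmax)
    return (q - col_zp) * col_scale  # quantize-dequantize round trip

# Toy usage with the g_idx layout from the sketch above.
w = torch.randn(2, 8)
g_idx = torch.tensor([j // 4 for j in range(8)], dtype=torch.int32)
scale = torch.full((2, 2), 0.1)
zero_point = torch.zeros(2, 2)
w_fq = fake_quantize_grouped(w, scale, zero_point, g_idx)
```
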
@@ -228,16 +226,20 @@
input_dim_group = (
column_idx // quant_scheme.weights.group_size
)

# Since we're only applying quantization to a slice, this
# ends up being a channelwise application
altered_qargs = copy(quant_scheme.weights)
altered_qargs.strategy = QuantizationStrategy.CHANNEL

# # apply g_idx
# if g_idx is not None:
# scale = scale[g_idx]
# zero_point = zero_point[g_idx]

q = fake_quantize(
q,
scale[:, input_dim_group],
zero_point[:, input_dim_group],
# g_idx,
altered_qargs,
)
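
The group-quantization path above quantizes `W` one column slice at a time, so the group-wise scale collapses to a single value per output channel for that slice; that is why the qargs are copied and switched to `QuantizationStrategy.CHANNEL`. A toy illustration of the index arithmetic (shapes assumed, not taken from the file):

```python
import torch

rows, cols, group_size = 2, 8, 4
w = torch.randn(rows, cols)
scale = torch.full((rows, cols // group_size), 0.1)  # [rows, n_groups]
zero_point = torch.zeros_like(scale)

column_idx = 5
input_dim_group = column_idx // group_size  # column 5 falls in group 1

# One column of W against one column of scale/zero_point: effectively a
# channelwise (per-output-row) quantization of the slice.
col = w[:, column_idx]
q = torch.round(col / scale[:, input_dim_group] + zero_point[:, input_dim_group])
deq = (q - zero_point[:, input_dim_group]) * scale[:, input_dim_group]
```
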
