diff --git a/optimum/gptq/quantizer.py b/optimum/gptq/quantizer.py
index ae2f7317ab..103015642c 100644
--- a/optimum/gptq/quantizer.py
+++ b/optimum/gptq/quantizer.py
@@ -245,10 +245,10 @@ def _replace_by_quant_layers(self, module: nn.Module, names: List[str], name: st
                     out_features = layer.weight.shape[1]
                 if not (self.desc_act) or self.group_size == -1:
                     new_layer = QuantLinear(
-                        self.bits, self.group_size, in_features, out_features, True, use_cuda_fp16=self.use_cuda_fp16
+                        self.bits, self.group_size, in_features, out_features, True, use_cuda_fp16=self.use_cuda_fp16, weight_dtype=layer.weight.dtype
                     )
                 else:
-                    new_layer = QuantLinear(self.bits, self.group_size, in_features, out_features, True)
+                    new_layer = QuantLinear(self.bits, self.group_size, in_features, out_features, True, weight_dtype=layer.weight.dtype)
                 new_layer.device = device
                 setattr(module, attr, new_layer.to(device))
         for name1, child in module.named_children():
diff --git a/optimum/utils/import_utils.py b/optimum/utils/import_utils.py
index d1729d8599..f262548975 100644
--- a/optimum/utils/import_utils.py
+++ b/optimum/utils/import_utils.py
@@ -35,7 +35,7 @@
 TORCH_MINIMUM_VERSION = packaging.version.parse("1.11.0")
 TRANSFORMERS_MINIMUM_VERSION = packaging.version.parse("4.25.0")
 DIFFUSERS_MINIMUM_VERSION = packaging.version.parse("0.18.0")
-AUTOGPTQ_MINIMUM_VERSION = packaging.version.parse("0.4.2")
+AUTOGPTQ_MINIMUM_VERSION = packaging.version.parse("0.5.0")


 # This is the minimal required version to support some ONNX Runtime features
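
The second hunk bumps AUTOGPTQ_MINIMUM_VERSION to 0.5.0 because the weight_dtype argument passed to QuantLinear in the first hunk only exists from that auto-gptq release onwards. Below is a minimal sketch of how such a minimum-version constant is typically enforced at import time; the helper name check_autogptq_version and the error messages are illustrative assumptions, not part of this diff or of optimum's actual implementation.

    # Sketch only: enforce the bumped AUTOGPTQ_MINIMUM_VERSION before using QuantLinear.
    import importlib.metadata

    from packaging import version

    AUTOGPTQ_MINIMUM_VERSION = version.parse("0.5.0")


    def check_autogptq_version() -> None:
        """Raise if auto-gptq is missing or too old to accept QuantLinear(..., weight_dtype=...)."""
        try:
            installed = version.parse(importlib.metadata.version("auto-gptq"))
        except importlib.metadata.PackageNotFoundError:
            raise ImportError("auto-gptq is required for GPTQ quantization but is not installed.")
        if installed < AUTOGPTQ_MINIMUM_VERSION:
            raise ImportError(
                f"Found auto-gptq {installed}, but version >= {AUTOGPTQ_MINIMUM_VERSION} is required "
                "to pass weight_dtype to QuantLinear."
            )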