[FIX] set the nsamples/seqlen according to the actual size of the calibration_dataset. (#297)

* set the nsamples according to the actual size of the calibration_dataset

* set the seqlen according to the actual size of the calibration_dataset.

* cleanup

---------

Co-authored-by: LRL-ModelCloud <[email protected]>
LRL-ModelCloud authored Jul 25, 2024
1 parent 69c7e71 commit d7c0c1f
Showing 2 changed files with 10 additions and 7 deletions.
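
In short: the AutoRound path previously passed a fixed seqlen from AutoRoundQuantizeConfig (default 2048) regardless of the calibration data; after this commit, both nsamples and seqlen are derived from the calibration set itself. A minimal sketch of that derivation, with toy token lists (variable names mirror the diff below; the data is illustrative):

    # Hedged sketch: derive nsamples/seqlen from the calibration data itself
    # rather than a hard-coded default (toy token IDs, for illustration only).
    calibration_dataset = [
        {"input_ids": [101, 2023, 2003, 102]},        # 4 tokens
        {"input_ids": [101, 2178, 7099, 6251, 102]},  # 5 tokens
    ]

    nsamples = len(calibration_dataset)  # 2
    seqlen = max(len(row["input_ids"]) for row in calibration_dataset)  # 5, not 2048

    print(nsamples, seqlen)  # -> 2 5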
14 changes: 10 additions & 4 deletions gptqmodel/models/base.py
@@ -204,6 +204,7 @@ def quantize(

         # Calculate the average length of the input_ids
         total_input_ids_length = 0
+        max_input_id_length = 0
         for row in calibration_dataset:
             input_ids = row["input_ids"]
             if isinstance(input_ids, torch.Tensor):
@@ -213,6 +214,9 @@ def quantize(
                     raise ValueError("Expected a 1-dimensional tensor for 'input_ids', but got a tensor with {0} dimensions.".format(input_ids.dim()))
             else:
                 input_ids_length = len(input_ids)
+
+            if input_ids_length > max_input_id_length:
+                max_input_id_length = input_ids_length
             total_input_ids_length += input_ids_length
         avg = total_input_ids_length / len(calibration_dataset)

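A self-contained replay of the scan above, assuming list-style input_ids for brevity (the toy data and prints are illustrative, not from the repo):

    calibration_dataset = [
        {"input_ids": [101, 7592, 102]},       # 3 tokens
        {"input_ids": [101, 2088, 999, 102]},  # 4 tokens
    ]

    total_input_ids_length = 0
    max_input_id_length = 0
    for row in calibration_dataset:
        input_ids_length = len(row["input_ids"])
        # New in this commit: track the longest sample, later used as seqlen.
        if input_ids_length > max_input_id_length:
            max_input_id_length = input_ids_length
        total_input_ids_length += input_ids_length
    avg = total_input_ids_length / len(calibration_dataset)

    print(max_input_id_length, avg)  # -> 4 3.5
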
@@ -272,15 +276,17 @@ def collate_batch(batch):
             res = {"input_ids": input_ids_new, "attention_mask": attention_mask_new}
             return res

-        # we can pass batch_size=len(calibration_dataset), cause it spends less memory on GPU
-        dataloader = DataLoader(calibration_dataset, collate_fn=collate_batch, shuffle=False, batch_size=len(calibration_dataset))
+        # set the nsamples/seqlen according to the actual size of the calibration_dataset.
+        nsamples = len(calibration_dataset)
+        seqlen = max_input_id_length
+        dataloader = DataLoader(calibration_dataset, collate_fn=collate_batch, shuffle=False, batch_size=nsamples)

         self.autoround = AutoRound(self.model,
                                    tokenizer=None,
                                    bits=self.quantize_config.bits,
                                    group_size=self.quantize_config.group_size,
-                                   sym=self.quantize_config.sym, batch_size=batch_size,
-                                   dataset=dataloader, seqlen=self.quantize_config.seqlen, nblocks=self.quantize_config.nblocks,
+                                   sym=self.quantize_config.sym, batch_size=batch_size, n_samples=nsamples,
+                                   dataset=dataloader, seqlen=seqlen, nblocks=self.quantize_config.nblocks,
                                    iters=self.quantize_config.iters, lr=self.quantize_config.lr,
                                    minmax_lr=self.quantize_config.minmax_lr,
                                    enable_quanted_input=self.quantize_config.enable_quanted_input,
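
The new dataloader deliberately keeps batch_size=nsamples, so the whole calibration set arrives as a single batch. A standalone illustration (the real collate_batch also pads and builds attention_mask; this stand-in is simplified):

    from torch.utils.data import DataLoader

    calibration_dataset = [{"input_ids": [1, 2, 3]}, {"input_ids": [4, 5, 6]}]
    nsamples = len(calibration_dataset)

    def collate_batch(batch):
        # Simplified stand-in for the repo's collate_batch.
        return {"input_ids": [row["input_ids"] for row in batch]}

    dataloader = DataLoader(calibration_dataset, collate_fn=collate_batch,
                            shuffle=False, batch_size=nsamples)
    print(len(list(dataloader)))  # -> 1: the entire dataset in one batch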
3 changes: 0 additions & 3 deletions gptqmodel/quantization/config.py
@@ -314,7 +314,6 @@ class AutoRoundQuantizeConfig(QuantizeConfig):
     minmax_lr: float = None
     low_gpu_mem_usage: bool = False
     iters: int = 200
-    seqlen: int = 2048
     sampler: str = "rand"
     seed: int = 42
     nblocks: int = 1
@@ -338,8 +337,6 @@ def to_dict(self):
self.meta_set("minmax_lr", self.minmax_lr)
self.meta_set("low_gpu_mem_usage", self.low_gpu_mem_usage)
self.meta_set("iters", self.iters)
self.meta_set("seqlen", self.seqlen)
# self.meta_set("nsamples", self.nsamples)
self.meta_set("sampler", self.sampler)
self.meta_set("seed", self.seed)
self.meta_set("nblocks", self.nblocks)
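
With seqlen removed from AutoRoundQuantizeConfig, callers no longer set it on the config; it is inferred at quantize() time from the calibration data. A hedged usage sketch (field names are from this file; the import path follows the file's location and the chosen values are illustrative):

    from gptqmodel.quantization.config import AutoRoundQuantizeConfig

    # seqlen is intentionally absent: it now comes from the calibration data.
    qcfg = AutoRoundQuantizeConfig(
        bits=4,
        group_size=128,
        iters=200,  # field shown in the diff above
    )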
