From d7c0c1fca50775fde138dfc7f8580fd2f0c236c2 Mon Sep 17 00:00:00 2001
From: LRL-ModelCloud <165116337+LRL-ModelCloud@users.noreply.github.com>
Date: Thu, 25 Jul 2024 10:37:59 +0800
Subject: [PATCH] [FIX] set the nsamples/seqlen according to the actual size
 of the calibration_dataset. (#297)

* set the nsamples according to the actual size of the calibration_dataset

* set the seqlen according to the actual size of the calibration_dataset

* cleanup

---------

Co-authored-by: LRL-ModelCloud
---
 gptqmodel/models/base.py         | 14 ++++++++++----
 gptqmodel/quantization/config.py |  3 ---
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/gptqmodel/models/base.py b/gptqmodel/models/base.py
index af051051..177ee976 100644
--- a/gptqmodel/models/base.py
+++ b/gptqmodel/models/base.py
@@ -204,6 +204,7 @@ def quantize(
 
         # Calculate the average length of the average input_ids
         total_input_ids_length = 0
+        max_input_id_length = 0
         for row in calibration_dataset:
             input_ids = row["input_ids"]
             if isinstance(input_ids, torch.Tensor):
@@ -213,6 +214,9 @@ def quantize(
                     raise ValueError("Expected a 1-dimensional tensor for 'input_ids', but got a tensor with {0} dimensions.".format(input_ids.dim()))
             else:
                 input_ids_length = len(input_ids)
+
+            if input_ids_length > max_input_id_length:
+                max_input_id_length = input_ids_length
             total_input_ids_length += input_ids_length
         avg = total_input_ids_length / len(calibration_dataset)
 
@@ -272,15 +276,17 @@ def collate_batch(batch):
                 res = {"input_ids": input_ids_new, "attention_mask": attention_mask_new}
                 return res
 
-            # we can pass batch_size=len(calibration_dataset), cause it spends less memory on GPU
-            dataloader = DataLoader(calibration_dataset, collate_fn=collate_batch, shuffle=False, batch_size=len(calibration_dataset))
+            # set the nsamples/seqlen according to the actual size of the calibration_dataset.
+            nsamples = len(calibration_dataset)
+            seqlen = max_input_id_length
+            dataloader = DataLoader(calibration_dataset, collate_fn=collate_batch, shuffle=False, batch_size=nsamples)
 
             self.autoround = AutoRound(self.model,
                                        tokenizer=None,
                                        bits=self.quantize_config.bits,
                                        group_size=self.quantize_config.group_size,
-                                       sym=self.quantize_config.sym, batch_size=batch_size,
-                                       dataset=dataloader, seqlen=self.quantize_config.seqlen, nblocks=self.quantize_config.nblocks,
+                                       sym=self.quantize_config.sym, batch_size=batch_size, n_samples=nsamples,
+                                       dataset=dataloader, seqlen=seqlen, nblocks=self.quantize_config.nblocks,
                                        iters=self.quantize_config.iters, lr=self.quantize_config.lr,
                                        minmax_lr=self.quantize_config.minmax_lr,
                                        enable_quanted_input=self.quantize_config.enable_quanted_input,
diff --git a/gptqmodel/quantization/config.py b/gptqmodel/quantization/config.py
index 146aea26..c37b3fc5 100644
--- a/gptqmodel/quantization/config.py
+++ b/gptqmodel/quantization/config.py
@@ -314,7 +314,6 @@ class AutoRoundQuantizeConfig(QuantizeConfig):
     minmax_lr: float = None
     low_gpu_mem_usage: bool = False
     iters: int = 200
-    seqlen: int = 2048
     sampler: str = "rand"
     seed: int = 42
     nblocks: int = 1
@@ -338,8 +337,6 @@ def to_dict(self):
         self.meta_set("minmax_lr", self.minmax_lr)
         self.meta_set("low_gpu_mem_usage", self.low_gpu_mem_usage)
         self.meta_set("iters", self.iters)
-        self.meta_set("seqlen", self.seqlen)
-        # self.meta_set("nsamples", self.nsamples)
         self.meta_set("sampler", self.sampler)
         self.meta_set("seed", self.seed)
         self.meta_set("nblocks", self.nblocks)
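
A minimal, self-contained sketch of the sizing logic this patch introduces
in quantize(). It mirrors the base.py hunks above but runs on its own: the
two-row calibration_dataset is a hypothetical stand-in for the tokenized
dataset a caller passes to quantize(), and only torch is required.

    import torch

    calibration_dataset = [
        {"input_ids": torch.tensor([101, 2009, 2003, 102])},        # 4 tokens
        {"input_ids": torch.tensor([101, 2023, 2003, 1037, 102])},  # 5 tokens
    ]

    total_input_ids_length = 0
    max_input_id_length = 0
    for row in calibration_dataset:
        input_ids = row["input_ids"]
        if isinstance(input_ids, torch.Tensor):
            if input_ids.dim() != 1:
                raise ValueError(
                    "Expected a 1-dimensional tensor for 'input_ids', but got "
                    "a tensor with {0} dimensions.".format(input_ids.dim())
                )
            input_ids_length = input_ids.shape[0]
        else:
            input_ids_length = len(input_ids)

        # Track the longest sample; it becomes AutoRound's seqlen.
        if input_ids_length > max_input_id_length:
            max_input_id_length = input_ids_length
        total_input_ids_length += input_ids_length

    nsamples = len(calibration_dataset)      # -> 2, passed as n_samples/batch_size
    seqlen = max_input_id_length             # -> 5, instead of the old fixed 2048
    avg = total_input_ids_length / nsamples  # -> 4.5, as in the surrounding code

With batch_size=nsamples the DataLoader yields the whole calibration set as
one padded batch, and seqlen=max_input_id_length stops AutoRound from
assuming a 2048-token window when every calibration sample is shorter.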
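
On the config side, seqlen is removed from AutoRoundQuantizeConfig (and from
its to_dict() metadata), so callers can no longer pin it ahead of time. A
short sketch of the resulting surface, assuming the dataclass-style fields
shown in the hunks above (bits and group_size come from the QuantizeConfig
base class):

    from gptqmodel.quantization.config import AutoRoundQuantizeConfig

    # seqlen is no longer a field here; quantize() now derives it from the
    # longest calibration sample at run time.
    cfg = AutoRoundQuantizeConfig(bits=4, group_size=128, iters=200)

    # Passing seqlen raises a TypeError after this patch, since the field
    # was removed:
    #   AutoRoundQuantizeConfig(seqlen=2048)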