[FIX] set the nsamples/seqlen according to the actual size of the calibration_dataset. (#297)

* set the nsamples according to the actual size of the calibration_dataset

* set the seqlen according to the actual size of the calibration_dataset.

* cleanup

---------

Co-authored-by: LRL-ModelCloud <[email protected]>
LRL-ModelCloud authored Jul 25, 2024
1 parent 69c7e71 commit d7c0c1f
Showing 2 changed files with 10 additions and 7 deletions.
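
In short: the AutoRound path previously passed a fixed seqlen from AutoRoundQuantizeConfig (default 2048) regardless of the calibration data; after this commit, both nsamples and seqlen are derived from the calibration set itself. A minimal sketch of that derivation, with toy token lists (variable names mirror the diff below; the data is illustrative):

    # Hedged sketch: derive nsamples/seqlen from the calibration data itself
    # rather than a hard-coded default (toy token IDs, for illustration only).
    calibration_dataset = [
        {"input_ids": [101, 2023, 2003, 102]},        # 4 tokens
        {"input_ids": [101, 2178, 7099, 6251, 102]},  # 5 tokens
    ]

    nsamples = len(calibration_dataset)  # 2
    seqlen = max(len(row["input_ids"]) for row in calibration_dataset)  # 5, not 2048

    print(nsamples, seqlen)  # -> 2 5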
14 changes: 10 additions & 4 deletions gptqmodel/models/base.py
@@ -204,6 +204,7 @@ def quantize(

         # Calculate the average length of the input_ids
         total_input_ids_length = 0
+        max_input_id_length = 0
         for row in calibration_dataset:
             input_ids = row["input_ids"]
             if isinstance(input_ids, torch.Tensor):
@@ -213,6 +214,9 @@ def quantize(
                     raise ValueError("Expected a 1-dimensional tensor for 'input_ids', but got a tensor with {0} dimensions.".format(input_ids.dim()))
             else:
                 input_ids_length = len(input_ids)
+
+            if input_ids_length > max_input_id_length:
+                max_input_id_length = input_ids_length
             total_input_ids_length += input_ids_length
         avg = total_input_ids_length / len(calibration_dataset)

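A self-contained replay of the scan above, assuming list-style input_ids for brevity (the toy data and prints are illustrative, not from the repo):

    calibration_dataset = [
        {"input_ids": [101, 7592, 102]},       # 3 tokens
        {"input_ids": [101, 2088, 999, 102]},  # 4 tokens
    ]

    total_input_ids_length = 0
    max_input_id_length = 0
    for row in calibration_dataset:
        input_ids_length = len(row["input_ids"])
        # New in this commit: track the longest sample, later used as seqlen.
        if input_ids_length > max_input_id_length:
            max_input_id_length = input_ids_length
        total_input_ids_length += input_ids_length
    avg = total_input_ids_length / len(calibration_dataset)

    print(max_input_id_length, avg)  # -> 4 3.5
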
@@ -272,15 +276,17 @@ def collate_batch(batch):
             res = {"input_ids": input_ids_new, "attention_mask": attention_mask_new}
             return res

-        # we can pass batch_size=len(calibration_dataset), cause it spends less memory on GPU
-        dataloader = DataLoader(calibration_dataset, collate_fn=collate_batch, shuffle=False, batch_size=len(calibration_dataset))
+        # set the nsamples/seqlen according to the actual size of the calibration_dataset.
+        nsamples = len(calibration_dataset)
+        seqlen = max_input_id_length
+        dataloader = DataLoader(calibration_dataset, collate_fn=collate_batch, shuffle=False, batch_size=nsamples)

         self.autoround = AutoRound(self.model,
                                    tokenizer=None,
                                    bits=self.quantize_config.bits,
                                    group_size=self.quantize_config.group_size,
-                                   sym=self.quantize_config.sym, batch_size=batch_size,
-                                   dataset=dataloader, seqlen=self.quantize_config.seqlen, nblocks=self.quantize_config.nblocks,
+                                   sym=self.quantize_config.sym, batch_size=batch_size, n_samples=nsamples,
+                                   dataset=dataloader, seqlen=seqlen, nblocks=self.quantize_config.nblocks,
                                    iters=self.quantize_config.iters, lr=self.quantize_config.lr,
                                    minmax_lr=self.quantize_config.minmax_lr,
                                    enable_quanted_input=self.quantize_config.enable_quanted_input,
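
The new dataloader deliberately keeps batch_size=nsamples, so the whole calibration set arrives as a single batch. A standalone illustration (the real collate_batch also pads and builds attention_mask; this stand-in is simplified):

    from torch.utils.data import DataLoader

    calibration_dataset = [{"input_ids": [1, 2, 3]}, {"input_ids": [4, 5, 6]}]
    nsamples = len(calibration_dataset)

    def collate_batch(batch):
        # Simplified stand-in for the repo's collate_batch.
        return {"input_ids": [row["input_ids"] for row in batch]}

    dataloader = DataLoader(calibration_dataset, collate_fn=collate_batch,
                            shuffle=False, batch_size=nsamples)
    print(len(list(dataloader)))  # -> 1: the entire dataset in one batch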
3 changes: 0 additions & 3 deletions gptqmodel/quantization/config.py
@@ -314,7 +314,6 @@ class AutoRoundQuantizeConfig(QuantizeConfig):
     minmax_lr: float = None
     low_gpu_mem_usage: bool = False
     iters: int = 200
-    seqlen: int = 2048
     sampler: str = "rand"
     seed: int = 42
     nblocks: int = 1
@@ -338,8 +337,6 @@ def to_dict(self):
self.meta_set("minmax_lr", self.minmax_lr)
self.meta_set("low_gpu_mem_usage", self.low_gpu_mem_usage)
self.meta_set("iters", self.iters)
self.meta_set("seqlen", self.seqlen)
# self.meta_set("nsamples", self.nsamples)
self.meta_set("sampler", self.sampler)
self.meta_set("seed", self.seed)
self.meta_set("nblocks", self.nblocks)
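
With seqlen removed from AutoRoundQuantizeConfig, callers no longer set it on the config; it is inferred at quantize() time from the calibration data. A hedged usage sketch (field names are from this file; the import path follows the file's location and the chosen values are illustrative):

    from gptqmodel.quantization.config import AutoRoundQuantizeConfig

    # seqlen is intentionally absent: it now comes from the calibration data.
    qcfg = AutoRoundQuantizeConfig(
        bits=4,
        group_size=128,
        iters=200,  # field shown in the diff above
    )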
