[FIX] set the nsample/seqlen according to the actual size of the calibration_dataset. #297

Merged · 3 commits · Jul 25, 2024
gptqmodel/models/base.py (14 changes: 10 additions & 4 deletions)
@@ -202,6 +202,7 @@ def quantize(
 
         # Calculate the average length of the average input_ids
         total_input_ids_length = 0
+        max_input_id_length = 0
         for row in calibration_dataset:
             input_ids = row["input_ids"]
             if isinstance(input_ids, torch.Tensor):
@@ -211,6 +212,9 @@ def quantize(
                     raise ValueError("Expected a 1-dimensional tensor for 'input_ids', but got a tensor with {0} dimensions.".format(input_ids.dim()))
             else:
                 input_ids_length = len(input_ids)
+
+            if input_ids_length > max_input_id_length:
+                max_input_id_length = input_ids_length
             total_input_ids_length += input_ids_length
         avg = total_input_ids_length / len(calibration_dataset)
@@ -270,15 +274,17 @@ def collate_batch(batch):
             res = {"input_ids": input_ids_new, "attention_mask": attention_mask_new}
             return res
 
-        # we can pass batch_size=len(calibration_dataset), cause it spends less memory on GPU
-        dataloader = DataLoader(calibration_dataset, collate_fn=collate_batch, shuffle=False, batch_size=len(calibration_dataset))
+        # set the nsamples/seqlen according to the actual size of the calibration_dataset.
+        nsamples = len(calibration_dataset)
+        seqlen = max_input_id_length
+        dataloader = DataLoader(calibration_dataset, collate_fn=collate_batch, shuffle=False, batch_size=nsamples)
 
         self.autoround = AutoRound(self.model,
                                    tokenizer=None,
                                    bits=self.quantize_config.bits,
                                    group_size=self.quantize_config.group_size,
-                                   sym=self.quantize_config.sym, batch_size=batch_size,
-                                   dataset=dataloader, seqlen=self.quantize_config.seqlen, nblocks=self.quantize_config.nblocks,
+                                   sym=self.quantize_config.sym, batch_size=batch_size, n_samples=nsamples,
+                                   dataset=dataloader, seqlen=seqlen, nblocks=self.quantize_config.nblocks,
                                    iters=self.quantize_config.iters, lr=self.quantize_config.lr,
                                    minmax_lr=self.quantize_config.minmax_lr,
                                    enable_quanted_input=self.quantize_config.enable_quanted_input,
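
For readers skimming the diff, a minimal standalone sketch of the length scan these hunks introduce may help (the helper name derive_autoround_shape and the toy dataset below are illustrative, not part of the PR): nsamples is simply the dataset size, and seqlen becomes the longest input_ids row rather than a hard-coded default.

import torch

def derive_autoround_shape(calibration_dataset):
    # Mirror the scan added in quantize(): track the longest input_ids row.
    max_input_id_length = 0
    for row in calibration_dataset:
        input_ids = row["input_ids"]
        if isinstance(input_ids, torch.Tensor):
            length = input_ids.shape[-1]  # last dim is the sequence axis
        else:
            length = len(input_ids)
        max_input_id_length = max(max_input_id_length, length)
    # nsamples = dataset size, seqlen = longest row
    return len(calibration_dataset), max_input_id_length

nsamples, seqlen = derive_autoround_shape(
    [{"input_ids": torch.tensor([1, 2, 3])}, {"input_ids": [4, 5]}]
)
assert (nsamples, seqlen) == (2, 3)
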
gptqmodel/quantization/config.py (3 changes: 0 additions & 3 deletions)
@@ -313,7 +313,6 @@ class AutoRoundQuantizeConfig(QuantizeConfig):
     minmax_lr: float = None
     low_gpu_mem_usage: bool = False
     iters: int = 200
-    seqlen: int = 2048
     sampler: str = "rand"
     seed: int = 42
     nblocks: int = 1
@@ -337,8 +336,6 @@ def to_dict(self):
         self.meta_set("minmax_lr", self.minmax_lr)
         self.meta_set("low_gpu_mem_usage", self.low_gpu_mem_usage)
         self.meta_set("iters", self.iters)
-        self.meta_set("seqlen", self.seqlen)
-        # self.meta_set("nsamples", self.nsamples)
         self.meta_set("sampler", self.sampler)
         self.meta_set("seed", self.seed)
         self.meta_set("nblocks", self.nblocks)