diff --git a/gptqmodel/models/base.py b/gptqmodel/models/base.py index a2553c6c..a60c5c24 100644 --- a/gptqmodel/models/base.py +++ b/gptqmodel/models/base.py @@ -83,6 +83,8 @@ def __init__( qlinear_kernel: nn.Module = None, ): super().__init__() + logger.info(f"start base") + self.model = model self.model_type = self.model.config.model_type @@ -151,11 +153,24 @@ def _convert_tensor_to_list(tensor): return new_calibration_dataset_batched def quantize( + self, + calibration_dataset: List[Dict[str, Union[List[int], torch.LongTensor]]], + batch_size: int = 1, + calibration_enable_gpu_cache: bool = True, + ): + if isinstance(self.quantize_config, AutoRoundQuantizeConfig): + return self._quantize(calibration_dataset, batch_size, calibration_enable_gpu_cache) + else: + with torch.inference_mode(): + return self._quantize(calibration_dataset, batch_size, calibration_enable_gpu_cache) + + def _quantize( self, calibration_dataset: List[Dict[str, Union[List[int], torch.LongTensor]]], batch_size: int = 1, calibration_enable_gpu_cache: bool = True, ): + logger.info(f"start quant") if self.quantized: raise EnvironmentError("quantize() is called a model that is already quantized") @@ -496,11 +511,13 @@ def tmp(_, inp, out): [], ) # TODO: is it really OK to cache only the first positional argument? torch.cuda.empty_cache() - logger.info(f"Quantization summary:\n{quant_log}") for module_log in quant_log: logger.info(module_log) + return quant_log, quantizers, force_layer_back_to_cpu, device_map, forward_pass_use_cache + + def pack(self, quant_log, quantizers, force_layer_back_to_cpu, device_map, forward_pass_use_cache): self.qlinear_kernel = pack_model( model=self.model, quantizers=quantizers, diff --git a/gptqmodel/utils/model.py b/gptqmodel/utils/model.py index 0d7642f9..2faf3a99 100644 --- a/gptqmodel/utils/model.py +++ b/gptqmodel/utils/model.py @@ -15,6 +15,7 @@ from tqdm import tqdm from transformers import AutoConfig, PretrainedConfig from transformers.utils.hub import cached_file +from concurrent.futures import ThreadPoolExecutor, as_completed from ..models._const import CPU, EXLLAMA_DEFAULT_MAX_INPUT_LENGTH, EXPERT_INDEX_PLACEHOLDER, SUPPORTED_MODELS from ..nn_modules.qlinear import BaseQuantLinear @@ -32,7 +33,7 @@ handler.setFormatter(formatter) logger.addHandler(handler) logger.setLevel(logging.INFO) - +logger.info(f"start base") def recurse_getattr(obj, attr: str): """ @@ -290,31 +291,41 @@ def pack_model( qlayers = find_layers(model, [QuantLinear]) # Limit pack() thread usage to avoid auto-parallizataion regression - with tctl.threadpool_limits(limits=1): - pbar = tqdm(qlayers.keys(), leave=True) - for name in pbar: - pbar.set_description(f"Packing {name}") - - quantizers[name], scale, zero, g_idx = quantizers[name] - # so far can only pack layer on CPU - layer_device = qlayers[name].device - qlayers[name].to(CPU) - layers[name], scale, zero, g_idx = ( - layers[name].to(CPU), - scale.to(CPU), - zero.to(CPU), - g_idx.to(CPU), - ) - if QuantLinear is MarlinQuantLinear: - qlayers[name].pack(layers[name], scale) - else: - qlayers[name].pack(layers[name], scale, zero, g_idx) - qlayers[name].to(layer_device) + with ThreadPoolExecutor(max_workers=1) as executor: + # future_to_item = {executor.submit(pack_layer, quantizers[name], qlayers[name], layers[name], QuantLinear): name for name in qlayers.keys()} + # for future in tqdm(as_completed(future_to_item), total=len(qlayers.keys())): + # item = future_to_item[future] + # result = future.result() + # print(f"Processed {item} to {result}") + futures = [(executor.submit(pack_layer, quantizers[name], qlayers[name], layers[name], QuantLinear), name) for name in qlayers.keys()] + for item in futures: + future, name = item + future.result() + print(f"{name} Processed") logger.info("Model packed.") return QuantLinear +def pack_layer(quantizer, qlayer, layer, QuantLinear): + with tctl.threadpool_limits(limits=1): + _quantizer, scale, zero, g_idx = quantizer + # so far can only pack layer on CPU + layer_device = qlayer.device + qlayer.to(CPU) + layer, scale, zero, g_idx = ( + layer.to(CPU), + scale.to(CPU), + zero.to(CPU), + g_idx.to(CPU), + ) + if QuantLinear is MarlinQuantLinear: + qlayer.pack(layer, scale) + else: + qlayer.pack(layer, scale, zero, g_idx) + qlayer.to(layer_device) + + def verify_model_hash(file_path: str, verify_hash: str): if not isinstance(verify_hash, str): raise ValueError("model verify_hash must be a string") diff --git a/multithread_pack.ipynb b/multithread_pack.ipynb new file mode 100644 index 00000000..62ff5534 --- /dev/null +++ b/multithread_pack.ipynb @@ -0,0 +1,1616 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "de73aa28-0fcb-4f40-848f-29486f5a4638", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO - start base\n", + "INFO 2024-07-17 20:26:58.042 [61754-MainThread] (refresh_log@4181186596.py:039) init log\n", + "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", + "Token indices sequence length is longer than the specified maximum sequence length for this model (128838 > 32768). Running this sequence through the model will result in indexing errors\n", + "INFO 2024-07-17 20:26:59.867 [61754-MainThread] (@4181186596.py:074) load data 300 examples 1.64\n" + ] + } + ], + "source": [ + "#!/usr/bin/env python3\n", + "# -*- coding:utf-8 -*-\n", + "import os\n", + "# set env first then load other\n", + "os.environ['OMP_NUM_THREADS'] = '12'\n", + "os.environ['OPENBLAS_NUM_THREADS'] = '12'\n", + "os.environ['MKL_NUM_THREADS'] = '12'\n", + "os.environ['VECLIB_MAXIMUM_THREADS'] = '12'\n", + "os.environ['NUMEXPR_NUM_THREADS'] = '12'\n", + "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n", + "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"1,2,5,7\"\n", + "os.environ[\"PYTORCH_CUDA_ALLOC_CONF\"] = \"expandable_segments:True\"\n", + "\n", + "import time\n", + "from transformers import AutoTokenizer\n", + "from transformers import AutoConfig, AutoModelForCausalLM, PreTrainedModel\n", + "from gptqmodel import GPTQModel, QuantizeConfig\n", + "import datasets\n", + "import accelerate\n", + "\n", + "import logging\n", + "def refresh_log():\n", + " if logging.root.handlers:\n", + " logging.info(f\"logging already init. reset all\")\n", + " for handler in logging.root.handlers[:]:\n", + " print(f\"{handler.name}\")\n", + " logging.root.removeHandler(handler)\n", + " handler.close()\n", + "\n", + " \n", + " logger_dict = logging.Logger.manager\n", + " logger_dict.loggerDict = {}\n", + "\n", + " \n", + "\n", + " format = \"%(levelname)s %(asctime)s.%(msecs)03d [%(process)d-%(threadName)s] (%(funcName)s@%(filename)s:%(lineno)03d) %(message)s\"\n", + " datefmt = \"%Y-%m-%d %H:%M:%S\"\n", + " logging.basicConfig(format=format, datefmt=datefmt, level=logging.INFO, handlers=[logging.StreamHandler()])\n", + " logging.info(f\"init log\")\n", + "\n", + "refresh_log()\n", + "\n", + "import builtins\n", + "def print(*args, **kwargs):\n", + " sep = kwargs.get(\"sep\", \" \") # default separator\n", + " end = kwargs.get(\"end\", \"\") # default end\n", + " message = sep.join(map(str, args)) + end\n", + " logging.info(message, stacklevel=3)\n", + "\n", + "builtins.print = print\n", + "\n", + "\n", + "# pretrained_model_dir = '/aigc-nas02/zpfcode/online_model/xdlovers_72b_0712'\n", + "pretrained_model_dir = '/aigc-nas02/hf_models/Qwen1.5-0.5B'\n", + "quantized_model_dir = f'{pretrained_model_dir}-gptq-int8'\n", + "\n", + "tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)\n", + "\n", + "\n", + "quantize_config = QuantizeConfig(\n", + " bits=8, # quantize model to 4-bit\n", + " group_size=128, # it is recommended to set the value to 128\n", + " desc_act=False, # set to False can significantly speed up inference but the perplexity may slightly bad\n", + " damp_percent=0.01,\n", + ")\n", + "\n", + "# max_memory={ 0:\"10GIB\", 1:\"64GIB\", 2:\"64GIB\", 3:\"64GIB\", \"cpu\":\"200GIB\"}\n", + "\n", + "# # device_map=\"balanced_low_0\",\n", + "# model = GPTQModel.from_pretrained(pretrained_model_dir, quantize_config, trust_remote_code=True,\n", + "# max_memory=max_memory,\n", + "# attn_implementation=\"flash_attention_2\") \n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0970ec88-bcf3-4307-b768-e4bf4e049d2c", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "examples = []\n", + "ds_st = time.perf_counter()\n", + "pile_dataset: datasets.Dataset = datasets.load_dataset(r\"/aigc-nas02/cyj/pile-val-backup\", split='validation')\n", + "\n", + "def split_slow_tokenize(dataset: datasets.Dataset):\n", + " dataset_10k = dataset[:300]\n", + " examples = []\n", + " for data in dataset_10k['text']:\n", + " examples.append(tokenizer(data))\n", + " return examples\n", + "\n", + "\n", + "examples = split_slow_tokenize(pile_dataset)\n", + "logging.info(f\"load data {len(examples)} examples {time.perf_counter() - ds_st:.2f}\")\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cbd75369-03aa-4790-a232-c0ec34d51c21", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO 2024-07-17 20:20:43.803 [12438-MainThread] (@2049556245.py:001) start quantize\n", + "INFO - start quant\n", + "Quantizing self_attn.k_proj in layer 1 of 24: 0%| | 0/24 [00:01@3912912720.py:001) start pack\n", + "INFO 2024-07-17 20:27:58.838 [61754-MainThread] (select_quant_linear@importer.py:051) Auto choose the fastest one based on quant model compatibility: \n", + "INFO - Packing model...\n", + "INFO 2024-07-17 20:27:58.841 [61754-MainThread] (pack_model@model.py:278) Packing model...\n", + "INFO 2024-07-17 20:27:58.842 [61754-MainThread] (select_quant_linear@importer.py:051) Auto choose the fastest one based on quant model compatibility: \n", + " 0%| | 0/168 [00:00 2\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave_quantized\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquantized_model_dir\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43muse_safetensors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/aigc-nas02/workspace/online/llm_infer/GPTQModel/gptqmodel/models/base.py:624\u001b[0m, in \u001b[0;36mBaseGPTQModel.save_quantized\u001b[0;34m(self, save_dir, safetensors_metadata, use_safetensors, max_shard_size, model_base_name)\u001b[0m\n\u001b[1;32m 620\u001b[0m \u001b[38;5;66;03m# internal is always gptq v2 but allow users to pass gptq (v1) via config\u001b[39;00m\n\u001b[1;32m 621\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m quantize_config\u001b[38;5;241m.\u001b[39mformat \u001b[38;5;241m==\u001b[39m FORMAT\u001b[38;5;241m.\u001b[39mGPTQ:\n\u001b[1;32m 622\u001b[0m \u001b[38;5;66;03m# Model qzeros may be edited in place.\u001b[39;00m\n\u001b[1;32m 623\u001b[0m \u001b[38;5;66;03m# TODO: avoid inplace modification of the weights\u001b[39;00m\n\u001b[0;32m--> 624\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mcopy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 625\u001b[0m model \u001b[38;5;241m=\u001b[39m convert_gptq_v2_to_v1_format(\n\u001b[1;32m 626\u001b[0m model, quantize_config\u001b[38;5;241m=\u001b[39mquantize_config, qlinear_kernel\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mqlinear_kernel\n\u001b[1;32m 627\u001b[0m )\n\u001b[1;32m 629\u001b[0m model\u001b[38;5;241m.\u001b[39mto(CPU)\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:172\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 170\u001b[0m y \u001b[38;5;241m=\u001b[39m x\n\u001b[1;32m 171\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 172\u001b[0m y \u001b[38;5;241m=\u001b[39m \u001b[43m_reconstruct\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mrv\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;66;03m# If is its own copy, don't memoize.\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m y \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m x:\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:270\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 268\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m state \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 269\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m deep:\n\u001b[0;32m--> 270\u001b[0m state \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 271\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(y, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__setstate__\u001b[39m\u001b[38;5;124m'\u001b[39m):\n\u001b[1;32m 272\u001b[0m y\u001b[38;5;241m.\u001b[39m__setstate__(state)\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:146\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 144\u001b[0m copier \u001b[38;5;241m=\u001b[39m _deepcopy_dispatch\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;28mcls\u001b[39m)\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m copier \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 146\u001b[0m y \u001b[38;5;241m=\u001b[39m \u001b[43mcopier\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 148\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28missubclass\u001b[39m(\u001b[38;5;28mcls\u001b[39m, \u001b[38;5;28mtype\u001b[39m):\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:230\u001b[0m, in \u001b[0;36m_deepcopy_dict\u001b[0;34m(x, memo, deepcopy)\u001b[0m\n\u001b[1;32m 228\u001b[0m memo[\u001b[38;5;28mid\u001b[39m(x)] \u001b[38;5;241m=\u001b[39m y\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, value \u001b[38;5;129;01min\u001b[39;00m x\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m--> 230\u001b[0m y[deepcopy(key, memo)] \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m y\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:172\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 170\u001b[0m y \u001b[38;5;241m=\u001b[39m x\n\u001b[1;32m 171\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 172\u001b[0m y \u001b[38;5;241m=\u001b[39m \u001b[43m_reconstruct\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mrv\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;66;03m# If is its own copy, don't memoize.\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m y \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m x:\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:296\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, value \u001b[38;5;129;01min\u001b[39;00m dictiter:\n\u001b[1;32m 295\u001b[0m key \u001b[38;5;241m=\u001b[39m deepcopy(key, memo)\n\u001b[0;32m--> 296\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 297\u001b[0m y[key] \u001b[38;5;241m=\u001b[39m value\n\u001b[1;32m 298\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + " \u001b[0;31m[... skipping similar frames: deepcopy at line 172 (1 times)]\u001b[0m\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:270\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 268\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m state \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 269\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m deep:\n\u001b[0;32m--> 270\u001b[0m state \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 271\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(y, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__setstate__\u001b[39m\u001b[38;5;124m'\u001b[39m):\n\u001b[1;32m 272\u001b[0m y\u001b[38;5;241m.\u001b[39m__setstate__(state)\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:146\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 144\u001b[0m copier \u001b[38;5;241m=\u001b[39m _deepcopy_dispatch\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;28mcls\u001b[39m)\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m copier \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 146\u001b[0m y \u001b[38;5;241m=\u001b[39m \u001b[43mcopier\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 148\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28missubclass\u001b[39m(\u001b[38;5;28mcls\u001b[39m, \u001b[38;5;28mtype\u001b[39m):\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:230\u001b[0m, in \u001b[0;36m_deepcopy_dict\u001b[0;34m(x, memo, deepcopy)\u001b[0m\n\u001b[1;32m 228\u001b[0m memo[\u001b[38;5;28mid\u001b[39m(x)] \u001b[38;5;241m=\u001b[39m y\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, value \u001b[38;5;129;01min\u001b[39;00m x\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m--> 230\u001b[0m y[deepcopy(key, memo)] \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m y\n", + " \u001b[0;31m[... skipping similar frames: deepcopy at line 172 (1 times)]\u001b[0m\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:296\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, value \u001b[38;5;129;01min\u001b[39;00m dictiter:\n\u001b[1;32m 295\u001b[0m key \u001b[38;5;241m=\u001b[39m deepcopy(key, memo)\n\u001b[0;32m--> 296\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 297\u001b[0m y[key] \u001b[38;5;241m=\u001b[39m value\n\u001b[1;32m 298\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + " \u001b[0;31m[... skipping similar frames: deepcopy at line 172 (7 times), _deepcopy_dict at line 230 (3 times), _reconstruct at line 270 (3 times), _reconstruct at line 296 (3 times), deepcopy at line 146 (3 times)]\u001b[0m\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:270\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 268\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m state \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 269\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m deep:\n\u001b[0;32m--> 270\u001b[0m state \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 271\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(y, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__setstate__\u001b[39m\u001b[38;5;124m'\u001b[39m):\n\u001b[1;32m 272\u001b[0m y\u001b[38;5;241m.\u001b[39m__setstate__(state)\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:146\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 144\u001b[0m copier \u001b[38;5;241m=\u001b[39m _deepcopy_dispatch\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;28mcls\u001b[39m)\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m copier \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 146\u001b[0m y \u001b[38;5;241m=\u001b[39m \u001b[43mcopier\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 148\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28missubclass\u001b[39m(\u001b[38;5;28mcls\u001b[39m, \u001b[38;5;28mtype\u001b[39m):\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:230\u001b[0m, in \u001b[0;36m_deepcopy_dict\u001b[0;34m(x, memo, deepcopy)\u001b[0m\n\u001b[1;32m 228\u001b[0m memo[\u001b[38;5;28mid\u001b[39m(x)] \u001b[38;5;241m=\u001b[39m y\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, value \u001b[38;5;129;01min\u001b[39;00m x\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m--> 230\u001b[0m y[deepcopy(key, memo)] \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m y\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:172\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 170\u001b[0m y \u001b[38;5;241m=\u001b[39m x\n\u001b[1;32m 171\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 172\u001b[0m y \u001b[38;5;241m=\u001b[39m \u001b[43m_reconstruct\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mrv\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;66;03m# If is its own copy, don't memoize.\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m y \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m x:\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:296\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, value \u001b[38;5;129;01min\u001b[39;00m dictiter:\n\u001b[1;32m 295\u001b[0m key \u001b[38;5;241m=\u001b[39m deepcopy(key, memo)\n\u001b[0;32m--> 296\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 297\u001b[0m y[key] \u001b[38;5;241m=\u001b[39m value\n\u001b[1;32m 298\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:153\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 151\u001b[0m copier \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(x, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__deepcopy__\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 152\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m copier \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 153\u001b[0m y \u001b[38;5;241m=\u001b[39m \u001b[43mcopier\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 155\u001b[0m reductor \u001b[38;5;241m=\u001b[39m dispatch_table\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;28mcls\u001b[39m)\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/site-packages/torch/_tensor.py:86\u001b[0m, in \u001b[0;36mTensor.__deepcopy__\u001b[0;34m(self, memo)\u001b[0m\n\u001b[1;32m 84\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m handle_torch_function(Tensor\u001b[38;5;241m.\u001b[39m__deepcopy__, (\u001b[38;5;28mself\u001b[39m,), \u001b[38;5;28mself\u001b[39m, memo)\n\u001b[1;32m 85\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mis_leaf:\n\u001b[0;32m---> 86\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 87\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOnly Tensors created explicitly by the user \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 88\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(graph leaves) support the deepcopy protocol at the moment. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 89\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIf you were attempting to deepcopy a module, this may be because \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 90\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mof a torch.nn.utils.weight_norm usage, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 91\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msee https://github.com/pytorch/pytorch/pull/103001\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 92\u001b[0m )\n\u001b[1;32m 93\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mid\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;129;01min\u001b[39;00m memo:\n\u001b[1;32m 94\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m memo[\u001b[38;5;28mid\u001b[39m(\u001b[38;5;28mself\u001b[39m)]\n", + "\u001b[0;31mRuntimeError\u001b[0m: Only Tensors created explicitly by the user (graph leaves) support the deepcopy protocol at the moment. If you were attempting to deepcopy a module, this may be because of a torch.nn.utils.weight_norm usage, see https://github.com/pytorch/pytorch/pull/103001" + ] + } + ], + "source": [ + "# save quantized model using safetensors\n", + "model.save_quantized(quantized_model_dir, use_safetensors=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "908f5642-526f-45b3-9324-8cb14b037b6f", + "metadata": {}, + "outputs": [ + { + "ename": "RuntimeError", + "evalue": "Only Tensors created explicitly by the user (graph leaves) support the deepcopy protocol at the moment. If you were attempting to deepcopy a module, this may be because of a torch.nn.utils.weight_norm usage, see https://github.com/pytorch/pytorch/pull/103001", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[27], line 7\u001b[0m\n\u001b[1;32m 4\u001b[0m model\u001b[38;5;241m.\u001b[39mload_state_dict(copy\u001b[38;5;241m.\u001b[39mdeepcopy(model\u001b[38;5;241m.\u001b[39mstate_dict()))\n\u001b[1;32m 6\u001b[0m \u001b[38;5;66;03m# save quantized model using safetensors\u001b[39;00m\n\u001b[0;32m----> 7\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave_quantized\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquantized_model_dir\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43muse_safetensors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/aigc-nas02/workspace/online/llm_infer/GPTQModel/gptqmodel/models/base.py:625\u001b[0m, in \u001b[0;36msave_quantized\u001b[0;34m(self, save_dir, safetensors_metadata, use_safetensors, max_shard_size, model_base_name)\u001b[0m\n\u001b[1;32m 621\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m quantize_config\u001b[38;5;241m.\u001b[39mformat \u001b[38;5;241m==\u001b[39m FORMAT\u001b[38;5;241m.\u001b[39mGPTQ:\n\u001b[1;32m 622\u001b[0m \u001b[38;5;66;03m# Model qzeros may be edited in place.\u001b[39;00m\n\u001b[1;32m 623\u001b[0m \u001b[38;5;66;03m# TODO: avoid inplace modification of the weights\u001b[39;00m\n\u001b[1;32m 624\u001b[0m model \u001b[38;5;241m=\u001b[39m copy\u001b[38;5;241m.\u001b[39mdeepcopy(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel)\n\u001b[0;32m--> 625\u001b[0m model \u001b[38;5;241m=\u001b[39m convert_gptq_v2_to_v1_format(\n\u001b[1;32m 626\u001b[0m model, quantize_config\u001b[38;5;241m=\u001b[39mquantize_config, qlinear_kernel\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mqlinear_kernel\n\u001b[1;32m 627\u001b[0m )\n\u001b[1;32m 629\u001b[0m model\u001b[38;5;241m.\u001b[39mto(CPU)\n\u001b[1;32m 631\u001b[0m state_dict \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mstate_dict()\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:172\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 170\u001b[0m y \u001b[38;5;241m=\u001b[39m x\n\u001b[1;32m 171\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 172\u001b[0m y \u001b[38;5;241m=\u001b[39m \u001b[43m_reconstruct\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mrv\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;66;03m# If is its own copy, don't memoize.\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m y \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m x:\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:270\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 268\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m state \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 269\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m deep:\n\u001b[0;32m--> 270\u001b[0m state \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 271\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(y, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__setstate__\u001b[39m\u001b[38;5;124m'\u001b[39m):\n\u001b[1;32m 272\u001b[0m y\u001b[38;5;241m.\u001b[39m__setstate__(state)\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:146\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 144\u001b[0m copier \u001b[38;5;241m=\u001b[39m _deepcopy_dispatch\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;28mcls\u001b[39m)\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m copier \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 146\u001b[0m y \u001b[38;5;241m=\u001b[39m \u001b[43mcopier\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 148\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28missubclass\u001b[39m(\u001b[38;5;28mcls\u001b[39m, \u001b[38;5;28mtype\u001b[39m):\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:230\u001b[0m, in \u001b[0;36m_deepcopy_dict\u001b[0;34m(x, memo, deepcopy)\u001b[0m\n\u001b[1;32m 228\u001b[0m memo[\u001b[38;5;28mid\u001b[39m(x)] \u001b[38;5;241m=\u001b[39m y\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, value \u001b[38;5;129;01min\u001b[39;00m x\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m--> 230\u001b[0m y[deepcopy(key, memo)] \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m y\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:172\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 170\u001b[0m y \u001b[38;5;241m=\u001b[39m x\n\u001b[1;32m 171\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 172\u001b[0m y \u001b[38;5;241m=\u001b[39m \u001b[43m_reconstruct\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mrv\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;66;03m# If is its own copy, don't memoize.\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m y \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m x:\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:296\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, value \u001b[38;5;129;01min\u001b[39;00m dictiter:\n\u001b[1;32m 295\u001b[0m key \u001b[38;5;241m=\u001b[39m deepcopy(key, memo)\n\u001b[0;32m--> 296\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 297\u001b[0m y[key] \u001b[38;5;241m=\u001b[39m value\n\u001b[1;32m 298\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + " \u001b[0;31m[... skipping similar frames: deepcopy at line 172 (1 times)]\u001b[0m\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:270\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 268\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m state \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 269\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m deep:\n\u001b[0;32m--> 270\u001b[0m state \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 271\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(y, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__setstate__\u001b[39m\u001b[38;5;124m'\u001b[39m):\n\u001b[1;32m 272\u001b[0m y\u001b[38;5;241m.\u001b[39m__setstate__(state)\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:146\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 144\u001b[0m copier \u001b[38;5;241m=\u001b[39m _deepcopy_dispatch\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;28mcls\u001b[39m)\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m copier \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 146\u001b[0m y \u001b[38;5;241m=\u001b[39m \u001b[43mcopier\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 148\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28missubclass\u001b[39m(\u001b[38;5;28mcls\u001b[39m, \u001b[38;5;28mtype\u001b[39m):\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:230\u001b[0m, in \u001b[0;36m_deepcopy_dict\u001b[0;34m(x, memo, deepcopy)\u001b[0m\n\u001b[1;32m 228\u001b[0m memo[\u001b[38;5;28mid\u001b[39m(x)] \u001b[38;5;241m=\u001b[39m y\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, value \u001b[38;5;129;01min\u001b[39;00m x\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m--> 230\u001b[0m y[deepcopy(key, memo)] \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m y\n", + " \u001b[0;31m[... skipping similar frames: deepcopy at line 172 (1 times)]\u001b[0m\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:296\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, value \u001b[38;5;129;01min\u001b[39;00m dictiter:\n\u001b[1;32m 295\u001b[0m key \u001b[38;5;241m=\u001b[39m deepcopy(key, memo)\n\u001b[0;32m--> 296\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 297\u001b[0m y[key] \u001b[38;5;241m=\u001b[39m value\n\u001b[1;32m 298\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + " \u001b[0;31m[... skipping similar frames: deepcopy at line 172 (7 times), _deepcopy_dict at line 230 (3 times), _reconstruct at line 270 (3 times), _reconstruct at line 296 (3 times), deepcopy at line 146 (3 times)]\u001b[0m\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:270\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 268\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m state \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 269\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m deep:\n\u001b[0;32m--> 270\u001b[0m state \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 271\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(y, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__setstate__\u001b[39m\u001b[38;5;124m'\u001b[39m):\n\u001b[1;32m 272\u001b[0m y\u001b[38;5;241m.\u001b[39m__setstate__(state)\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:146\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 144\u001b[0m copier \u001b[38;5;241m=\u001b[39m _deepcopy_dispatch\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;28mcls\u001b[39m)\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m copier \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 146\u001b[0m y \u001b[38;5;241m=\u001b[39m \u001b[43mcopier\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 148\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28missubclass\u001b[39m(\u001b[38;5;28mcls\u001b[39m, \u001b[38;5;28mtype\u001b[39m):\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:230\u001b[0m, in \u001b[0;36m_deepcopy_dict\u001b[0;34m(x, memo, deepcopy)\u001b[0m\n\u001b[1;32m 228\u001b[0m memo[\u001b[38;5;28mid\u001b[39m(x)] \u001b[38;5;241m=\u001b[39m y\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, value \u001b[38;5;129;01min\u001b[39;00m x\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m--> 230\u001b[0m y[deepcopy(key, memo)] \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m y\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:172\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 170\u001b[0m y \u001b[38;5;241m=\u001b[39m x\n\u001b[1;32m 171\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 172\u001b[0m y \u001b[38;5;241m=\u001b[39m \u001b[43m_reconstruct\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mrv\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;66;03m# If is its own copy, don't memoize.\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m y \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m x:\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:296\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, value \u001b[38;5;129;01min\u001b[39;00m dictiter:\n\u001b[1;32m 295\u001b[0m key \u001b[38;5;241m=\u001b[39m deepcopy(key, memo)\n\u001b[0;32m--> 296\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 297\u001b[0m y[key] \u001b[38;5;241m=\u001b[39m value\n\u001b[1;32m 298\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:153\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 151\u001b[0m copier \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(x, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__deepcopy__\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 152\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m copier \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 153\u001b[0m y \u001b[38;5;241m=\u001b[39m \u001b[43mcopier\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 155\u001b[0m reductor \u001b[38;5;241m=\u001b[39m dispatch_table\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;28mcls\u001b[39m)\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/site-packages/torch/_tensor.py:86\u001b[0m, in \u001b[0;36mTensor.__deepcopy__\u001b[0;34m(self, memo)\u001b[0m\n\u001b[1;32m 84\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m handle_torch_function(Tensor\u001b[38;5;241m.\u001b[39m__deepcopy__, (\u001b[38;5;28mself\u001b[39m,), \u001b[38;5;28mself\u001b[39m, memo)\n\u001b[1;32m 85\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mis_leaf:\n\u001b[0;32m---> 86\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 87\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOnly Tensors created explicitly by the user \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 88\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(graph leaves) support the deepcopy protocol at the moment. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 89\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIf you were attempting to deepcopy a module, this may be because \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 90\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mof a torch.nn.utils.weight_norm usage, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 91\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msee https://github.com/pytorch/pytorch/pull/103001\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 92\u001b[0m )\n\u001b[1;32m 93\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mid\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;129;01min\u001b[39;00m memo:\n\u001b[1;32m 94\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m memo[\u001b[38;5;28mid\u001b[39m(\u001b[38;5;28mself\u001b[39m)]\n", + "\u001b[0;31mRuntimeError\u001b[0m: Only Tensors created explicitly by the user (graph leaves) support the deepcopy protocol at the moment. If you were attempting to deepcopy a module, this may be because of a torch.nn.utils.weight_norm usage, see https://github.com/pytorch/pytorch/pull/103001" + ] + } + ], + "source": [ + "from torch import device\n", + "model.to(device(\"cpu\"))\n", + "import copy\n", + "model.load_state_dict(copy.deepcopy(model.state_dict()))\n", + "\n", + "# save quantized model using safetensors\n", + "model.save_quantized(quantized_model_dir, use_safetensors=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "7eda7b80-a12e-46ed-a98f-21737a7fac15", + "metadata": {}, + "outputs": [], + "source": [ + "def shallow_copy(x: dict):\n", + " c = {}\n", + " for k,v in x.items():\n", + " c[k] = clone_if_tensor(v)\n", + " return c\n", + "\n", + "\n", + "def clone_if_tensor(value):\n", + " import torch\n", + " if isinstance(value, torch.Tensor):\n", + " return value.clone()\n", + " elif isinstance(value, list):\n", + " return [clone_if_tensor(v) for v in value]\n", + " elif isinstance(value, dict):\n", + " return {k: clone_if_tensor(v) for k, v in value.items()}\n", + " elif isinstance(value, (int, float, bool, str)):\n", + " return copy.deepcopy(value)\n", + " else:\n", + " return value" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "57ee1356-9b21-4966-8830-66aa9621490e", + "metadata": {}, + "outputs": [ + { + "ename": "RuntimeError", + "evalue": "Only Tensors created explicitly by the user (graph leaves) support the deepcopy protocol at the moment. If you were attempting to deepcopy a module, this may be because of a torch.nn.utils.weight_norm usage, see https://github.com/pytorch/pytorch/pull/103001", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[30], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m model\u001b[38;5;241m.\u001b[39mmodel\u001b[38;5;241m.\u001b[39mload_state_dict(shallow_copy(model\u001b[38;5;241m.\u001b[39mmodel\u001b[38;5;241m.\u001b[39mstate_dict()))\n\u001b[1;32m 2\u001b[0m \u001b[38;5;66;03m# save quantized model using safetensors\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave_quantized\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquantized_model_dir\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43muse_safetensors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/aigc-nas02/workspace/online/llm_infer/GPTQModel/gptqmodel/models/base.py:625\u001b[0m, in \u001b[0;36msave_quantized\u001b[0;34m(self, save_dir, safetensors_metadata, use_safetensors, max_shard_size, model_base_name)\u001b[0m\n\u001b[1;32m 621\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m quantize_config\u001b[38;5;241m.\u001b[39mformat \u001b[38;5;241m==\u001b[39m FORMAT\u001b[38;5;241m.\u001b[39mGPTQ:\n\u001b[1;32m 622\u001b[0m \u001b[38;5;66;03m# Model qzeros may be edited in place.\u001b[39;00m\n\u001b[1;32m 623\u001b[0m \u001b[38;5;66;03m# TODO: avoid inplace modification of the weights\u001b[39;00m\n\u001b[1;32m 624\u001b[0m model \u001b[38;5;241m=\u001b[39m copy\u001b[38;5;241m.\u001b[39mdeepcopy(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel)\n\u001b[0;32m--> 625\u001b[0m model \u001b[38;5;241m=\u001b[39m convert_gptq_v2_to_v1_format(\n\u001b[1;32m 626\u001b[0m model, quantize_config\u001b[38;5;241m=\u001b[39mquantize_config, qlinear_kernel\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mqlinear_kernel\n\u001b[1;32m 627\u001b[0m )\n\u001b[1;32m 629\u001b[0m model\u001b[38;5;241m.\u001b[39mto(CPU)\n\u001b[1;32m 631\u001b[0m state_dict \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mstate_dict()\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:172\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 170\u001b[0m y \u001b[38;5;241m=\u001b[39m x\n\u001b[1;32m 171\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 172\u001b[0m y \u001b[38;5;241m=\u001b[39m \u001b[43m_reconstruct\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mrv\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;66;03m# If is its own copy, don't memoize.\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m y \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m x:\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:270\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 268\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m state \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 269\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m deep:\n\u001b[0;32m--> 270\u001b[0m state \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 271\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(y, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__setstate__\u001b[39m\u001b[38;5;124m'\u001b[39m):\n\u001b[1;32m 272\u001b[0m y\u001b[38;5;241m.\u001b[39m__setstate__(state)\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:146\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 144\u001b[0m copier \u001b[38;5;241m=\u001b[39m _deepcopy_dispatch\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;28mcls\u001b[39m)\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m copier \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 146\u001b[0m y \u001b[38;5;241m=\u001b[39m \u001b[43mcopier\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 148\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28missubclass\u001b[39m(\u001b[38;5;28mcls\u001b[39m, \u001b[38;5;28mtype\u001b[39m):\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:230\u001b[0m, in \u001b[0;36m_deepcopy_dict\u001b[0;34m(x, memo, deepcopy)\u001b[0m\n\u001b[1;32m 228\u001b[0m memo[\u001b[38;5;28mid\u001b[39m(x)] \u001b[38;5;241m=\u001b[39m y\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, value \u001b[38;5;129;01min\u001b[39;00m x\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m--> 230\u001b[0m y[deepcopy(key, memo)] \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m y\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:172\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 170\u001b[0m y \u001b[38;5;241m=\u001b[39m x\n\u001b[1;32m 171\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 172\u001b[0m y \u001b[38;5;241m=\u001b[39m \u001b[43m_reconstruct\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mrv\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;66;03m# If is its own copy, don't memoize.\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m y \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m x:\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:296\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, value \u001b[38;5;129;01min\u001b[39;00m dictiter:\n\u001b[1;32m 295\u001b[0m key \u001b[38;5;241m=\u001b[39m deepcopy(key, memo)\n\u001b[0;32m--> 296\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 297\u001b[0m y[key] \u001b[38;5;241m=\u001b[39m value\n\u001b[1;32m 298\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + " \u001b[0;31m[... skipping similar frames: deepcopy at line 172 (1 times)]\u001b[0m\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:270\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 268\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m state \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 269\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m deep:\n\u001b[0;32m--> 270\u001b[0m state \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 271\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(y, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__setstate__\u001b[39m\u001b[38;5;124m'\u001b[39m):\n\u001b[1;32m 272\u001b[0m y\u001b[38;5;241m.\u001b[39m__setstate__(state)\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:146\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 144\u001b[0m copier \u001b[38;5;241m=\u001b[39m _deepcopy_dispatch\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;28mcls\u001b[39m)\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m copier \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 146\u001b[0m y \u001b[38;5;241m=\u001b[39m \u001b[43mcopier\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 148\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28missubclass\u001b[39m(\u001b[38;5;28mcls\u001b[39m, \u001b[38;5;28mtype\u001b[39m):\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:230\u001b[0m, in \u001b[0;36m_deepcopy_dict\u001b[0;34m(x, memo, deepcopy)\u001b[0m\n\u001b[1;32m 228\u001b[0m memo[\u001b[38;5;28mid\u001b[39m(x)] \u001b[38;5;241m=\u001b[39m y\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, value \u001b[38;5;129;01min\u001b[39;00m x\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m--> 230\u001b[0m y[deepcopy(key, memo)] \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m y\n", + " \u001b[0;31m[... skipping similar frames: deepcopy at line 172 (1 times)]\u001b[0m\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:296\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, value \u001b[38;5;129;01min\u001b[39;00m dictiter:\n\u001b[1;32m 295\u001b[0m key \u001b[38;5;241m=\u001b[39m deepcopy(key, memo)\n\u001b[0;32m--> 296\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 297\u001b[0m y[key] \u001b[38;5;241m=\u001b[39m value\n\u001b[1;32m 298\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + " \u001b[0;31m[... skipping similar frames: deepcopy at line 172 (7 times), _deepcopy_dict at line 230 (3 times), _reconstruct at line 270 (3 times), _reconstruct at line 296 (3 times), deepcopy at line 146 (3 times)]\u001b[0m\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:270\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 268\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m state \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 269\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m deep:\n\u001b[0;32m--> 270\u001b[0m state \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 271\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(y, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__setstate__\u001b[39m\u001b[38;5;124m'\u001b[39m):\n\u001b[1;32m 272\u001b[0m y\u001b[38;5;241m.\u001b[39m__setstate__(state)\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:146\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 144\u001b[0m copier \u001b[38;5;241m=\u001b[39m _deepcopy_dispatch\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;28mcls\u001b[39m)\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m copier \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 146\u001b[0m y \u001b[38;5;241m=\u001b[39m \u001b[43mcopier\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 148\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28missubclass\u001b[39m(\u001b[38;5;28mcls\u001b[39m, \u001b[38;5;28mtype\u001b[39m):\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:230\u001b[0m, in \u001b[0;36m_deepcopy_dict\u001b[0;34m(x, memo, deepcopy)\u001b[0m\n\u001b[1;32m 228\u001b[0m memo[\u001b[38;5;28mid\u001b[39m(x)] \u001b[38;5;241m=\u001b[39m y\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, value \u001b[38;5;129;01min\u001b[39;00m x\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m--> 230\u001b[0m y[deepcopy(key, memo)] \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m y\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:172\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 170\u001b[0m y \u001b[38;5;241m=\u001b[39m x\n\u001b[1;32m 171\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 172\u001b[0m y \u001b[38;5;241m=\u001b[39m \u001b[43m_reconstruct\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mrv\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;66;03m# If is its own copy, don't memoize.\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m y \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m x:\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:296\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, value \u001b[38;5;129;01min\u001b[39;00m dictiter:\n\u001b[1;32m 295\u001b[0m key \u001b[38;5;241m=\u001b[39m deepcopy(key, memo)\n\u001b[0;32m--> 296\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 297\u001b[0m y[key] \u001b[38;5;241m=\u001b[39m value\n\u001b[1;32m 298\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/copy.py:153\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 151\u001b[0m copier \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(x, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__deepcopy__\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 152\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m copier \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 153\u001b[0m y \u001b[38;5;241m=\u001b[39m \u001b[43mcopier\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmemo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 155\u001b[0m reductor \u001b[38;5;241m=\u001b[39m dispatch_table\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;28mcls\u001b[39m)\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/site-packages/torch/_tensor.py:86\u001b[0m, in \u001b[0;36mTensor.__deepcopy__\u001b[0;34m(self, memo)\u001b[0m\n\u001b[1;32m 84\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m handle_torch_function(Tensor\u001b[38;5;241m.\u001b[39m__deepcopy__, (\u001b[38;5;28mself\u001b[39m,), \u001b[38;5;28mself\u001b[39m, memo)\n\u001b[1;32m 85\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mis_leaf:\n\u001b[0;32m---> 86\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 87\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOnly Tensors created explicitly by the user \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 88\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(graph leaves) support the deepcopy protocol at the moment. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 89\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIf you were attempting to deepcopy a module, this may be because \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 90\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mof a torch.nn.utils.weight_norm usage, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 91\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msee https://github.com/pytorch/pytorch/pull/103001\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 92\u001b[0m )\n\u001b[1;32m 93\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mid\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;129;01min\u001b[39;00m memo:\n\u001b[1;32m 94\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m memo[\u001b[38;5;28mid\u001b[39m(\u001b[38;5;28mself\u001b[39m)]\n", + "\u001b[0;31mRuntimeError\u001b[0m: Only Tensors created explicitly by the user (graph leaves) support the deepcopy protocol at the moment. If you were attempting to deepcopy a module, this may be because of a torch.nn.utils.weight_norm usage, see https://github.com/pytorch/pytorch/pull/103001" + ] + } + ], + "source": [ + "model.model.load_state_dict(shallow_copy(model.model.state_dict()))\n", + "# save quantized model using safetensors\n", + "model.save_quantized(quantized_model_dir, use_safetensors=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "ad3bb208-f1b6-4eee-a2e5-2db7cc046751", + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'Qwen2ForCausalLM' object has no attribute 'weight'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[29], line 6\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mnn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mutils\u001b[39;00m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# model = utils.weight_norm(model.state_dict())\u001b[39;00m\n\u001b[0;32m----> 6\u001b[0m model2 \u001b[38;5;241m=\u001b[39m \u001b[43mutils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mweight_norm\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;66;03m# 在进行深拷贝前移除权重规范化\u001b[39;00m\n\u001b[1;32m 9\u001b[0m utils\u001b[38;5;241m.\u001b[39mremove_weight_norm(model2)\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/site-packages/torch/nn/utils/weight_norm.py:130\u001b[0m, in \u001b[0;36mweight_norm\u001b[0;34m(module, name, dim)\u001b[0m\n\u001b[1;32m 70\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mweight_norm\u001b[39m(module: T_module, name: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mweight\u001b[39m\u001b[38;5;124m'\u001b[39m, dim: \u001b[38;5;28mint\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m T_module:\n\u001b[1;32m 71\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"Apply weight normalization to a parameter in the given module.\u001b[39;00m\n\u001b[1;32m 72\u001b[0m \n\u001b[1;32m 73\u001b[0m \u001b[38;5;124;03m .. math::\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 128\u001b[0m \n\u001b[1;32m 129\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 130\u001b[0m \u001b[43mWeightNorm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodule\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdim\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 131\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m module\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/site-packages/torch/nn/utils/weight_norm.py:39\u001b[0m, in \u001b[0;36mWeightNorm.apply\u001b[0;34m(module, name, dim)\u001b[0m\n\u001b[1;32m 35\u001b[0m dim \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 37\u001b[0m fn \u001b[38;5;241m=\u001b[39m WeightNorm(name, dim)\n\u001b[0;32m---> 39\u001b[0m weight \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mmodule\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 40\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(weight, UninitializedParameter):\n\u001b[1;32m 41\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 42\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mThe module passed to `WeightNorm` can\u001b[39m\u001b[38;5;130;01m\\'\u001b[39;00m\u001b[38;5;124mt have uninitialized parameters. \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 43\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mMake sure to run the dummy forward before applying weight normalization\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", + "File \u001b[0;32m/home/work/miniconda3/envs/autoawq2/lib/python3.9/site-packages/torch/nn/modules/module.py:1709\u001b[0m, in \u001b[0;36mModule.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1707\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m modules:\n\u001b[1;32m 1708\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m modules[name]\n\u001b[0;32m-> 1709\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(\u001b[38;5;28mself\u001b[39m)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m object has no attribute \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "\u001b[0;31mAttributeError\u001b[0m: 'Qwen2ForCausalLM' object has no attribute 'weight'" + ] + } + ], + "source": [ + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.utils as utils\n", + "\n", + "# model = utils.weight_norm(model.state_dict())\n", + "model2 = utils.weight_norm(model.model)\n", + "\n", + "# 在进行深拷贝前移除权重规范化\n", + "utils.remove_weight_norm(model2)\n", + "\n", + "# 进行深拷贝\n", + "model_copy = copy.deepcopy(model2)\n", + "\n", + "# 重新应用权重规范化\n", + "model_copy = utils.weight_norm(model_copy)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "1da8dcdf-176f-4114-89e8-9a8d16508dd4", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}