Remove the max_memory parameter from from_pretrained (#144)

ModelCloud · Jul 2, 2024 · 9babfe0
1 parent a4e7d90
commit 9babfe0
Show file tree

Hide file tree

Showing 2 changed files with 0 additions and 27 deletions.
diff --git a/gptqmodel/models/auto.py b/gptqmodel/models/auto.py
@@ -98,15 +98,13 @@ def from_pretrained(
         cls,
         pretrained_model_name_or_path: str,
         quantize_config: QuantizeConfig,
-        max_memory: Optional[dict] = None,
         trust_remote_code: bool = False,
         **model_init_kwargs,
     ) -> BaseGPTQModel:
         model_type = check_and_get_model_type(pretrained_model_name_or_path, trust_remote_code)
         return MODEL_MAP[model_type].from_pretrained(
             pretrained_model_name_or_path=pretrained_model_name_or_path,
             quantize_config=quantize_config,
-            max_memory=max_memory,
             trust_remote_code=trust_remote_code,
             **model_init_kwargs,
         )

diff --git a/gptqmodel/models/base.py b/gptqmodel/models/base.py
@@ -679,7 +679,6 @@ def from_pretrained(
         cls,
         pretrained_model_name_or_path: str,
         quantize_config: QuantizeConfig,
-        max_memory: Optional[dict] = None,
         trust_remote_code: bool = False,
         torch_dtype: [str | torch.dtype] = "auto",
         **model_init_kwargs,
@@ -721,30 +720,6 @@ def skip(*args, **kwargs):
         if config.model_type not in SUPPORTED_MODELS:
             raise TypeError(f"{config.model_type} isn't supported yet.")
 
-        if max_memory:
-            if "disk" in max_memory:
-                raise NotImplementedError("disk offload not support yet.")
-            with accelerate.init_empty_weights():
-                model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)
-            model.tie_weights()
-
-            max_memory = accelerate.utils.get_balanced_memory(
-                model,
-                max_memory=max_memory,
-                no_split_module_classes=[cls.layer_type],
-                dtype=model_init_kwargs["torch_dtype"],
-                low_zero=False,
-            )
-            model_init_kwargs["device_map"] = accelerate.infer_auto_device_map(
-                model,
-                max_memory=max_memory,
-                no_split_module_classes=[cls.layer_type],
-                dtype=model_init_kwargs["torch_dtype"],
-            )
-            del model
-        else:
-            model_init_kwargs["device_map"] = None
-
         torch.cuda.empty_cache()
 
         model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path, **model_init_kwargs)