diff --git a/backend/python/autogptq/autogptq.py b/backend/python/autogptq/autogptq.py index bbafdd92085..c7c350286c7 100755 --- a/backend/python/autogptq/autogptq.py +++ b/backend/python/autogptq/autogptq.py @@ -39,7 +39,6 @@ def LoadModel(self, request, context): self.model_name = "Qwen-VL-Chat" model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=request.TrustRemoteCode, - use_triton=request.UseTriton, device_map="auto").eval() else: model = AutoGPTQForCausalLM.from_quantized(model_path,