Skip to content

Commit

Permalink
fix regression #1971 (#1972)
Browse files: browse the repository at this point in the history
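Fixes regression #1971, introduced by intel_extension_for_transformers==1.4, by moving the Intel XPU and OpenVINO imports from module level into the LoadModel branches that use them.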
  • Loading branch information
fakezeta authored Apr 8, 2024
1 parent efcca15 commit a38618d
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions backend/python/transformers/transformers_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,7 @@

XPU=os.environ.get("XPU", "0") == "1"
if XPU:
import intel_extension_for_pytorch as ipex
from intel_extension_for_transformers.transformers.modeling import AutoModelForCausalLM
from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer
from optimum.intel.openvino import OVModelForCausalLM
from openvino.runtime import Core
else:
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed, BitsAndBytesConfig, TextIteratorStreamer

Expand Down Expand Up @@ -115,6 +111,9 @@ def LoadModel(self, request, context):
try:
if request.Type == "AutoModelForCausalLM":
if XPU:
import intel_extension_for_pytorch as ipex
from intel_extension_for_transformers.transformers.modeling import AutoModelForCausalLM

device_map="xpu"
compute=torch.float16
if request.Quantization == "xpu_4bit":
Expand All @@ -141,6 +140,9 @@ def LoadModel(self, request, context):
device_map=device_map,
torch_dtype=compute)
elif request.Type == "OVModelForCausalLM":
from optimum.intel.openvino import OVModelForCausalLM
from openvino.runtime import Core

if "GPU" in Core().available_devices:
device_map="GPU"
else:
Expand Down

0 comments on commit a38618d

Please sign in to comment.