From dbbdb53a18beec2fda158bb711abf4dd3fd66012 Mon Sep 17 00:00:00 2001
From: "Chen, Zhentao"
Date: Tue, 14 Nov 2023 17:06:27 +0800
Subject: [PATCH] fix multiple gpu usage (#9459)

---
 python/llm/dev/benchmark/harness/bigdl_llm.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/llm/dev/benchmark/harness/bigdl_llm.py b/python/llm/dev/benchmark/harness/bigdl_llm.py
index 6e0bbfa6..c2ba098b 100644
--- a/python/llm/dev/benchmark/harness/bigdl_llm.py
+++ b/python/llm/dev/benchmark/harness/bigdl_llm.py
@@ -54,7 +54,7 @@ class BigDLLM(BaseLM):
 
         assert isinstance(pretrained, str)
         assert isinstance(batch_size, (int,str))
-        if device == 'xpu':
+        if 'xpu' in device:
             import intel_extension_for_pytorch as ipex
         model = AutoModelForCausalLM.from_pretrained(pretrained,
                                                      load_in_low_bit=load_in_low_bit,
@@ -118,4 +118,4 @@ class BigDLLM(BaseLM):
         return res
 
     def _model_generate(self, context, max_length, eos_token_id):
-        return self.model(context, max_tokens=max_length, stop=["Q:", "\n"], echo=True)
\ No newline at end of file
+        return self.model(context, max_tokens=max_length, stop=["Q:", "\n"], echo=True)
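
Note on the fix: with multiple GPUs the harness receives indexed device strings such as 'xpu:0' or 'xpu:1', which fail the exact comparison device == 'xpu', so the intel_extension_for_pytorch import was silently skipped on those devices. The substring check 'xpu' in device matches all of them. A minimal sketch of the predicate follows; the needs_ipex helper name and the device strings are illustrative, not part of the patch:

    def needs_ipex(device: str) -> bool:
        # Old check: only the bare string 'xpu' matched, so 'xpu:1'
        # skipped the intel_extension_for_pytorch import.
        #   return device == 'xpu'
        # Patched check: any device string containing 'xpu' matches,
        # including indexed multi-GPU devices.
        return 'xpu' in device

    for device in ('cpu', 'xpu', 'xpu:0', 'xpu:1'):
        print(device, needs_ipex(device))
    # cpu False, xpu True, xpu:0 True, xpu:1 True

If a stricter match were ever needed, device == 'xpu' or device.startswith('xpu:') would reject unrelated strings that merely contain 'xpu', at the cost of a longer condition.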