From 45820cf3b90e903e46313a2d418faa9cc16a54a6 Mon Sep 17 00:00:00 2001
From: "Chen, Zhentao" <zhentao.chen@intel.com>
Date: Fri, 24 Nov 2023 17:10:49 +0800
Subject: [PATCH] add optimize model option (#9530)

---
 python/llm/dev/benchmark/harness/bigdl_llm.py | 7 ++++---
 python/llm/dev/benchmark/harness/run_llb.py   | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/python/llm/dev/benchmark/harness/bigdl_llm.py b/python/llm/dev/benchmark/harness/bigdl_llm.py
index c2ba098b..39dd0058 100644
--- a/python/llm/dev/benchmark/harness/bigdl_llm.py
+++ b/python/llm/dev/benchmark/harness/bigdl_llm.py
@@ -46,9 +46,10 @@ class BigDLLM(BaseLM):
         tokenizer=None,
         batch_size=1,
         load_in_8bit: Optional[bool] = False,
-        trust_remote_code: Optional[bool] = False,
+        trust_remote_code: Optional[bool] = True,
         load_in_low_bit=None,
         dtype: Optional[Union[str, torch.dtype]] = "auto",
+        **kwargs
     ):
         super().__init__()
 
@@ -58,8 +59,8 @@ class BigDLLM(BaseLM):
             import intel_extension_for_pytorch as ipex
         model = AutoModelForCausalLM.from_pretrained(pretrained,
                                           load_in_low_bit=load_in_low_bit,
-                                          optimize_model=True,
-                                          trust_remote_code=True,
+                                          optimize_model=kwargs.get('optimize_model', True),
+                                          trust_remote_code=trust_remote_code,
                                           use_cache=True,
                                           torch_dtype=_get_dtype(dtype))
         print(model) # print model to check precision
diff --git a/python/llm/dev/benchmark/harness/run_llb.py b/python/llm/dev/benchmark/harness/run_llb.py
index 6e8c7143..56be54c5 100644
--- a/python/llm/dev/benchmark/harness/run_llb.py
+++ b/python/llm/dev/benchmark/harness/run_llb.py
@@ -90,7 +90,7 @@ def main():
         prec_arg = parse_precision(prec, args.model)
         model_args = f"pretrained={args.pretrained},{prec_arg}"
         if len(args.model_args) > 0:
-            model_args += args.model_args
+            model_args = f"{model_args},{args.model_args}"
         for task in args.tasks:
             task_names=task_map.get(task, task).split(',')
             num_fewshot = task_to_n_few_shots.get(task, args.num_fewshot)