From 45820cf3b90e903e46313a2d418faa9cc16a54a6 Mon Sep 17 00:00:00 2001 From: "Chen, Zhentao" Date: Fri, 24 Nov 2023 17:10:49 +0800 Subject: [PATCH] add optimize model option (#9530) --- python/llm/dev/benchmark/harness/bigdl_llm.py | 7 ++++--- python/llm/dev/benchmark/harness/run_llb.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/python/llm/dev/benchmark/harness/bigdl_llm.py b/python/llm/dev/benchmark/harness/bigdl_llm.py index c2ba098b..39dd0058 100644 --- a/python/llm/dev/benchmark/harness/bigdl_llm.py +++ b/python/llm/dev/benchmark/harness/bigdl_llm.py @@ -46,9 +46,10 @@ class BigDLLM(BaseLM): tokenizer=None, batch_size=1, load_in_8bit: Optional[bool] = False, - trust_remote_code: Optional[bool] = False, + trust_remote_code: Optional[bool] = True, load_in_low_bit=None, dtype: Optional[Union[str, torch.dtype]] = "auto", + **kwargs ): super().__init__() @@ -58,8 +59,8 @@ class BigDLLM(BaseLM): import intel_extension_for_pytorch as ipex model = AutoModelForCausalLM.from_pretrained(pretrained, load_in_low_bit=load_in_low_bit, - optimize_model=True, - trust_remote_code=True, + optimize_model=kwargs.get('optimize_model', True), + trust_remote_code=trust_remote_code, use_cache=True, torch_dtype=_get_dtype(dtype)) print(model) # print model to check precision diff --git a/python/llm/dev/benchmark/harness/run_llb.py b/python/llm/dev/benchmark/harness/run_llb.py index 6e8c7143..56be54c5 100644 --- a/python/llm/dev/benchmark/harness/run_llb.py +++ b/python/llm/dev/benchmark/harness/run_llb.py @@ -90,7 +90,7 @@ def main(): prec_arg = parse_precision(prec, args.model) model_args = f"pretrained={args.pretrained},{prec_arg}" if len(args.model_args) > 0: - model_args += args.model_args + model_args = f"{model_args},{args.model_args}" for task in args.tasks: task_names=task_map.get(task, task).split(',') num_fewshot = task_to_n_few_shots.get(task, args.num_fewshot)