diff --git a/.github/workflows/llm-harness-evaluation.yml b/.github/workflows/llm-harness-evaluation.yml
index 11255e03..e940f245 100644
--- a/.github/workflows/llm-harness-evaluation.yml
+++ b/.github/workflows/llm-harness-evaluation.yml
@@ -189,7 +189,7 @@ jobs:
           fi
 
           python run_llb.py \
-            --model bigdl-llm \
+            --model ipex-llm \
             --pretrained ${MODEL_PATH} \
             --precision ${{ matrix.precision }} \
             --device ${{ matrix.device }} \
diff --git a/python/llm/dev/benchmark/harness/harness_to_leaderboard.py b/python/llm/dev/benchmark/harness/harness_to_leaderboard.py
index 82cdc341..5dd04b9a 100644
--- a/python/llm/dev/benchmark/harness/harness_to_leaderboard.py
+++ b/python/llm/dev/benchmark/harness/harness_to_leaderboard.py
@@ -48,7 +48,7 @@ task_to_metric = dict(
     drop='f1'
 )
 
-def parse_precision(precision, model="bigdl-llm"):
+def parse_precision(precision, model="ipex-llm"):
     result = match(r"([a-zA-Z_]+)(\d+)([a-zA-Z_\d]*)", precision)
     datatype = result.group(1)
     bit = int(result.group(2))
@@ -62,6 +62,6 @@ def parse_precision(precision, model="bigdl-llm"):
     else:
         if model == "hf-causal":
             return f"bnb_type={precision}"
-        if model == "bigdl-llm":
+        if model == "ipex-llm":
             return f"load_in_low_bit={precision}"
     raise RuntimeError(f"invald precision {precision}")
diff --git a/python/llm/dev/benchmark/harness/bigdl_llm.py b/python/llm/dev/benchmark/harness/ipexllm.py
similarity index 98%
rename from python/llm/dev/benchmark/harness/bigdl_llm.py
rename to python/llm/dev/benchmark/harness/ipexllm.py
index 8626fc1a..0049f1e4 100644
--- a/python/llm/dev/benchmark/harness/bigdl_llm.py
+++ b/python/llm/dev/benchmark/harness/ipexllm.py
@@ -35,7 +35,7 @@ def force_decrease_order(Reorderer):
 
 utils.Reorderer = force_decrease_order(utils.Reorderer)
 
-class BigDLLM(AutoCausalLM):
+class IPEXLLM(AutoCausalLM):
     AUTO_MODEL_CLASS = AutoModelForCausalLM
     AutoCausalLM_ARGS = inspect.getfullargspec(AutoCausalLM.__init__).args
     def __init__(self, *args, **kwargs):
diff --git a/python/llm/dev/benchmark/harness/run_llb.py b/python/llm/dev/benchmark/harness/run_llb.py
index 3e8bd03a..a3ab55b0 100644
--- a/python/llm/dev/benchmark/harness/run_llb.py
+++ b/python/llm/dev/benchmark/harness/run_llb.py
@@ -20,8 +20,8 @@ import os
 
 from harness_to_leaderboard import *
 from lm_eval import tasks, evaluator, utils, models
-from bigdl_llm import BigDLLM
-models.MODEL_REGISTRY['bigdl-llm'] = BigDLLM # patch bigdl-llm to harness
+from ipexllm import IPEXLLM
+models.MODEL_REGISTRY['ipex-llm'] = IPEXLLM # patch ipex-llm to harness
 
 logging.getLogger("openai").setLevel(logging.WARNING)
 
diff --git a/python/llm/dev/benchmark/harness/run_multi_llb.py b/python/llm/dev/benchmark/harness/run_multi_llb.py
index 77596b6d..7f4b2df3 100644
--- a/python/llm/dev/benchmark/harness/run_multi_llb.py
+++ b/python/llm/dev/benchmark/harness/run_multi_llb.py
@@ -22,8 +22,9 @@ from lm_eval import tasks, evaluator, utils, models
 from multiprocessing import Queue, Process
 import multiprocessing as mp
 from contextlib import redirect_stdout, redirect_stderr
-from bigdl_llm import BigDLLM
-models.MODEL_REGISTRY['bigdl-llm'] = BigDLLM # patch bigdl-llm to harness
+
+from ipexllm import IPEXLLM
+models.MODEL_REGISTRY['ipex-llm'] = IPEXLLM # patch ipex-llm to harness
 
 logging.getLogger("openai").setLevel(logging.WARNING)
 
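For reference, a minimal sketch (not part of the patch) of how the renamed wrapper is expected to be consumed once it is registered under the `ipex-llm` key. It assumes the lm-eval-harness version targeted by run_llb.py exposes `models.MODEL_REGISTRY` and `evaluator.simple_evaluate` with the arguments shown; the model path and task list are placeholders.

# Sketch: register the renamed wrapper and run one task through the harness.
# Assumption: evaluator.simple_evaluate in the harness version used by
# run_llb.py accepts a registered model name plus a model_args string.
from lm_eval import evaluator, models

from ipexllm import IPEXLLM
from harness_to_leaderboard import parse_precision

# Same registry patch that run_llb.py / run_multi_llb.py apply at import time.
models.MODEL_REGISTRY['ipex-llm'] = IPEXLLM

# "sym_int4" is a low-bit precision, so this yields "load_in_low_bit=sym_int4";
# a precision such as "fp16" would yield "dtype=float16" instead.
precision_arg = parse_precision("sym_int4", model="ipex-llm")

results = evaluator.simple_evaluate(
    model="ipex-llm",
    model_args=f"pretrained=/path/to/model,{precision_arg}",  # placeholder path
    tasks=["arc_challenge"],  # placeholder task
)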