parent
493cbd9a36
commit
8fb36b9f4a
11 changed files with 4607 additions and 9 deletions
|
|
@ -9,7 +9,7 @@ Take `chatglm-6b` as an example:
|
||||||
import torch
|
import torch
|
||||||
from ipex_llm.transformers import AutoModel
|
from ipex_llm.transformers import AutoModel
|
||||||
from transformers import AutoTokenizer
|
from transformers import AutoTokenizer
|
||||||
from ipex_llm.utils.benchmark_util import BenchmarkWrapper
|
from ipex_llm.utils import BenchmarkWrapper
|
||||||
|
|
||||||
model_path ='THUDM/chatglm-6b'
|
model_path ='THUDM/chatglm-6b'
|
||||||
model = AutoModel.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True)
|
model = AutoModel.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True)
|
||||||
|
|
@ -37,7 +37,7 @@ import torch
|
||||||
import intel_extension_for_pytorch as ipex
|
import intel_extension_for_pytorch as ipex
|
||||||
from ipex_llm.transformers import AutoModel
|
from ipex_llm.transformers import AutoModel
|
||||||
from transformers import AutoTokenizer
|
from transformers import AutoTokenizer
|
||||||
from ipex_llm.utils.benchmark_util import BenchmarkWrapper
|
from ipex_llm.utils import BenchmarkWrapper
|
||||||
|
|
||||||
model_path ='THUDM/chatglm-6b'
|
model_path ='THUDM/chatglm-6b'
|
||||||
model = AutoModel.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True)
|
model = AutoModel.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True)
|
||||||
|
|
@ -66,7 +66,7 @@ For example, just need to apply following code patch on [Deepspeed Autotp exampl
|
||||||
import torch
|
import torch
|
||||||
import transformers
|
import transformers
|
||||||
import deepspeed
|
import deepspeed
|
||||||
+from ipex_llm.utils.benchmark_util import BenchmarkWrapper
|
from ipex_llm.utils import BenchmarkWrapper
|
||||||
|
|
||||||
def get_int_from_env(env_keys, default):
|
def get_int_from_env(env_keys, default):
|
||||||
"""Returns the first positive env value found in the `env_keys` list or the default."""
|
"""Returns the first positive env value found in the `env_keys` list or the default."""
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
repo_id:
|
repo_id:
|
||||||
# - 'THUDM/chatglm2-6b'
|
# - 'THUDM/chatglm2-6b'
|
||||||
- 'meta-llama/Llama-2-7b-chat-hf'
|
- 'meta-llama/Llama-2-7b-chat-hf'
|
||||||
|
# - 'meta-llama/Meta-Llama-3.1-8B-Instruct'
|
||||||
# - 'liuhaotian/llava-v1.5-7b' # requires a LLAVA_REPO_DIR env variables pointing to the llava dir; added only for gpu win related test_api now
|
# - 'liuhaotian/llava-v1.5-7b' # requires a LLAVA_REPO_DIR env variables pointing to the llava dir; added only for gpu win related test_api now
|
||||||
local_model_hub: 'path to your local model hub'
|
local_model_hub: 'path to your local model hub'
|
||||||
warm_up: 1 # must set >=2 when run "pipeline_parallel_gpu" test_api
|
warm_up: 1 # must set >=2 when run "pipeline_parallel_gpu" test_api
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,7 @@ from datetime import date
|
||||||
import os
|
import os
|
||||||
current_dir = os.path.dirname(os.path.realpath(__file__))
|
current_dir = os.path.dirname(os.path.realpath(__file__))
|
||||||
import sys
|
import sys
|
||||||
from ipex_llm.utils.benchmark_util import BenchmarkWrapper
|
from ipex_llm.utils import BenchmarkWrapper
|
||||||
from ipex_llm.utils.common.log4Error import invalidInputError
|
from ipex_llm.utils.common.log4Error import invalidInputError
|
||||||
|
|
||||||
LLAMA_IDS = ['meta-llama/Llama-2-7b-chat-hf','meta-llama/Llama-2-13b-chat-hf',
|
LLAMA_IDS = ['meta-llama/Llama-2-7b-chat-hf','meta-llama/Llama-2-13b-chat-hf',
|
||||||
|
|
|
||||||
|
|
@ -29,7 +29,7 @@ from datetime import date
|
||||||
import os
|
import os
|
||||||
current_dir = os.path.dirname(os.path.realpath(__file__))
|
current_dir = os.path.dirname(os.path.realpath(__file__))
|
||||||
import sys
|
import sys
|
||||||
from ipex_llm.utils.benchmark_util import BenchmarkWrapper
|
from ipex_llm.utils import BenchmarkWrapper
|
||||||
from ipex_llm.utils.common.log4Error import invalidInputError
|
from ipex_llm.utils.common.log4Error import invalidInputError
|
||||||
from ipex_llm.utils.common import invalidInputError
|
from ipex_llm.utils.common import invalidInputError
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,7 @@ SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"
|
||||||
PYTHON_ROOT_DIR="$SCRIPT_DIR/.."
|
PYTHON_ROOT_DIR="$SCRIPT_DIR/.."
|
||||||
echo $PYTHON_ROOT_DIR
|
echo $PYTHON_ROOT_DIR
|
||||||
PATHS_TO_CHECK="$SCRIPT_DIR/../../src"
|
PATHS_TO_CHECK="$SCRIPT_DIR/../../src"
|
||||||
PATTERNS_TO_EXCLUDE="__init__.py,log4Error.py,$SCRIPT_DIR/../../src/ipex_llm/langchain/*,$SCRIPT_DIR/../../src/ipex_llm/transformers/gguf/models/model_implement/yuan2/*,benchmark_util.py,tgi_api_server.py"
|
PATTERNS_TO_EXCLUDE="__init__.py,log4Error.py,$SCRIPT_DIR/../../src/ipex_llm/langchain/*,$SCRIPT_DIR/../../src/ipex_llm/transformers/gguf/models/model_implement/yuan2/*,benchmark_util_4_29.py,benchmark_util_4_43.py,tgi_api_server.py"
|
||||||
PEP8_REPORT_PATH="$PYTHON_ROOT_DIR/test/pep8-report.txt"
|
PEP8_REPORT_PATH="$PYTHON_ROOT_DIR/test/pep8-report.txt"
|
||||||
PYLINT_REPORT_PATH="$PYTHON_ROOT_DIR/test/pylint-report.txt"
|
PYLINT_REPORT_PATH="$PYTHON_ROOT_DIR/test/pylint-report.txt"
|
||||||
PYLINT_INSTALL_INFO="$PYTHON_ROOT_DIR/test/pylint-info.txt"
|
PYLINT_INSTALL_INFO="$PYTHON_ROOT_DIR/test/pylint-info.txt"
|
||||||
|
|
|
||||||
|
|
@ -39,7 +39,7 @@ from transformers.utils import logging
|
||||||
|
|
||||||
logger = logging.get_logger(__name__)
|
logger = logging.get_logger(__name__)
|
||||||
|
|
||||||
from ipex_llm.utils.benchmark_util import BenchmarkWrapper
|
from ipex_llm.utils import BenchmarkWrapper
|
||||||
|
|
||||||
|
|
||||||
def get_int_from_env(env_keys, default):
|
def get_int_from_env(env_keys, default):
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,7 @@ class ModelWorker:
|
||||||
self.dtype = torch_dtype
|
self.dtype = torch_dtype
|
||||||
start = time.perf_counter()
|
start = time.perf_counter()
|
||||||
model = self.load_model(checkpoint, low_bit)
|
model = self.load_model(checkpoint, low_bit)
|
||||||
from ipex_llm.utils.benchmark_util import BenchmarkWrapper
|
from ipex_llm.utils import BenchmarkWrapper
|
||||||
self.model = BenchmarkWrapper(model, do_print=True)
|
self.model = BenchmarkWrapper(model, do_print=True)
|
||||||
end = time.perf_counter()
|
end = time.perf_counter()
|
||||||
logger.info(f"Time to load weights: {end - start:.2f}s")
|
logger.info(f"Time to load weights: {end - start:.2f}s")
|
||||||
|
|
|
||||||
|
|
@ -106,7 +106,7 @@ class BigDLLLMWorker(BaseModelWorker):
|
||||||
load_low_bit_model,
|
load_low_bit_model,
|
||||||
)
|
)
|
||||||
if benchmark.lower() == "true" and not speculative:
|
if benchmark.lower() == "true" and not speculative:
|
||||||
from ipex_llm.utils.benchmark_util import BenchmarkWrapper
|
from ipex_llm.utils import BenchmarkWrapper
|
||||||
self.model = BenchmarkWrapper(self.model, do_print=True)
|
self.model = BenchmarkWrapper(self.model, do_print=True)
|
||||||
logger.info(f"enable benchmark successfully")
|
logger.info(f"enable benchmark successfully")
|
||||||
self.stream_interval = stream_interval
|
self.stream_interval = stream_interval
|
||||||
|
|
|
||||||
|
|
@ -18,3 +18,11 @@
|
||||||
# physically located elsewhere.
|
# physically located elsewhere.
|
||||||
# Otherwise there would be module not found error in non-pip's setting as Python would
|
# Otherwise there would be module not found error in non-pip's setting as Python would
|
||||||
# only search the first bigdl package and end up finding only one sub-package.
|
# only search the first bigdl package and end up finding only one sub-package.
|
||||||
|
import re

import transformers

# Version string reported by the installed transformers package.
trans_version = transformers.__version__


def _release_tuple(version):
    """Return the leading ``major[.minor[.patch]]`` of *version* as ints.

    Missing components count as 0 and anything after the numeric release
    part (for example ``.dev0`` or ``rc1``) is ignored.  A string that does
    not start with a digit yields ``(0, 0, 0)``.
    """
    match = re.match(r"(\d+)(?:\.(\d+))?(?:\.(\d+))?", version)
    if match is None:
        return (0, 0, 0)
    return tuple(int(part or 0) for part in match.groups())


# Compare numerically: a plain string comparison is lexicographic, so it
# would rank "4.9.0" above "4.43.1" and "4.100.0" below it.
if _release_tuple(trans_version) >= (4, 43, 1):
    from .benchmark_util_4_43 import BenchmarkWrapper
else:
    from .benchmark_util_4_29 import BenchmarkWrapper
|
||||||
|
|
|
||||||
4589
python/llm/src/ipex_llm/utils/benchmark_util_4_43.py
Normal file
4589
python/llm/src/ipex_llm/utils/benchmark_util_4_43.py
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue