From 5d5da7b2c7e8e82e66374e0965838ff9dd163bc9 Mon Sep 17 00:00:00 2001
From: binbin Deng <108676127+plusbang@users.noreply.github.com>
Date: Fri, 9 Jun 2023 15:17:49 +0800
Subject: [PATCH] LLM: optimize namespace and remove unused import logic (#8302)

---
 python/llm/src/bigdl/llm/ggml/__init__.py     |  5 +-
 .../src/bigdl/llm/ggml/model/bloom/bloom.py   |  5 +-
 .../bigdl/llm/ggml/model/gptneox/gptneox.py   |  7 +-
 .../src/bigdl/llm/ggml/model/llama/llama.py   |  7 +-
 .../src/bigdl/llm/ggml/transformers/model.py  |  4 +-
 python/llm/src/bigdl/llm/models.py            | 24 ++++++
 .../src/bigdl/llm/utils/common/__init__.py    |  1 +
 .../src/bigdl/llm/utils/common/lazyimport.py  | 76 +++++++++++++++++++
 8 files changed, 113 insertions(+), 16 deletions(-)
 create mode 100644 python/llm/src/bigdl/llm/models.py
 create mode 100644 python/llm/src/bigdl/llm/utils/common/lazyimport.py

diff --git a/python/llm/src/bigdl/llm/ggml/__init__.py b/python/llm/src/bigdl/llm/ggml/__init__.py
index adeb474d..a4ab0da0 100644
--- a/python/llm/src/bigdl/llm/ggml/__init__.py
+++ b/python/llm/src/bigdl/llm/ggml/__init__.py
@@ -19,6 +19,5 @@
 # Otherwise there would be module not found error in non-pip's setting as Python would
 # only search the first bigdl package and end up finding only one sub-package.
 
-from .quantize import quantize
-from .convert import _convert_to_ggml
-from .convert_model import convert_model
+from bigdl.llm.utils.common import LazyImport
+convert_model = LazyImport('bigdl.llm.ggml.convert_model.convert_model')
diff --git a/python/llm/src/bigdl/llm/ggml/model/bloom/bloom.py b/python/llm/src/bigdl/llm/ggml/model/bloom/bloom.py
index 242962f2..376e2fbc 100644
--- a/python/llm/src/bigdl/llm/ggml/model/bloom/bloom.py
+++ b/python/llm/src/bigdl/llm/ggml/model/bloom/bloom.py
@@ -60,7 +60,7 @@ class Bloom:
         n_ctx: int = 512,
         seed: int = 1337,
         logits_all: bool = False,
-        n_threads: int = -1,
+        n_threads: int = 2,
         n_batch: int = 8,
         last_n_tokens_size: int = 64,
         verbose: bool = True,
@@ -72,8 +72,7 @@ class Bloom:
             n_ctx: Maximum context size.
             seed: Random seed. 0 for random.
             logits_all: Return logits for all tokens, not just the last token.
-            n_threads: Number of threads to use.
-                If None, the number of threads is automatically determined.
+            n_threads: Number of threads to use. Defaults to 2.
             n_batch: Maximum number of prompt tokens to batch together when calling llama_eval.
             last_n_tokens_size: Maximum number of tokens to keep in the last_n_tokens deque.
             verbose: Print verbose output to stderr.
diff --git a/python/llm/src/bigdl/llm/ggml/model/gptneox/gptneox.py b/python/llm/src/bigdl/llm/ggml/model/gptneox/gptneox.py
index d6221cc2..398436c4 100644
--- a/python/llm/src/bigdl/llm/ggml/model/gptneox/gptneox.py
+++ b/python/llm/src/bigdl/llm/ggml/model/gptneox/gptneox.py
@@ -139,7 +139,7 @@ class Gptneox(GenerationMixin):
         use_mmap: bool = True,
         use_mlock: bool = False,
         embedding: bool = False,
-        n_threads: Optional[int] = None,
+        n_threads: Optional[int] = 2,
         n_batch: int = 512,
         last_n_tokens_size: int = 64,
         lora_base: Optional[str] = None,
@@ -160,8 +160,7 @@ class Gptneox(GenerationMixin):
             use_mmap: Use mmap if possible.
             use_mlock: Force the system to keep the model in RAM.
             embedding: Embedding mode only.
-            n_threads: Number of threads to use. If None, the number of threads
-                is automatically determined.
+            n_threads: Number of threads to use. Defaults to 2.
             n_batch: Maximum number of prompt tokens to batch together when calling gptneox_eval.
             last_n_tokens_size: Maximum number of tokens to keep in the last_n_tokens deque.
             lora_base: Optional path to base model, useful if using a quantized base model and
@@ -197,7 +196,7 @@ class Gptneox(GenerationMixin):
 
         self.cache: Optional[GptneoxCache] = None
 
-        self.n_threads = n_threads or max(multiprocessing.cpu_count() // 2, 1)
+        self.n_threads = n_threads
 
         self.lora_base = lora_base
         self.lora_path = lora_path
diff --git a/python/llm/src/bigdl/llm/ggml/model/llama/llama.py b/python/llm/src/bigdl/llm/ggml/model/llama/llama.py
index 99a81d3d..669757d0 100644
--- a/python/llm/src/bigdl/llm/ggml/model/llama/llama.py
+++ b/python/llm/src/bigdl/llm/ggml/model/llama/llama.py
@@ -137,7 +137,7 @@ class Llama(GenerationMixin):
         use_mmap: bool = True,
         use_mlock: bool = False,
         embedding: bool = False,
-        n_threads: Optional[int] = None,
+        n_threads: Optional[int] = 2,
         n_batch: int = 512,
         last_n_tokens_size: int = 64,
         lora_base: Optional[str] = None,
@@ -158,8 +158,7 @@ class Llama(GenerationMixin):
             use_mmap: Use mmap if possible.
             use_mlock: Force the system to keep the model in RAM.
             embedding: Embedding mode only.
-            n_threads: Number of threads to use. If None, the number of threads is
-                automatically determined.
+            n_threads: Number of threads to use. Defaults to 2.
             n_batch: Maximum number of prompt tokens to batch together when calling llama_eval.
             last_n_tokens_size: Maximum number of tokens to keep in the last_n_tokens deque.
             lora_base: Optional path to base model, useful if using a quantized base model and
@@ -194,7 +193,7 @@ class Llama(GenerationMixin):
 
         self.cache: Optional[LlamaCache] = None
 
-        self.n_threads = n_threads or max(multiprocessing.cpu_count() // 2, 1)
+        self.n_threads = n_threads
 
         self.lora_base = lora_base
         self.lora_path = lora_path
diff --git a/python/llm/src/bigdl/llm/ggml/transformers/model.py b/python/llm/src/bigdl/llm/ggml/transformers/model.py
index b09d326a..9225cbeb 100644
--- a/python/llm/src/bigdl/llm/ggml/transformers/model.py
+++ b/python/llm/src/bigdl/llm/ggml/transformers/model.py
@@ -21,9 +21,7 @@
 import os
 import traceback
 
-from huggingface_hub import snapshot_download
 from bigdl.llm.utils.common import invalidInputError
-from bigdl.llm.ggml import convert_model
 
 
 class AutoModelForCausalLM:
@@ -71,6 +69,7 @@ class AutoModelForCausalLM:
         if not os.path.exists(pretrained_model_name_or_path):
             try:
                 # download from huggingface based on repo id
+                from huggingface_hub import snapshot_download
                 pretrained_model_name_or_path = snapshot_download(
                     repo_id=pretrained_model_name_or_path)
             except Exception as e:
@@ -90,6 +89,7 @@ class AutoModelForCausalLM:
             # points to a huggingface checkpoint
             if not os.path.isfile(pretrained_model_name_or_path):
                 # huggingface checkpoint
+                from bigdl.llm.ggml import convert_model
                 ggml_model_path = convert_model(input_path=pretrained_model_name_or_path,
                                                 output_path=cache_dir,
                                                 model_family=model_family,
diff --git a/python/llm/src/bigdl/llm/models.py b/python/llm/src/bigdl/llm/models.py
new file mode 100644
index 00000000..345a62df
--- /dev/null
+++ b/python/llm/src/bigdl/llm/models.py
@@ -0,0 +1,24 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This makes sure Python is aware there is more than one sub-package within bigdl,
+# physically located elsewhere.
+# Otherwise there would be module not found error in non-pip's setting as Python would
+# only search the first bigdl package and end up finding only one sub-package.
+
+from bigdl.llm.ggml.model.llama import Llama
+from bigdl.llm.ggml.model.gptneox import Gptneox
+from bigdl.llm.ggml.model.bloom import Bloom
diff --git a/python/llm/src/bigdl/llm/utils/common/__init__.py b/python/llm/src/bigdl/llm/utils/common/__init__.py
index 5b36cc3e..22fc3510 100644
--- a/python/llm/src/bigdl/llm/utils/common/__init__.py
+++ b/python/llm/src/bigdl/llm/utils/common/__init__.py
@@ -20,3 +20,4 @@
 # only search the first bigdl package and end up finding only one sub-package.
 
 from .log4Error import invalidInputError, invalidOperationError
+from .lazyimport import LazyImport
diff --git a/python/llm/src/bigdl/llm/utils/common/lazyimport.py b/python/llm/src/bigdl/llm/utils/common/lazyimport.py
new file mode 100644
index 00000000..6380831d
--- /dev/null
+++ b/python/llm/src/bigdl/llm/utils/common/lazyimport.py
@@ -0,0 +1,76 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import importlib
+import sys
+
+
+# code adapted from https://github.com/intel/neural-compressor/
+# blob/master/neural_compressor/utils/utility.py#L88
+class LazyImport:
+    """
+    Lazy import python module until use.
+
+    Example:
+        >>> from bigdl.llm.utils.common import LazyImport
+        >>> _convert_to_ggml = LazyImport('bigdl.llm.ggml.convert._convert_to_ggml')
+        >>> _convert_to_ggml(model_path, outfile_dir)
+    """
+    def __init__(self, module_name: str):
+        """
+        :param module_name: Import module name.
+        """
+        self.module_name = module_name
+
+    def __getattr__(self, name):
+        absolute_name = importlib.util.resolve_name(self.module_name)
+        # reuse the module if it has already been imported, instead of reloading it
+        try:
+            return getattr(sys.modules[absolute_name], name)
+        except (KeyError, AttributeError):
+            pass
+
+        if "." in absolute_name:
+            # split the module name so the class name is not treated as a package
+            parent_name, _, child_name = absolute_name.rpartition('.')
+        else:
+            parent_name, child_name = absolute_name, None
+
+        try:
+            # import the parent module and fetch the submodule with getattr
+            module = importlib.import_module(parent_name)
+            module = getattr(module, child_name) if child_name else module
+        except AttributeError:
+            # triggered when the parent module cannot get the child module via getattr;
+            # more common when calling staticmethods or classmethods, e.g. from_tsdataset
+            full_module_name = parent_name + '.' + child_name if child_name else parent_name
+            spec = importlib.util.find_spec(full_module_name)
+            module = importlib.util.module_from_spec(spec)
+            spec.loader.exec_module(module)
+
+        return getattr(module, name)
+
+    def __call__(self, *args, **kwargs):
+        function_name = self.module_name.rpartition('.')[-1]
+        module_name = self.module_name.rpartition(f'.{function_name}')[0]
+        try:
+            # reuse the module if it has already been imported
+            module = sys.modules[module_name]
+        except KeyError:
+            # import the enclosing module on first use
+            module = importlib.import_module(module_name)
+        function = getattr(module, function_name)
+        return function(*args, **kwargs)
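
For reference, a minimal sketch of the lazy-import flow this patch introduces, assuming bigdl-llm is installed; the input/output paths and model family below are placeholder values, not part of the patch:

    from bigdl.llm.utils.common import LazyImport

    # Binding the name is cheap: nothing under bigdl.llm.ggml.convert_model
    # is actually imported at this point.
    convert_model = LazyImport('bigdl.llm.ggml.convert_model.convert_model')

    # The real import happens here, inside LazyImport.__call__, the first
    # time the function is invoked. Placeholder paths for illustration only.
    convert_model(input_path='/path/to/llama-7b-hf',
                  output_path='/path/to/output',
                  model_family='llama')

This is the same pattern the new bigdl/llm/ggml/__init__.py uses, so `import bigdl.llm.ggml` no longer pulls in the heavy conversion and quantization modules until convert_model is first called.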