#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# This makes sure Python is aware there is more than one sub-package within bigdl,
# physically located elsewhere.
# Otherwise there would be a "module not found" error in a non-pip setting, as Python
# would only search the first bigdl package and end up finding only one sub-package.
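
# The mechanism described above is usually implemented in a package's
# __init__.py; a minimal sketch of that idiom (not taken from this file) is:
#
#     import pkgutil
#     __path__ = pkgutil.extend_path(__path__, __name__)
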
import importlib
import logging

from ipex_llm.utils.common import invalidInputError
from .model import *


class BigdlNativeForCausalLM:
    """
    A generic model class that mimics the behavior of the
    ``transformers.LlamaForCausalLM.from_pretrained`` API.
    """

    @classmethod
    def from_pretrained(cls,
                        pretrained_model_name_or_path: str,
                        model_family: str = 'llama',
                        dtype: str = 'int4',
                        **kwargs):
"""
|
|
:param pretrained_model_name_or_path: Path for converted BigDL-LLM optimized ggml
|
|
binary checkpoint. The checkpoint should be converted by ``ipex_llm.llm_convert``.
|
|
:param model_family: The model family of the pretrained checkpoint.
|
|
Currently we support ``"llama"``, ``"bloom"``, ``"gptneox"``, ``"starcoder"``.
|
|
:param dtype: Which quantized precision will be converted.
|
|
Now only `int4` and `int8` are supported, and `int8` only works for `llama`
|
|
, `gptneox` and `starcoder`.
|
|
:param cache_dir: (optional) This parameter will only be used when
|
|
``pretrained_model_name_or_path`` is a huggingface checkpoint or hub repo id.
|
|
It indicates the saving path for the converted low precision model.
|
|
:param tmp_path: (optional) Which path to store the intermediate fp16 model during the
|
|
conversion process. Default to `None` so that intermediate model will not be saved.
|
|
:param kwargs: keyword arguments which will be passed to the model instance
|
|
|
|
:return: a model instance
|
|
"""
        logging.warning("BigdlNativeForCausalLM has been deprecated, "
                        "please switch to the new CausalLM API for specific models.")
        invalidInputError(model_family in ['llama', 'gptneox', 'bloom', 'starcoder'],
                          "Now we only support model family: 'llama', 'gptneox', 'bloom',"
                          " 'starcoder', '{}' is not in the list.".format(model_family))
        invalidInputError(dtype.lower() in ['int4', 'int8'],
                          "Now we only support int4 and int8 as the data type for weights")

        ggml_model_path = pretrained_model_name_or_path

        if model_family == 'llama':
            from ipex_llm.ggml.model.llama import Llama
            return Llama(model_path=ggml_model_path, **kwargs)
        elif model_family == 'gptneox':
            from ipex_llm.ggml.model.gptneox import Gptneox
            return Gptneox(model_path=ggml_model_path, **kwargs)
        elif model_family == 'bloom':
            from ipex_llm.ggml.model.bloom import Bloom
            return Bloom(model_path=ggml_model_path, **kwargs)
        elif model_family == 'starcoder':
            from ipex_llm.ggml.model.starcoder import Starcoder
            return Starcoder(model_path=ggml_model_path, **kwargs)
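
# A minimal usage sketch for the deprecated API above; the checkpoint path is
# hypothetical and must point to a ggml binary produced by ``ipex_llm.llm_convert``:
#
#     llm = BigdlNativeForCausalLM.from_pretrained(
#         "./llama-7b-q4_0.bin",  # hypothetical converted checkpoint
#         model_family='llama',
#         dtype='int4')
#     # returns an ipex_llm.ggml.model.llama.Llama instance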


class _BaseGGMLClass:

    GGML_Module = None   # dotted module path that provides ``GGML_Model``
    GGML_Model = None    # class name of the native (ggml) implementation
    HF_Class = None      # Hugging Face class used when ``native=False``

    @classmethod
    def from_pretrained(cls,
                        pretrained_model_name_or_path: str,
                        native: bool = True,
                        dtype: str = "int4",
                        *args,
                        **kwargs):
"""
|
|
:param pretrained_model_name_or_path: Path for model checkpoint.
|
|
If running with ``native int4``, the path should be converted BigDL-LLM optimized
|
|
ggml binary checkpoint, which should be converted by ``ipex_llm.llm_convert``.
|
|
If running with ``transformers int4``, the path should be the huggingface repo id
|
|
to be downloaded or the huggingface checkpoint folder.
|
|
:param native: Load model to either BigDL-LLM optimized Transformer or Native (ggml) int4.
|
|
:param dtype: Which quantized precision will be converted.
|
|
Now only `int4` and `int8` are supported, and `int8` only works for `llama`
|
|
, `gptneox` and `starcoder`.
|
|
:param kwargs: keyword arguments which will be passed to the model instance.
|
|
|
|
:return: a model instance
|
|
"""
        try:
            if native:
                module = importlib.import_module(cls.GGML_Module)
                class_ = getattr(module, cls.GGML_Model)
                invalidInputError(dtype.lower() in ['int4', 'int8'],
                                  "Now we only support int4 and int8 as the data type "
                                  "for weights")
                ggml_model_path = pretrained_model_name_or_path
                model = class_(model_path=ggml_model_path, **kwargs)
            else:
                model = cls.HF_Class.from_pretrained(pretrained_model_name_or_path,
                                                     *args, **kwargs)
        except Exception as e:
            invalidInputError(
                False,
                f"Could not load model from path: {pretrained_model_name_or_path}. "
                "Please make sure the CausalLM class matches "
                "the model you want to load. "
                f"Received error {e}"
            )
        return model
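
# The ``native`` branch above is plain class-attribute lookup plus ``importlib``;
# a self-contained sketch of the same pattern (hypothetical names, stdlib only):
#
#     import importlib
#
#     class _Resolver:
#         MODULE = "json"    # any importable module path
#         SYMBOL = "loads"   # any attribute of that module
#
#         @classmethod
#         def resolve(cls):
#             return getattr(importlib.import_module(cls.MODULE), cls.SYMBOL)
#
#     assert _Resolver.resolve()('[1, 2]') == [1, 2]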


class LlamaForCausalLM(_BaseGGMLClass):
    GGML_Module = "ipex_llm.models"
    GGML_Model = "Llama"
    HF_Class = AutoModelForCausalLM


class ChatGLMForCausalLM(_BaseGGMLClass):
    GGML_Module = "ipex_llm.ggml.model.chatglm"
    GGML_Model = "ChatGLM"
    HF_Class = AutoModel


class GptneoxForCausalLM(_BaseGGMLClass):
    GGML_Module = "ipex_llm.models"
    GGML_Model = "Gptneox"
    HF_Class = AutoModelForCausalLM


class BloomForCausalLM(_BaseGGMLClass):
    GGML_Module = "ipex_llm.models"
    GGML_Model = "Bloom"
    HF_Class = AutoModelForCausalLM


class StarcoderForCausalLM(_BaseGGMLClass):
    GGML_Module = "ipex_llm.models"
    GGML_Model = "Starcoder"
    HF_Class = AutoModelForCausalLM
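
# A minimal usage sketch for the subclasses above (both paths are hypothetical):
#
#     # native ggml int4: a binary checkpoint converted by ``ipex_llm.llm_convert``
#     llm = LlamaForCausalLM.from_pretrained("./llama-7b-q4_0.bin", native=True)
#
#     # transformers int4: a huggingface repo id or local checkpoint folder
#     llm = LlamaForCausalLM.from_pretrained("my-org/llama-7b-hf", native=False)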