#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from ipex_llm.utils.common import invalidInputError
import argparse
import os


def _special_kwarg_check(kwargs, check_args):
    _used_args = {}
    for arg in kwargs:
        if arg in check_args:
            _used_args[arg] = kwargs[arg]
    return True, _used_args
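
# For illustration only (hypothetical values, not part of the original module):
# _special_kwarg_check({"tmp_path": "/tmp/x", "unused": 1}, ["tmp_path"])
# returns (True, {"tmp_path": "/tmp/x"}); keyword arguments not listed in
# check_args are silently dropped.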


def llm_convert(model,
                outfile,
                model_family,
                outtype='int4',
                model_format="pth",
                **kwargs):
    """
    This function is able to:

        1. Convert a Hugging Face llama-like / gpt-neox-like / bloom-like / starcoder-like
           / chatglm-like PyTorch model to lower precision in BigDL-LLM optimized GGML format.
        2. Convert a Hugging Face GPTQ format llama-like model to BigDL-LLM optimized
           GGML format.

    :param model: Path to a **directory**:

        1. If ``model_format='pth'``, the folder should be a Hugging Face checkpoint
           that is directly pulled from the Hugging Face hub, for example ``./llama-7b-hf``.
           This should be a dir path that contains: weight bin, tokenizer config,
           ``tokenizer.model`` (required for llama) and ``added_tokens.json`` (if applied).
           For a LoRA finetuned model, the path should point to the merged weight.
        2. If ``model_format='gptq'``, the folder should be a Hugging Face checkpoint
           in GPTQ format, which contains weights in PyTorch's .pt format,
           and ``tokenizer.model``.

    :param outfile: Save path of the output quantized model. You must pass a **directory**
        to save all related output.
    :param model_family: Which model family your input model belongs to.
        Now ``llama``/``bloom``/``gptneox``/``starcoder``/``chatglm`` are supported.
        If ``model_format='gptq'``, only ``llama`` is supported.
    :param outtype: Which quantized precision will be converted.
        If ``model_format='pth'``, ``int4`` and ``int8`` are supported,
        while ``int8`` only works for ``llama`` and ``gptneox``.
        If ``model_format='gptq'``, only ``int4`` is supported.
    :param model_format: Specify the model format to be converted. ``pth`` is for a
        PyTorch model checkpoint from Hugging Face. ``gptq`` is for a GPTQ format
        model from Hugging Face.
    :param **kwargs: Supported keyword arguments include:

        * ``tmp_path``: Valid when ``model_format='pth'``. It refers to the path
          that stores the intermediate model during the conversion process.
        * ``tokenizer_path``: Valid when ``model_format='gptq'``. It refers to the path
          where ``tokenizer.model`` is located (if it is not in the ``model`` directory).

    :return: The path string to the converted lower precision checkpoint.
    """
    if model_format == "pth":
        from ipex_llm.ggml.convert_model import convert_model as ggml_convert_model
        _, _used_args = _special_kwarg_check(kwargs=kwargs,
                                             check_args=["tmp_path"])
        return ggml_convert_model(input_path=model,
                                  output_path=outfile,
                                  model_family=model_family,
                                  dtype=outtype,
                                  **_used_args,
                                  )
    elif model_format == "gptq":
        from ipex_llm.gptq.convert.convert_gptq_to_ggml import convert_gptq2ggml
        invalidInputError(model_family == "llama" and outtype == 'int4',
                          "Converting GPTQ models should always specify "
                          "`--model-family llama --dtype int4` in the command line.")
        os.makedirs(outfile, exist_ok=True)
        invalidInputError(os.path.isdir(outfile),
                          "The output_path {} is not a directory".format(outfile))
        _, _used_args = _special_kwarg_check(kwargs=kwargs,
                                             check_args=["tokenizer_path"])

        output_filename = "bigdl_llm_{}_{}_from_gptq.bin".format(model_family,
                                                                 outtype.lower())
        outfile = os.path.join(outfile, output_filename)

        # TODO: delete this when AutoTokenizer is supported
        if "tokenizer_path" in _used_args:
            gptq_tokenizer_path = _used_args["tokenizer_path"]
        else:
            gptq_tokenizer_path = None

        convert_gptq2ggml(model_path=model,
                          output_path=outfile,
                          tokenizer_path=gptq_tokenizer_path,
                          )
        return outfile
    else:
        invalidInputError(False, f"Unsupported input model_format: {model_format}")

    return None
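

# Example usage (a sketch, not part of the original module; all paths below are
# illustrative):
#
#     # Convert a local Hugging Face llama checkpoint to an int4 GGML file:
#     ggml_path = llm_convert(model="./llama-7b-hf",
#                             outfile="./converted_model",
#                             model_family="llama",
#                             outtype="int4",
#                             model_format="pth")
#
#     # Convert a llama GPTQ checkpoint (only llama + int4 is accepted here):
#     ggml_path = llm_convert(model="./llama-7b-gptq",
#                             outfile="./converted_model",
#                             model_family="llama",
#                             outtype="int4",
#                             model_format="gptq",
#                             tokenizer_path="./llama-7b-gptq/tokenizer.model")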


def main():
    parser = argparse.ArgumentParser(description='Model Convert Parameters')
    parser.add_argument('model', type=str,
                        help=("model, a path to a *directory* containing model weights"))
    parser.add_argument('-o', '--outfile', type=str, required=True,
                        help=("outfile, save path of the output quantized model."))
    parser.add_argument('-x', '--model-family', type=str, required=True,
                        help=("--model-family: Which model family your input model belongs to. "
                              "Now only `llama`/`bloom`/`gptneox`/`chatglm` are supported."))
    parser.add_argument('-f', '--model-format', type=str, required=True,
                        help=("The model format to be converted to a ggml compatible file. "
                              "Now only `pth`/`gptq` are supported."))
    parser.add_argument('-t', '--outtype', type=str, default="int4",
                        help="Which quantized precision will be converted.")

    # pth specific args
    parser.add_argument('-p', '--tmp-path', type=str, default=None,
                        help="Which path to store the intermediate model during the "
                             "conversion process.")

    # gptq specific args
    parser.add_argument('-k', '--tokenizer-path', type=str, default=None,
                        help="tokenizer_path, a path of tokenizer.model")
    args = parser.parse_args()
    params = vars(args)
    llm_convert(**params)
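

# Example CLI invocation (a sketch; this assumes the package wires main() up as
# the `llm-convert` console script, and the paths are illustrative):
#
#     llm-convert ./llama-7b-hf --outfile ./converted_model \
#         --model-family llama --model-format pth --outtype int4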