LLM: enhancements for convert_model (#8278)

* update convert
* change output name
* add description for input_path, add check for input_values
* basic support for command line
* fix style
* update based on comment
* update based on comment
parent 2d14e593f0
commit 39ad68e786

5 changed files with 99 additions and 37 deletions
@@ -42,7 +42,7 @@ BIGDL_PYTHON_HOME = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 VERSION = open(os.path.join(BIGDL_PYTHON_HOME, 'version.txt'), 'r').read().strip()
 llm_home = os.path.join(os.path.dirname(os.path.abspath(__file__)), "src")
 libs_dir = os.path.join(llm_home, "bigdl", "llm", "libs")
-CONVERT_DEP = ['numpy', 'torch', 'transformers', 'sentencepiece']
+CONVERT_DEP = ['numpy', 'torch', 'transformers', 'sentencepiece', 'accelerate']


 def get_llm_packages():
@@ -151,6 +151,11 @@ def setup_package():
         package_dir={"": "src"},
         package_data={"bigdl.llm": package_data[platform_name]},
         include_package_data=True,
+        entry_points={
+            "console_scripts": [
+                'convert_model=bigdl.llm.ggml.convert_model:main'
+            ]
+        },
         extras_require={"all": all_requires},
         classifiers=[
             'License :: OSI Approved :: Apache Software License',
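With this entry point, installing the package puts a `convert_model` command on PATH; setuptools generates a small launcher that imports `bigdl.llm.ggml.convert_model` and calls its `main()`. A minimal sketch of what that launcher effectively does (not the literal generated script):

# Rough equivalent of the console-script wrapper generated for
# 'convert_model=bigdl.llm.ggml.convert_model:main'.
import sys

from bigdl.llm.ggml.convert_model import main

if __name__ == '__main__':
    # main() reads the command line itself via argparse, so arguments pass through unchanged.
    sys.exit(main())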
@@ -77,7 +77,11 @@ def _convert_to_ggml(model_path: str, outfile_dir: str,
     """
     Convert Hugging Face llama-like / gpt-neox-like / bloom-like model to ggml format.

-    :param model_path: str, path of model, for example `./llama-7b-hf`.
+    :param input_path: Path to a *directory*  for huggingface checkpoint that are directly
+            pulled from huggingface hub, for example `./llama-7b-hf`. This should be a dir
+            path that contains: weight bin, tokenizer config, tokenizer.model (required for
+            llama) and added_tokens.json (if applied).
+            For lora finetuned model, the path should be pointed to a merged weight.
     :param outfile_dir: str, the directory to save ggml compatible file, for example `./models`.
     :param model_family: Which model family your input model belongs to. Default to `llama`.
             Now only `llama`/`bloom`/`gptneox` are supported.
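To illustrate the directory layout the updated docstring asks for, here is a small, self-contained sketch (the helper `looks_like_hf_checkpoint` is hypothetical, not part of this PR) that checks a checkpoint folder for the pieces mentioned above: a weight bin, a tokenizer config and, for llama, tokenizer.model:

import os

def looks_like_hf_checkpoint(path: str, needs_tokenizer_model: bool = True) -> bool:
    # Heuristic check that `path` is a merged Hugging Face checkpoint directory.
    if not os.path.isdir(path):
        return False
    entries = os.listdir(path)
    has_weights = any(name.endswith('.bin') for name in entries)   # weight bin
    has_tokenizer_config = 'tokenizer_config.json' in entries      # tokenizer config
    has_tokenizer_model = 'tokenizer.model' in entries             # required for llama
    return has_weights and has_tokenizer_config and (has_tokenizer_model or not needs_tokenizer_model)

print(looks_like_hf_checkpoint('./llama-7b-hf'))  # path is a placeholder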
@@ -16,47 +16,99 @@
 import os
 import time
 from pathlib import Path
-
 from bigdl.llm.ggml.convert import _convert_to_ggml
 from bigdl.llm.ggml.quantize import quantize
+from bigdl.llm.utils.common import invalidInputError
+import argparse
+import tempfile


 def convert_model(input_path: str,
                   output_path: str,
                   model_family: str,
                   dtype: str = 'int4',
-                  tmp_path: str = '/tmp'):
+                  tmp_path: str = None):
     """
     Convert Hugging Face llama-like / gpt-neox-like / bloom-like model to lower precision

-    :param input_path: str, path of model, for example `./llama-7b-hf`.
-    :param output_path: Save path of output quantized model. Default to `None`.
-            If you don't specify this parameter, quantized model will be saved in
-            the same directory as the input and just replace precision with quantize_type
-            like `./ggml-model-q4_0.bin`.
+    :param input_path: Path to a *directory*  for huggingface checkpoint that are directly
+            pulled from huggingface hub, for example `./llama-7b-hf`. This should be a dir
+            path that contains: weight bin, tokenizer config, tokenizer.model (required for
+            llama) and added_tokens.json (if applied).
+            For lora finetuned model, the path should be pointed to a merged weight.
+    :param output_path: Save path of output quantized model. You must pass a *directory* to
+            save all related output.
     :param model_family: Which model family your input model belongs to.
             Now only `llama`/`bloom`/`gptneox` are supported.
     :param dtype: Which quantized precision will be converted.
-            Now only int4 supported.
+            Now only int4 is supported.
     :param tmp_path: Which path to store the intermediate model during the conversion process.
+            Default to `None` so that intermediate model will not be saved.

-    :return: the path str to the converted lower precision checkpoint
+    :return: the path string to the converted lower precision checkpoint.
     """

     dtype = dtype.lower()
+    # make sure directory exists
+    os.makedirs(output_path, exist_ok=True)
+    # check input value
+    invalidInputError(model_family in ['llama', 'bloom', 'gptneox'],
+                      "Now we only support quantization of model \
+                       family('llama', 'bloom', 'gptneox')",
+                      "{} is not in the list.".format(model_family))
+    invalidInputError(os.path.isdir(output_path),
+                      "The output_path {} was not a directory".format(output_path))
+    invalidInputError(dtype == 'int4',
+                      "Now only int4 is supported.")
+    # check for input_path
+    invalidInputError(os.path.exists(input_path),
+                      "The input path {} was not found".format(input_path))
+    invalidInputError(os.path.isdir(input_path),
+                      "The input path {} was not a directory".format(input_path))
+    # shall we support model_id or just model directory?
+
     if dtype == 'int4':
         dtype = 'q4_0'

-    model_name = Path(input_path).stem
-    tmp_ggml_file_path = os.path.join(tmp_path, f'{model_name}_{int(time.time())}')
-    _convert_to_ggml(model_path=input_path,
-                     outfile_dir=tmp_ggml_file_path,
-                     model_family=model_family,
-                     outtype="fp16")
-
-    tmp_ggml_file_path = next(Path(tmp_ggml_file_path).iterdir())
-
-    return quantize(input_path=tmp_ggml_file_path,
-                    output_path=output_path,
-                    model_family=model_family,
-                    dtype=dtype)
+    if tmp_path is not None:
+        model_name = Path(input_path).stem
+        tmp_ggml_file_path = os.path.join(tmp_path, f'{model_name}_{int(time.time())}')
+        _convert_to_ggml(model_path=input_path,
+                         outfile_dir=tmp_ggml_file_path,
+                         model_family=model_family,
+                         outtype="fp16")
+        tmp_ggml_file_path = next(Path(tmp_ggml_file_path).iterdir())
+        return quantize(input_path=tmp_ggml_file_path,
+                        output_path=output_path,
+                        model_family=model_family,
+                        dtype=dtype)
+    else:
+        with tempfile.TemporaryDirectory() as tmp_ggml_file_path:
+            _convert_to_ggml(model_path=input_path,
+                             outfile_dir=tmp_ggml_file_path,
+                             model_family=model_family,
+                             outtype="fp16")
+            tmp_ggml_file_path = next(Path(tmp_ggml_file_path).iterdir())
+            return quantize(input_path=tmp_ggml_file_path,
+                            output_path=output_path,
+                            model_family=model_family,
+                            dtype=dtype)
+
+
+def main():
+    parser = argparse.ArgumentParser(description='Model Convert Parameters')
+    parser.add_argument('-i', '--input_path', type=str, required=True,
+                        help=("input_path, a path to a *directory* containing model weights"))
+    parser.add_argument('-o', '--output_path', type=str, required=True,
+                        help=("output_path,save path of output quantized model."))
+    parser.add_argument('-x', '--model_family', type=str, required=True,
+                        help=("model_family: Which model family your input model belongs to."
+                              "Now only `llama`/`bloom`/`gptneox` are supported."))
+    parser.add_argument('-t', '--dtype', type=str, default="int4",
+                        help="Which quantized precision will be converted.")
+    parser.add_argument('-p', '--tmp_path', type=str, default=None,
+                        help="Which path to store the intermediate model during the"
+                        "conversion process.")
+    args = parser.parse_args()
+    params = vars(args)
+    convert_model(**params)
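Taken together, the rewritten function validates its arguments up front, creates `output_path` if needed, and either keeps the fp16 intermediate under `tmp_path` or routes it through a throwaway `tempfile.TemporaryDirectory()` when `tmp_path` is None. A minimal usage sketch (paths are placeholders):

from bigdl.llm.ggml.convert_model import convert_model

# Convert a merged llama checkpoint directory to an int4 ggml checkpoint.
converted = convert_model(input_path='./llama-7b-hf',   # directory with weight bin + tokenizer files
                          output_path='./converted',    # created with os.makedirs if missing
                          model_family='llama',
                          dtype='int4')                  # only int4 is accepted for now
print(converted)

The same conversion through the new console script would be `convert_model -i ./llama-7b-hf -o ./converted -x llama -t int4`.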
@@ -42,24 +42,23 @@ _quantize_type = {"llama": _llama_quantize_type,
                   "gptneox": _gptneox_quantize_type}


-def quantize(input_path: str, output_path: str=None,
-             model_family: str = 'llama', dtype: str='q4_0'):
+def quantize(input_path: str, output_path: str,
+             model_family: str, dtype: str='q4_0'):
     """
     Quantize ggml file to lower precision.

     :param input_path: Path of input ggml file, for example `./ggml-model-f16.bin`.
-    :param output_path: Save path of output quantized model. Default to `None`.
-            If you don't specify this parameter, quantized model will be saved in
-            the same directory as the input and just replace precision with quantize_type
-            like `./ggml-model-q4_0.bin`.
-    :param model_family: Which model family your input model belongs to. Default to `llama`.
+    :param output_path: Save path of output quantized model. You must pass a directory to
+            save all related output. Filename of quantized model will be like
+            `bigdl_llm_llama_q4_0.bin`.
+    :param model_family: Which model family your input model belongs to.
             Now only `llama`/`bloom`/`gptneox` are supported.
     :param dtype: Quantization method which differs in the resulting model disk size and
-            inference speed. Defalut to `q4_0`. Difference model family may support different types,
-            now the supported list is:
+            inference speed. Defalut to `q4_0`. Difference model family may support
+            different types, now the supported list is:
             llama : "q4_0", "q4_1", "q4_2"
             bloom : "q4_0", "q4_1"
-            gptneox : "q4_0", "q4_1", "q4_2", "q5_0", "q5_1", "q8_0"
+            gptneox : "q4_0", "q4_1", "q5_0", "q5_1", "q8_0"

     :return: the path str to the converted ggml binary checkpoint
     """
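For quick reference, the per-family dtype support listed in the docstring can be read as a plain lookup (a sketch; the module itself keeps these mappings in `_llama_quantize_type` and friends):

# Supported quantization types per model family, as listed in the docstring above.
supported_dtypes = {
    "llama": ["q4_0", "q4_1", "q4_2"],
    "bloom": ["q4_0", "q4_1"],
    "gptneox": ["q4_0", "q4_1", "q5_0", "q5_1", "q8_0"],
}

def is_supported(model_family: str, dtype: str) -> bool:
    return dtype.lower() in supported_dtypes.get(model_family, [])

print(is_supported("gptneox", "q5_1"))  # True
print(is_supported("llama", "q8_0"))    # False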
@@ -69,12 +68,14 @@ def quantize(input_path: str, output_path: str=None,
                       "{} is not in the list.".format(model_family))
     invalidInputError(os.path.isfile(input_path),
                       "The file {} was not found".format(input_path))
-    # TODO : multi input model path
-    if output_path is None:
-        output_path = Path(str(input_path).replace('f16', dtype))
+    invalidInputError(os.path.isdir(output_path),
+                      "The output_path {} was not a directory".format(output_path))
     # convert quantize type str into corresponding int value
     quantize_type_map = _quantize_type[model_family]
-    invalidInputError(dtype in quantize_type_map, "{0} model just accept {1} now, \
+    output_filename = "bigdl_llm_{}_{}.bin".format(model_family,
+                                                   dtype.lower())
+    output_path = os.path.join(output_path, output_filename)
+    invalidInputError(dtype.lower() in quantize_type_map, "{0} model just accept {1} now, \
                       but you pass in {2}.".format(
                       model_family,
                       list(quantize_type_map.keys()),
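The quantized file name is now derived from the model family and dtype rather than from the input file name, so the output location inside `output_path` is predictable. A short sketch of the naming scheme (values are illustrative):

import os

# Mirrors the new naming logic: bigdl_llm_<model_family>_<dtype>.bin inside output_path.
model_family, dtype, output_dir = 'llama', 'q4_0', './converted'
output_filename = "bigdl_llm_{}_{}.bin".format(model_family, dtype.lower())
print(os.path.join(output_dir, output_filename))  # ./converted/bigdl_llm_llama_q4_0.bin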
@@ -42,7 +42,7 @@ class AutoModelForCausalLM:
         """
         :param pretrained_model_name_or_path: We support 3 kinds of pretrained model checkpoint

-               1. path for huggingface checkpoint that are directly pulled from hugginface hub.
+               1. path for huggingface checkpoint that are directly pulled from huggingface hub.
                   This should be a dir path that contains: weight bin, tokenizer config,
                   tokenizer.model (required for llama) and added_tokens.json (if applied).
                   For lora fine tuned model, the path should be pointed to a merged weight.