#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ===========================================================================
#
# This file is adapted from
# https://github.com/ggerganov/llama.cpp/blob/master/convert.py
#
# MIT License
#
# Copyright (c) 2023 Georgi Gerganov
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from ipex_llm.utils.common import invalidInputError
from ipex_llm.utils.convert_util import *
from pathlib import Path
import os


def _convert_llama(model_path, outfile_dir, outtype):
    model_path = Path(model_path)
    outfile_dir = Path(outfile_dir)
    model_plus = load_some_model(model_path)
    # Prefer the vocab bundled with the checkpoint; otherwise load a
    # SentencePiece vocab from the checkpoint directory.
    if model_plus.vocab is not None:
        vocab = model_plus.vocab
    else:
        vocab_dir = model_plus.paths[0].parent
        vocab = load_vocab(vocab_dir, vocabtype='spm')
    params = Params.load(model_plus)
    model = model_plus.model
    model = do_necessary_conversions(model, params)
    output_type = pick_output_type(model, outtype)
    model = convert_to_output_type(model, output_type)
    outfile_path = default_outfile([outfile_dir], output_type)
    OutputFile.write_all(outfile_path, params, output_type, model, vocab)


# Thin wrappers around the family-specific converters imported from
# ipex_llm.utils.convert_util (via the wildcard import above).
def _convert_gptneox(model_path, outfile_dir, outtype):
    _convert_gptneox_hf_to_ggml(model_path, outfile_dir, outtype)


def _convert_bloom(model_path, outfile_dir, outtype):
    _convert_bloom_hf_to_ggml(model_path, outfile_dir, outtype)


def _convert_starcoder(model_path, outfile_dir, outtype):
    _convert_starcoder_hf_to_ggml(model_path, outfile_dir, outtype)


def _convert_chatglm(model_path, outfile_dir, outtype):
    return _convert_chatglm_hf_to_ggml(model_path, outfile_dir, outtype)


def _convert_to_ggml(model_path: str, outfile_dir: str,
                     model_family: str = 'llama', outtype: str = "fp16"):
    """
    Convert a Hugging Face llama-like / gpt-neox-like / bloom-like / starcoder-like
    model to ggml format.

    :param model_path: Path to a *directory* of a Hugging Face checkpoint directly
           pulled from the Hugging Face hub, for example `./llama-7b-hf`. This should
           be a directory that contains: weight bin, tokenizer config, tokenizer.model
           (required for llama) and added_tokens.json (if applicable).
           For a LoRA fine-tuned model, the path should point to a merged weight.
    :param outfile_dir: str, the directory to save the ggml compatible file,
           for example `./models`.
    :param model_family: Which model family your input model belongs to. Default to
           `llama`. Now only `llama`/`bloom`/`gptneox`/`starcoder` are supported.
    :param outtype: Specify the output format. Default to `fp16`. Now `fp32`/`fp16`
           are supported.
    """
    invalidInputError(model_family in ['llama', 'bloom', 'gptneox', 'starcoder'],
                      "Now we only support quantization of model "
                      "family('llama', 'bloom', 'gptneox', 'starcoder')",
                      "{} is not in the list.".format(model_family))
    invalidInputError(os.path.exists(model_path),
                      "The file {} was not found".format(model_path))
    invalidInputError(outtype in ['fp32', 'fp16'],
                      "Now we only support converting to 'fp32'/'fp16' format",
                      "{} is not in the list.".format(outtype))

    # make sure the output directory exists
    os.makedirs(outfile_dir, exist_ok=True)

    # map 'fp32'/'fp16' to the 'f32'/'f16' names expected by the converters
    outtype = outtype.replace('p', '')
    print("It may take several minutes to load the original model, please wait...")
    if model_family == 'llama':
        _convert_llama(model_path, outfile_dir, outtype)
    elif model_family == 'gptneox':
        _convert_gptneox(model_path, outfile_dir, outtype)
    elif model_family == 'bloom':
        _convert_bloom(model_path, outfile_dir, outtype)
    elif model_family == 'starcoder':
        _convert_starcoder(model_path, outfile_dir, outtype)
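

# A minimal usage sketch (illustration only, not part of the original module):
# convert a local llama checkpoint to an fp16 ggml file, per the docstring above.
# The input and output paths below are hypothetical placeholders.
if __name__ == '__main__':
    _convert_to_ggml(model_path='./llama-7b-hf',  # HF checkpoint directory
                     outfile_dir='./models',      # where the ggml file is written
                     model_family='llama',
                     outtype='fp16')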