diff --git a/python/llm/src/bigdl/llm/convert_model.py b/python/llm/src/bigdl/llm/convert_model.py
index ec7cbda8..d84ea2c5 100644
--- a/python/llm/src/bigdl/llm/convert_model.py
+++ b/python/llm/src/bigdl/llm/convert_model.py
@@ -98,12 +98,13 @@ def llm_convert(model,
                                                         outtype.lower())
         outfile = os.path.join(outfile, output_filename)
 
+        # TODO: delete this when support AutoTokenizer
         if "tokenizer_path" in _used_args:
             gptq_tokenizer_path = _used_args["tokenizer_path"]
         else:
             gptq_tokenizer_path = None
-        convert_gptq2ggml(input_path=model,
+        convert_gptq2ggml(model_path=model,
                           output_path=outfile,
                           tokenizer_path=gptq_tokenizer_path,
                           )
diff --git a/python/llm/src/bigdl/llm/gptq/convert/convert_gptq_to_ggml.py b/python/llm/src/bigdl/llm/gptq/convert/convert_gptq_to_ggml.py
index 8cadc95c..8ae3a0ba 100644
--- a/python/llm/src/bigdl/llm/gptq/convert/convert_gptq_to_ggml.py
+++ b/python/llm/src/bigdl/llm/gptq/convert/convert_gptq_to_ggml.py
@@ -23,22 +23,15 @@ import os
 import re
 import sys
 import json
+import warnings
 import struct
 import numpy as np
 import torch
 from sentencepiece import SentencePieceProcessor
+from pathlib import Path
 from bigdl.llm.utils.common.log4Error import invalidInputError
 
 
-def find_pt_files(directory):
-    pt_files = []
-    for root, _, files in os.walk(directory):
-        for file in files:
-            if file.endswith(".pt"):
-                pt_files.append(os.path.join(root, file))
-    return pt_files
-
-
 def write_header(fout, shape, dst_name, ftype_cur):
     sname = dst_name.encode('utf-8')
     fout.write(struct.pack("iii", len(shape), len(sname), ftype_cur))
@@ -105,6 +98,7 @@ def convert_q4(src_name, dst_name, model, fout, n_head, permute=False):
     qweight = model[f"{src_name}.qweight"].numpy().T  # transpose
 
     # Q4_1 does not support bias; good thing the bias is always all zeros.
+    # Act-order is not supported.
     invalidInputError(np.all(g_idx[:-1] <= g_idx[1:]),
                       "Act-order is not supported, please use a no act-order model.")
     ftype = 3  # Q4_1
@@ -164,13 +158,27 @@ def convert_q4(src_name, dst_name, model, fout, n_head, permute=False):
     blob.tofile(fout)
 
 
-def convert_gptq2ggml(input_path, output_path, tokenizer_path=None):
-    input_models = find_pt_files(input_path)
-    invalidInputError(len(input_models) == 1,
-                      "Only support pytorch's .pt format now."
-                      f"There should be only one .pt under {input_path}, "
-                      f"but found {len(input_models)} file(s).")
-    model = torch.load(input_models[0], map_location="cpu")
+def find_quantized_model_file(model_path):
+    model_path = Path(model_path)
+    for ext in ['.safetensors', '.pt']:
+        found = list(model_path.glob(f"*{ext}"))
+        if len(found) > 0:
+            if len(found) != 1:
+                warnings.warn(f'Detected {len(found)} {ext} model, use the first one {found[0]}.')
+            print(f"Detected model file {found[0]}")
+            return str(found[0])
+
+
+def convert_gptq2ggml(model_path, output_path, tokenizer_path=None):
+    input_path = find_quantized_model_file(model_path)
+
+    if input_path.endswith('pt'):
+        model = torch.load(input_path, map_location="cpu")
+    elif input_path.endswith('safetensors'):
+        from safetensors.torch import load_file
+        model = load_file(input_path)
+    else:
+        invalidInputError(False, "unknown input model path, only support .safetensors or .pt file.")
 
     n_vocab, n_embd = model['model.embed_tokens.weight'].shape
     layer_re = r'model\.layers\.([0-9]+)'
@@ -182,14 +190,19 @@ def convert_gptq2ggml(input_path, output_path, tokenizer_path=None):
     n_head = {32: 32, 40: 40, 60: 52, 80: 64}[n_layer]
 
     if not tokenizer_path:
-        tokenizer_path = os.path.join(input_path, "tokenizer.model")
+        tokenizer_path = os.path.join(model_path, "tokenizer.model")
 
     invalidInputError(os.path.isfile(tokenizer_path),
                       f"tokenizer.model was not found under {tokenizer_path}."
                       f"Please specify the tokenizer-path")
 
     tokenizer = SentencePieceProcessor(tokenizer_path)
+    vocab_size = tokenizer.vocab_size()
+    # TODO: Support AutoTokenizer
+    # from transformers import AutoTokenizer
+    # tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
+    # vocab_size = tokenizer.vocab_size
 
-    invalidInputError(tokenizer.vocab_size() == n_vocab, "vocab size not match.")
+    invalidInputError(vocab_size <= n_vocab, "vocab size not match.")
 
     fout = open(output_path, "wb")
@@ -205,7 +218,7 @@ def convert_gptq2ggml(input_path, output_path, tokenizer_path=None):
     fout.write(struct.pack("i" * len(values), *values))
 
     # This loop unchanged from convert-pth-to-ggml.py:
-    for i in range(tokenizer.vocab_size()):
+    for i in range(vocab_size):
         if tokenizer.is_unknown(i):
             text = " \u2047 ".encode("utf-8")
         elif tokenizer.is_control(i):
@@ -260,5 +273,4 @@ if __name__ == "__main__":
     fname_model = sys.argv[1]
     fname_tokenizer = sys.argv[2]
     out_path = sys.argv[3]
-    invalidInputError(fname_model.endswith(".pt"), "only support pytorch's .pt format now.")
    convert_gptq2ggml(fname_model, out_path, tokenizer_path=fname_tokenizer)