LLM: support converting to ggml format (#8235)

* add convert

* fix

* fix

* fix

* try

* test

* update check

* fix

* fix
binbin Deng authored 2023-05-31 15:20:06 +08:00, committed by GitHub
parent c890609d1e
commit 8421af51ae
4 changed files with 1547 additions and 9 deletions


@@ -117,6 +117,18 @@ KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))
 OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)')
 LAMBDA_REGEX = re.compile(r'\blambda\b')
 HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$')
+STARTSWITH_DEF_REGEX = re.compile(r'^(async\s+def|def)\b')
+STARTSWITH_INDENT_STATEMENT_REGEX = re.compile(
+    r'^\s*({})\b'.format('|'.join(s.replace(' ', r'\s+') for s in (
+        'def', 'async def',
+        'for', 'async for',
+        'if', 'elif', 'else',
+        'try', 'except', 'finally',
+        'with', 'async with',
+        'class',
+        'while',
+    )))
+)
 
 # Work around Python < 2.6 behaviour, which does not generate NL after
 # a comment which is on a line by itself.
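
For orientation, a standalone sketch (not part of the diff) of what the two new regexes match; E704/E701 are only reported when the logical line itself starts with `def`/`async def` or another block keyword:

    import re

    STARTSWITH_DEF_REGEX = re.compile(r'^(async\s+def|def)\b')
    STARTSWITH_INDENT_STATEMENT_REGEX = re.compile(
        r'^\s*({})\b'.format('|'.join(s.replace(' ', r'\s+') for s in (
            'def', 'async def', 'for', 'if', 'elif', 'else', 'try',
            'except', 'finally', 'with', 'class', 'while',
        )))
    )

    print(bool(STARTSWITH_DEF_REGEX.match('async  def f(): pass')))            # True
    print(bool(STARTSWITH_INDENT_STATEMENT_REGEX.match('  while x: x -= 1')))  # True
    print(bool(STARTSWITH_INDENT_STATEMENT_REGEX.match('d = {1: 2}')))         # False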
@@ -913,7 +925,8 @@ def module_imports_on_top_of_file(
 
 
 def compound_statements(logical_line):
-    r"""Compound statements (on the same line) are generally discouraged.
+    r"""Compound statements (on the same line) are generally
+    discouraged.
 
     While sometimes it's okay to put an if/for/while with a small body
     on the same line, never do this for multi-clause statements.
@@ -943,22 +956,27 @@ def compound_statements(logical_line):
     line = logical_line
     last_char = len(line) - 1
     found = line.find(':')
+    prev_found = 0
+    counts = {char: 0 for char in '{}[]()'}
     while -1 < found < last_char:
-        before = line[:found]
-        if ((before.count('{') <= before.count('}') and  # {'a': 1} (dict)
-             before.count('[') <= before.count(']') and  # [1:2] (slice)
-             before.count('(') <= before.count(')'))):  # (annotation)
-            lambda_kw = LAMBDA_REGEX.search(before)
+        update_counts(line[prev_found:found], counts)
+        if ((counts['{'] <= counts['}'] and   # {'a': 1} (dict)
+             counts['['] <= counts[']'] and   # [1:2] (slice)
+             counts['('] <= counts[')']) and  # (annotation)
+                not (sys.version_info >= (3, 8) and
+                     line[found + 1] == '=')):  # assignment expression
+            lambda_kw = LAMBDA_REGEX.search(line, 0, found)
             if lambda_kw:
                 before = line[:lambda_kw.start()].rstrip()
-                if before[-1:] == '=' and isidentifier(before[:-1].strip()):
+                if before[-1:] == '=' and before[:-1].strip().isidentifier():
                     yield 0, ("E731 do not assign a lambda expression, use a "
                               "def")
                     break
-            if before.startswith('def '):
+            if STARTSWITH_DEF_REGEX.match(line):
                 yield 0, "E704 multiple statements on one line (def)"
-            else:
+            elif STARTSWITH_INDENT_STATEMENT_REGEX.match(line):
                 yield found, "E701 multiple statements on one line (colon)"
+        prev_found = found
         found = line.find(':', found + 1)
     found = line.find(';')
     while -1 < found:
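
Illustrative effect of the new logic on a few logical lines (examples mine, not from the diff):

    if x == 1: pass        # E701: colon after a leading block keyword
    def f(x): return 2*x   # E704: statement follows def on one line
    f = lambda x: 2*x      # E731: lambda assigned to an identifier
    d = {'a': 1}           # no error: counts show the colon is inside {}
    y = v[1:2]             # no error: colon is inside []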
@@ -1306,6 +1324,14 @@ def filename_match(filename, patterns, default=True):
     return any(fnmatch(filename, pattern) for pattern in patterns)
 
 
+def update_counts(s, counts):
+    r"""Adds one to the counts of each appearance of characters in s,
+    for characters in counts"""
+    for char in s:
+        if char in counts:
+            counts[char] += 1
+
+
 def _is_eol_token(token):
     return token[0] in NEWLINE or token[4][token[3][1]:].lstrip() == '\\\n'
 
 if COMMENT_WITH_NL:
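
The old code rescanned `line[:found]` with `before.count(...)` for every colon; `update_counts` instead keeps running totals, so each character of the logical line is counted once. A standalone sketch of the bookkeeping (input string mine, not from the diff):

    def update_counts(s, counts):
        for char in s:
            if char in counts:
                counts[char] += 1

    line = "d = {1: 2}"
    counts = {char: 0 for char in '{}[]()'}
    found = line.find(':')
    update_counts(line[:found], counts)
    assert counts['{'] > counts['}']  # the colon sits inside an open dict literal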


@@ -20,3 +20,4 @@
 # only search the first bigdl package and end up finding only one sub-package.
 from .quantize import quantize
+from .convert import _convert_to_ggml


@@ -0,0 +1,105 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ===========================================================================
+#
+# This file is adapted from
+# https://github.com/ggerganov/llama.cpp/blob/master/convert.py
+#
+# MIT License
+#
+# Copyright (c) 2023 Georgi Gerganov
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from bigdl.llm.utils.common import invalidInputError
+from bigdl.llm.utils.convert_util import *
+from pathlib import Path
+import os
+
+
+def _convert_llama(model_path, outfile_dir, outtype):
+    model_path = Path(model_path)
+    outfile_dir = Path(outfile_dir)
+    model_plus = load_some_model(model_path)
+    if model_plus.vocab is not None:
+        vocab = model_plus.vocab
+    else:
+        vocab_dir = model_plus.paths[0].parent
+        vocab = load_vocab(vocab_dir)
+    model = model_plus.model
+    model = do_necessary_conversions(model)
+    output_type = pick_output_type(model, outtype)
+    model = convert_to_output_type(model, output_type)
+    params = Params.guessed(model, output_type)
+    outfile_path = default_outfile(outfile_dir, params)
+    OutputFile.write_all(outfile_path, params, model, vocab)
+
+
+def _convert_gptneox(model_path, outfile_dir, outtype):
+    _convert_gptneox_hf_to_ggml(model_path, outfile_dir, outtype)
+
+
+def _convert_bloomz(model_path, outfile_dir, outtype):
+    _convert_bloomz_hf_to_ggml(model_path, outfile_dir, outtype)
+
+
+def _convert_to_ggml(model_path: str, outfile_dir: str,
+                     model_family: str = 'llama', outtype: str = "fp16"):
+    """
+    Convert a Hugging Face llama-like / gpt-neox-like / bloom-like model
+    to ggml format.
+
+    :param model_path: str, path of the model, for example `./llama-7b-hf`.
+    :param outfile_dir: str, the directory in which to save the ggml-compatible
+           file, for example `./models`.
+    :param model_family: Which model family your input model belongs to.
+           Defaults to `llama`. Currently only `llama`/`bloomz`/`gptneox`
+           are supported.
+    :param outtype: specify the output format. Defaults to `fp16`. Currently
+           `fp32`/`fp16` are supported.
+    """
+    invalidInputError(model_family in ['llama', 'bloomz', 'gptneox'],
+                      "Now we only support conversion of model "
+                      "family ('llama', 'bloomz', 'gptneox')",
+                      "{} is not in the list.".format(model_family))
+    invalidInputError(os.path.exists(model_path),
+                      "The file {} was not found".format(model_path))
+    invalidInputError(outtype in ['fp32', 'fp16'],
+                      "Now we only support converting to 'fp32'/'fp16' format",
+                      "{} is not in the list.".format(outtype))
+
+    # make sure the output directory exists
+    os.makedirs(outfile_dir, exist_ok=True)
+    # the underlying convert utilities expect 'f32'/'f16', not 'fp32'/'fp16'
+    outtype = outtype.replace('p', '')
+    if model_family == 'llama':
+        _convert_llama(model_path, outfile_dir, outtype)
+    if model_family == 'gptneox':
+        _convert_gptneox(model_path, outfile_dir, outtype)
+    if model_family == 'bloomz':
+        _convert_bloomz(model_path, outfile_dir, outtype)
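
With the new file in place, converting a checkpoint is a single call. A minimal usage sketch, assuming the package above is importable as `bigdl.llm.ggml` and that a LLaMA checkpoint lives at the placeholder path:

    from bigdl.llm.ggml import _convert_to_ggml

    # Convert a local Hugging Face LLaMA checkpoint into a ggml-format
    # file under ./models; 'fp16' roughly halves the size versus 'fp32'.
    _convert_to_ggml(model_path='./llama-7b-hf',
                     outfile_dir='./models',
                     model_family='llama',
                     outtype='fp16')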

File diff suppressed because it is too large.