LLM: support converting to ggml format (#8235)
* add convert
* fix
* fix
* fix
* try
* test
* update check
* fix
* fix
parent c890609d1e
commit 8421af51ae
4 changed files with 1547 additions and 9 deletions
@@ -117,6 +117,18 @@ KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))
 OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)')
 LAMBDA_REGEX = re.compile(r'\blambda\b')
 HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$')
+STARTSWITH_DEF_REGEX = re.compile(r'^(async\s+def|def)\b')
+STARTSWITH_INDENT_STATEMENT_REGEX = re.compile(
+    r'^\s*({})\b'.format('|'.join(s.replace(' ', r'\s+') for s in (
+        'def', 'async def',
+        'for', 'async for',
+        'if', 'elif', 'else',
+        'try', 'except', 'finally',
+        'with', 'async with',
+        'class',
+        'while',
+    )))
+)
 
 # Work around Python < 2.6 behaviour, which does not generate NL after
 # a comment which is on a line by itself.
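Note: the two regexes added above classify a logical line by its leading keyword. A minimal sketch of their behaviour; the inlined pattern below is the expansion of the generated one, and the sample lines are illustrative:

    import re

    STARTSWITH_DEF_REGEX = re.compile(r'^(async\s+def|def)\b')
    STARTSWITH_INDENT_STATEMENT_REGEX = re.compile(
        r'^\s*(def|async\s+def|for|async\s+for|if|elif|else|'
        r'try|except|finally|with|async\s+with|class|while)\b')

    # 'def' / 'async def' must start at column 0 for the DEF regex.
    assert STARTSWITH_DEF_REGEX.match('def f(): return 1')
    assert STARTSWITH_DEF_REGEX.match('async def g(): return 1')
    # Indented compound-statement keywords match the INDENT regex.
    assert STARTSWITH_INDENT_STATEMENT_REGEX.match('    while x: x -= 1')
    # Ordinary assignments match neither.
    assert not STARTSWITH_INDENT_STATEMENT_REGEX.match('x = {1: 2}')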
@@ -913,7 +925,8 @@ def module_imports_on_top_of_file(
 
 
 def compound_statements(logical_line):
-    r"""Compound statements (on the same line) are generally discouraged.
+    r"""Compound statements (on the same line) are generally
+    discouraged.
 
     While sometimes it's okay to put an if/for/while with a small body
     on the same line, never do this for multi-clause statements.
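Note: for context, these are the kinds of one-liners this check reports. E701 and E704 appear in the hunks here; E702 is the companion semicolon code, shown only for completeness, and the sample lines are illustrative:

    if x == 1: print(x)     # E701 multiple statements on one line (colon)
    def f(x): return 2 * x  # E704 multiple statements on one line (def)
    x = 1; y = 2            # E702 multiple statements on one line (semicolon)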
@@ -943,22 +956,27 @@ def compound_statements(logical_line):
     line = logical_line
     last_char = len(line) - 1
     found = line.find(':')
+    prev_found = 0
+    counts = {char: 0 for char in '{}[]()'}
     while -1 < found < last_char:
-        before = line[:found]
-        if ((before.count('{') <= before.count('}') and   # {'a': 1} (dict)
-             before.count('[') <= before.count(']') and   # [1:2] (slice)
-             before.count('(') <= before.count(')'))):    # (annotation)
-            lambda_kw = LAMBDA_REGEX.search(before)
+        update_counts(line[prev_found:found], counts)
+        if ((counts['{'] <= counts['}'] and   # {'a': 1} (dict)
+             counts['['] <= counts[']'] and   # [1:2] (slice)
+             counts['('] <= counts[')']) and  # (annotation)
+                not (sys.version_info >= (3, 8) and
+                     line[found + 1] == '=')):  # assignment expression
+            lambda_kw = LAMBDA_REGEX.search(line, 0, found)
             if lambda_kw:
                 before = line[:lambda_kw.start()].rstrip()
-                if before[-1:] == '=' and isidentifier(before[:-1].strip()):
+                if before[-1:] == '=' and before[:-1].strip().isidentifier():
                     yield 0, ("E731 do not assign a lambda expression, use a "
                               "def")
                 break
-            if before.startswith('def '):
+            if STARTSWITH_DEF_REGEX.match(line):
                 yield 0, "E704 multiple statements on one line (def)"
-            else:
+            elif STARTSWITH_INDENT_STATEMENT_REGEX.match(line):
                 yield found, "E701 multiple statements on one line (colon)"
+            prev_found = found
         found = line.find(':', found + 1)
     found = line.find(';')
     while -1 < found:
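Note: the new `not (sys.version_info >= (3, 8) and line[found + 1] == '=')` guard stops the check from misfiring on assignment expressions (PEP 572, Python 3.8+), while the running bracket counts still exempt dict, slice and annotation colons. A hedged sketch of inputs, not taken from the test suite (`f` and `process` are placeholders):

    while chunk := f.read(8192):   # ':' of ':=' is followed by '=', so no E701 here
        process(chunk)
    d = {'a': 1}                   # unclosed '{' before ':' exempts the dict colon
    y = x[1:2]                     # unclosed '[' before ':' exempts the slice colon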
@@ -1306,6 +1324,14 @@ def filename_match(filename, patterns, default=True):
     return any(fnmatch(filename, pattern) for pattern in patterns)
 
 
+def update_counts(s, counts):
+    r"""Adds one to the counts of each appearance of characters in s,
+        for characters in counts"""
+    for char in s:
+        if char in counts:
+            counts[char] += 1
+
+
 def _is_eol_token(token):
     return token[0] in NEWLINE or token[4][token[3][1]:].lstrip() == '\\\n'
 if COMMENT_WITH_NL:
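Note: update_counts is what lets compound_statements keep running bracket totals instead of rescanning line[:found] with three .count() calls at every colon. A small usage sketch with illustrative inputs:

    counts = {char: 0 for char in '{}[]()'}
    update_counts("d = {'a': 1", counts)           # scan up to the first colon
    assert counts['{'] == 1 and counts['}'] == 0   # still inside a dict literal
    update_counts('}', counts)                     # later, feed only the new slice
    assert counts['{'] == counts['}']              # brackets balanced again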
@@ -20,3 +20,4 @@
 # only search the first bigdl package and end up finding only one sub-package.
 
 from .quantize import quantize
+from .convert import _convert_to_ggml
105 python/llm/src/bigdl/llm/ggml/convert.py Normal file
@@ -0,0 +1,105 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ===========================================================================
+#
+# This file is adapted from
+# https://github.com/ggerganov/llama.cpp/blob/master/convert.py
+#
+# MIT License
+#
+# Copyright (c) 2023 Georgi Gerganov
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from bigdl.llm.utils.common import invalidInputError
+from bigdl.llm.utils.convert_util import *
+from pathlib import Path
+import os
+
+
+def _convert_llama(model_path, outfile_dir, outtype):
+    model_path = Path(model_path)
+    outfile_dir = Path(outfile_dir)
+    model_plus = load_some_model(model_path)
+    if model_plus.vocab is not None:
+        vocab = model_plus.vocab
+    else:
+        vocab_dir = model_plus.paths[0].parent
+        vocab = load_vocab(vocab_dir)
+    model = model_plus.model
+    model = do_necessary_conversions(model)
+    output_type = pick_output_type(model, outtype)
+    model = convert_to_output_type(model, output_type)
+    params = Params.guessed(model, output_type)
+    outfile_path = default_outfile(outfile_dir, params)
+    OutputFile.write_all(outfile_path, params, model, vocab)
+
+
+def _convert_gptneox(model_path, outfile_dir, outtype):
+    _convert_gptneox_hf_to_ggml(model_path, outfile_dir, outtype)
+
+
+def _convert_bloomz(model_path, outfile_dir, outtype):
+    _convert_bloomz_hf_to_ggml(model_path, outfile_dir, outtype)
+
+
+def _convert_to_ggml(model_path: str, outfile_dir: str,
+                     model_family: str = 'llama', outtype: str = "fp16"):
+    """
+    Convert a Hugging Face llama-like / gpt-neox-like / bloom-like model to ggml format.
+
+    :param model_path: str, path of model, for example `./llama-7b-hf`.
+    :param outfile_dir: str, the directory to save ggml compatible file, for example `./models`.
+    :param model_family: Which model family your input model belongs to. Default to `llama`.
+           Now only `llama`/`bloomz`/`gptneox` are supported.
+    :param outtype: specify the output format. Default to `fp16`. Now `fp32`/`fp16` are supported.
+    """
+    invalidInputError(model_family in ['llama', 'bloomz', 'gptneox'],
+                      "Now we only support conversion of model \
+                       family('llama', 'bloomz', 'gptneox')",
+                      "{} is not in the list.".format(model_family))
+    invalidInputError(os.path.exists(model_path),
+                      "The file {} was not found".format(model_path))
+    invalidInputError(outtype in ['fp32', 'fp16'],
+                      "Now we only support converting to 'fp32'/'fp16' format",
+                      "{} is not in the list.".format(outtype))
+
+    # make sure the output directory exists
+    os.makedirs(outfile_dir, exist_ok=True)
+
+    outtype = outtype.replace('p', '')
+    if model_family == 'llama':
+        _convert_llama(model_path, outfile_dir, outtype)
+    if model_family == 'gptneox':
+        _convert_gptneox(model_path, outfile_dir, outtype)
+    if model_family == 'bloomz':
+        _convert_bloomz(model_path, outfile_dir, outtype)
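Note: a hedged usage sketch of the new entry point; the local paths and checkpoint name are illustrative, and the function is also re-exported from the bigdl.llm.ggml package by the __init__.py change above. The 'fp16' value is mapped to 'f16' internally via outtype.replace('p', '').

    from bigdl.llm.ggml.convert import _convert_to_ggml

    # Convert a local Hugging Face LLaMA checkpoint into a ggml-compatible
    # file written under ./models.
    _convert_to_ggml(model_path='./llama-7b-hf',
                     outfile_dir='./models',
                     model_family='llama',
                     outtype='fp16')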
1406 python/llm/src/bigdl/llm/utils/convert_util.py Normal file
File diff suppressed because it is too large