LLM: support converting to ggml format (#8235)

* add convert

* fix

* fix

* fix

* try

* test

* update check

* fix

* fix
binbin Deng authored 2023-05-31 15:20:06 +08:00, committed by GitHub
parent c890609d1e
commit 8421af51ae
4 changed files with 1547 additions and 9 deletions


@@ -117,6 +117,18 @@ KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))
 OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)')
 LAMBDA_REGEX = re.compile(r'\blambda\b')
 HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$')
+STARTSWITH_DEF_REGEX = re.compile(r'^(async\s+def|def)\b')
+STARTSWITH_INDENT_STATEMENT_REGEX = re.compile(
+    r'^\s*({})\b'.format('|'.join(s.replace(' ', r'\s+') for s in (
+        'def', 'async def',
+        'for', 'async for',
+        'if', 'elif', 'else',
+        'try', 'except', 'finally',
+        'with', 'async with',
+        'class',
+        'while',
+    )))
+)
 
 # Work around Python < 2.6 behaviour, which does not generate NL after
 # a comment which is on a line by itself.
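
For orientation, a standalone sketch (not part of the diff) of what the two new regexes match; E704/E701 are only reported when the logical line itself starts with `def`/`async def` or another block keyword:

    import re

    STARTSWITH_DEF_REGEX = re.compile(r'^(async\s+def|def)\b')
    STARTSWITH_INDENT_STATEMENT_REGEX = re.compile(
        r'^\s*({})\b'.format('|'.join(s.replace(' ', r'\s+') for s in (
            'def', 'async def', 'for', 'if', 'elif', 'else', 'try',
            'except', 'finally', 'with', 'class', 'while',
        )))
    )

    print(bool(STARTSWITH_DEF_REGEX.match('async  def f(): pass')))            # True
    print(bool(STARTSWITH_INDENT_STATEMENT_REGEX.match('  while x: x -= 1')))  # True
    print(bool(STARTSWITH_INDENT_STATEMENT_REGEX.match('d = {1: 2}')))         # False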
@@ -913,7 +925,8 @@ def module_imports_on_top_of_file(
 
 
 def compound_statements(logical_line):
-    r"""Compound statements (on the same line) are generally discouraged.
+    r"""Compound statements (on the same line) are generally
+    discouraged.
 
     While sometimes it's okay to put an if/for/while with a small body
     on the same line, never do this for multi-clause statements.
@@ -943,22 +956,27 @@ def compound_statements(logical_line):
     line = logical_line
     last_char = len(line) - 1
     found = line.find(':')
+    prev_found = 0
+    counts = {char: 0 for char in '{}[]()'}
     while -1 < found < last_char:
-        before = line[:found]
-        if ((before.count('{') <= before.count('}') and  # {'a': 1} (dict)
-             before.count('[') <= before.count(']') and  # [1:2] (slice)
-             before.count('(') <= before.count(')'))):  # (annotation)
-            lambda_kw = LAMBDA_REGEX.search(before)
+        update_counts(line[prev_found:found], counts)
+        if ((counts['{'] <= counts['}'] and   # {'a': 1} (dict)
+             counts['['] <= counts[']'] and   # [1:2] (slice)
+             counts['('] <= counts[')']) and  # (annotation)
+                not (sys.version_info >= (3, 8) and
+                     line[found + 1] == '=')):  # assignment expression
+            lambda_kw = LAMBDA_REGEX.search(line, 0, found)
             if lambda_kw:
                 before = line[:lambda_kw.start()].rstrip()
-                if before[-1:] == '=' and isidentifier(before[:-1].strip()):
+                if before[-1:] == '=' and before[:-1].strip().isidentifier():
                     yield 0, ("E731 do not assign a lambda expression, use a "
                               "def")
                     break
-            if before.startswith('def '):
+            if STARTSWITH_DEF_REGEX.match(line):
                 yield 0, "E704 multiple statements on one line (def)"
-            else:
+            elif STARTSWITH_INDENT_STATEMENT_REGEX.match(line):
                 yield found, "E701 multiple statements on one line (colon)"
+        prev_found = found
         found = line.find(':', found + 1)
     found = line.find(';')
     while -1 < found:
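
Illustrative effect of the new logic on a few logical lines (examples mine, not from the diff):

    if x == 1: pass        # E701: colon after a leading block keyword
    def f(x): return 2*x   # E704: statement follows def on one line
    f = lambda x: 2*x      # E731: lambda assigned to an identifier
    d = {'a': 1}           # no error: counts show the colon is inside {}
    y = v[1:2]             # no error: colon is inside []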
@@ -1306,6 +1324,14 @@ def filename_match(filename, patterns, default=True):
     return any(fnmatch(filename, pattern) for pattern in patterns)
 
 
+def update_counts(s, counts):
+    r"""Adds one to the counts of each appearance of characters in s,
+    for characters in counts"""
+    for char in s:
+        if char in counts:
+            counts[char] += 1
+
+
 def _is_eol_token(token):
     return token[0] in NEWLINE or token[4][token[3][1]:].lstrip() == '\\\n'
 
 if COMMENT_WITH_NL:
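
The old code rescanned `line[:found]` with `before.count(...)` for every colon; `update_counts` instead keeps running totals, so each character of the logical line is counted once. A standalone sketch of the bookkeeping (input string mine, not from the diff):

    def update_counts(s, counts):
        for char in s:
            if char in counts:
                counts[char] += 1

    line = "d = {1: 2}"
    counts = {char: 0 for char in '{}[]()'}
    found = line.find(':')
    update_counts(line[:found], counts)
    assert counts['{'] > counts['}']  # the colon sits inside an open dict literal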


@@ -20,3 +20,4 @@
 # only search the first bigdl package and end up finding only one sub-package.
 from .quantize import quantize
+from .convert import _convert_to_ggml


@@ -0,0 +1,105 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ===========================================================================
+#
+# This file is adapted from
+# https://github.com/ggerganov/llama.cpp/blob/master/convert.py
+#
+# MIT License
+#
+# Copyright (c) 2023 Georgi Gerganov
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from bigdl.llm.utils.common import invalidInputError
+from bigdl.llm.utils.convert_util import *
+from pathlib import Path
+import os
+
+
+def _convert_llama(model_path, outfile_dir, outtype):
+    model_path = Path(model_path)
+    outfile_dir = Path(outfile_dir)
+    model_plus = load_some_model(model_path)
+    if model_plus.vocab is not None:
+        vocab = model_plus.vocab
+    else:
+        vocab_dir = model_plus.paths[0].parent
+        vocab = load_vocab(vocab_dir)
+    model = model_plus.model
+    model = do_necessary_conversions(model)
+    output_type = pick_output_type(model, outtype)
+    model = convert_to_output_type(model, output_type)
+    params = Params.guessed(model, output_type)
+    outfile_path = default_outfile(outfile_dir, params)
+    OutputFile.write_all(outfile_path, params, model, vocab)
+
+
+def _convert_gptneox(model_path, outfile_dir, outtype):
+    _convert_gptneox_hf_to_ggml(model_path, outfile_dir, outtype)
+
+
+def _convert_bloomz(model_path, outfile_dir, outtype):
+    _convert_bloomz_hf_to_ggml(model_path, outfile_dir, outtype)
+
+
+def _convert_to_ggml(model_path: str, outfile_dir: str,
+                     model_family: str = 'llama', outtype: str = "fp16"):
+    """
+    Convert a Hugging Face llama-like / gpt-neox-like / bloom-like model
+    to ggml format.
+
+    :param model_path: str, path of the model, for example `./llama-7b-hf`.
+    :param outfile_dir: str, the directory in which to save the ggml-compatible
+           file, for example `./models`.
+    :param model_family: Which model family your input model belongs to.
+           Defaults to `llama`. Currently only `llama`/`bloomz`/`gptneox`
+           are supported.
+    :param outtype: specify the output format. Defaults to `fp16`. Currently
+           `fp32`/`fp16` are supported.
+    """
+    invalidInputError(model_family in ['llama', 'bloomz', 'gptneox'],
+                      "Now we only support conversion of model "
+                      "family ('llama', 'bloomz', 'gptneox')",
+                      "{} is not in the list.".format(model_family))
+    invalidInputError(os.path.exists(model_path),
+                      "The file {} was not found".format(model_path))
+    invalidInputError(outtype in ['fp32', 'fp16'],
+                      "Now we only support converting to 'fp32'/'fp16' format",
+                      "{} is not in the list.".format(outtype))
+
+    # make sure the output directory exists
+    os.makedirs(outfile_dir, exist_ok=True)
+    # the underlying convert utilities expect 'f32'/'f16', not 'fp32'/'fp16'
+    outtype = outtype.replace('p', '')
+    if model_family == 'llama':
+        _convert_llama(model_path, outfile_dir, outtype)
+    if model_family == 'gptneox':
+        _convert_gptneox(model_path, outfile_dir, outtype)
+    if model_family == 'bloomz':
+        _convert_bloomz(model_path, outfile_dir, outtype)
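
With the new file in place, converting a checkpoint is a single call. A minimal usage sketch, assuming the package above is importable as `bigdl.llm.ggml` and that a LLaMA checkpoint lives at the placeholder path:

    from bigdl.llm.ggml import _convert_to_ggml

    # Convert a local Hugging Face LLaMA checkpoint into a ggml-format
    # file under ./models; 'fp16' roughly halves the size versus 'fp32'.
    _convert_to_ggml(model_path='./llama-7b-hf',
                     outfile_dir='./models',
                     model_family='llama',
                     outtype='fp16')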

File diff suppressed because it is too large.