[llm] Initial support of package and quantize (#8228)
* first commit of CMakeFiles.txt to include llama & gptneox * initial support of quantize * update cmake for only consider linux now * support quantize interface * update based on comment
This commit is contained in:
parent
ea22416525
commit
4638b85f3e
11 changed files with 234 additions and 20 deletions
58
python/llm/CMakeLists.txt
Normal file
58
python/llm/CMakeLists.txt
Normal file
|
|
@ -0,0 +1,58 @@
|
||||||
|
cmake_minimum_required(VERSION 3.4...3.22)

project(bigdl_llm)

# FORCE_CMAKE: when ON, force a CMake build of the related C++ projects.
option(FORCE_CMAKE "Force CMake build of Related CPP project" OFF)

# Allow the FORCE_CMAKE environment variable (read at configure time) to
# override the option, but only when it is actually set to something.
# Assigning unconditionally would shadow a user-supplied -DFORCE_CMAKE=ON
# with an empty string whenever the environment variable is unset.
if(NOT "$ENV{FORCE_CMAKE}" STREQUAL "")
  set(FORCE_CMAKE "$ENV{FORCE_CMAKE}")
endif()
|
||||||
|
|
||||||
|
# Build rules for the vendored native projects.
#
# Each vendored project is built in place by invoking `make` inside its own
# source directory, so the declared outputs live in the source tree next to
# the vendored Makefiles. The rules declare no DEPENDS: CMake re-runs a rule
# only when one of its outputs is missing.

# llama.cpp: shared library and quantize tool.
add_custom_command(
  OUTPUT
    ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/libllama.so
    ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/quantize
  COMMAND make libllama.so quantize
  WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp
  COMMENT "Building libllama.so and quantize in vendor/llama.cpp"
  VERBATIM
)

# redpajama.cpp: gptneox shared library and quantize tool.
add_custom_command(
  OUTPUT
    ${CMAKE_CURRENT_SOURCE_DIR}/vendor/redpajama.cpp/libgptneox.so
    ${CMAKE_CURRENT_SOURCE_DIR}/vendor/redpajama.cpp/quantize-gptneox
  COMMAND make libgptneox.so quantize-gptneox
  WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/vendor/redpajama.cpp
  COMMENT "Building libgptneox.so and quantize-gptneox in vendor/redpajama.cpp"
  VERBATIM
)

# bloomz.cpp: shared library and quantize tool.
add_custom_command(
  OUTPUT
    ${CMAKE_CURRENT_SOURCE_DIR}/vendor/bloomz.cpp/libbloom.so
    ${CMAKE_CURRENT_SOURCE_DIR}/vendor/bloomz.cpp/quantize
  COMMAND make libbloom.so quantize
  WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/vendor/bloomz.cpp
  COMMENT "Building libbloom.so and quantize in vendor/bloomz.cpp"
  VERBATIM
)

# Part of the default build (ALL): depends on every vendored artifact so the
# custom commands above are triggered.
add_custom_target(
  run ALL
  DEPENDS
    ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/libllama.so
    ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/quantize
    ${CMAKE_CURRENT_SOURCE_DIR}/vendor/redpajama.cpp/libgptneox.so
    ${CMAKE_CURRENT_SOURCE_DIR}/vendor/redpajama.cpp/quantize-gptneox
    ${CMAKE_CURRENT_SOURCE_DIR}/vendor/bloomz.cpp/libbloom.so
    ${CMAKE_CURRENT_SOURCE_DIR}/vendor/bloomz.cpp/quantize
)
|
||||||
|
# Install rules: shared libraries go to the package's lib directory and the
# quantize tools to its bin directory. The llama and bloomz tools are both
# called `quantize` upstream, so they are renamed on install to
# quantize-<family> to keep them apart.
set(bigdl_llm_lib_dest src/bigdl/llm/lib)
set(bigdl_llm_bin_dest src/bigdl/llm/bin)

# llama.cpp
install(
  FILES "${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/libllama.so"
  DESTINATION ${bigdl_llm_lib_dest}
)
install(
  PROGRAMS "${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/quantize"
  DESTINATION ${bigdl_llm_bin_dest}
  RENAME quantize-llama
)

# redpajama.cpp (gptneox)
install(
  FILES "${CMAKE_CURRENT_SOURCE_DIR}/vendor/redpajama.cpp/libgptneox.so"
  DESTINATION ${bigdl_llm_lib_dest}
)
install(
  PROGRAMS "${CMAKE_CURRENT_SOURCE_DIR}/vendor/redpajama.cpp/quantize-gptneox"
  DESTINATION ${bigdl_llm_bin_dest}
)

# bloomz.cpp
install(
  FILES "${CMAKE_CURRENT_SOURCE_DIR}/vendor/bloomz.cpp/libbloom.so"
  DESTINATION ${bigdl_llm_lib_dest}
)
install(
  PROGRAMS "${CMAKE_CURRENT_SOURCE_DIR}/vendor/bloomz.cpp/quantize"
  DESTINATION ${bigdl_llm_bin_dest}
  RENAME quantize-bloomz
)
|
||||||
|
|
@ -16,27 +16,9 @@
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
#
|
#
|
||||||
|
|
||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
#
|
|
||||||
# Copyright 2016 The BigDL Authors.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import fnmatch
|
import fnmatch
|
||||||
from setuptools import setup
|
from skbuild import setup
|
||||||
|
|
||||||
long_description = '''
|
long_description = '''
|
||||||
BigDL LLM
|
BigDL LLM
|
||||||
|
|
@ -75,7 +57,7 @@ def setup_package():
|
||||||
url='https://github.com/intel-analytics/BigDL',
|
url='https://github.com/intel-analytics/BigDL',
|
||||||
packages=get_llm_packages(),
|
packages=get_llm_packages(),
|
||||||
package_dir={"": "src"},
|
package_dir={"": "src"},
|
||||||
install_requires=[],
|
install_requires=[""],
|
||||||
include_package_data=True,
|
include_package_data=True,
|
||||||
classifiers=[
|
classifiers=[
|
||||||
'License :: OSI Approved :: Apache Software License',
|
'License :: OSI Approved :: Apache Software License',
|
||||||
|
|
|
||||||
|
|
@ -18,3 +18,5 @@
|
||||||
# physically located elsewhere.
|
# physically located elsewhere.
|
||||||
# Otherwise there would be module not found error in non-pip's setting as Python would
|
# Otherwise there would be module not found error in non-pip's setting as Python would
|
||||||
# only search the first bigdl package and end up finding only one sub-package.
|
# only search the first bigdl package and end up finding only one sub-package.
|
||||||
|
|
||||||
|
from .quantize import quantize
|
||||||
|
|
|
||||||
86
python/llm/src/bigdl/llm/ggml/quantize.py
Normal file
86
python/llm/src/bigdl/llm/ggml/quantize.py
Normal file
|
|
@ -0,0 +1,86 @@
|
||||||
|
#
|
||||||
|
# Copyright 2016 The BigDL Authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
from bigdl.llm.utils.common import invalidInputError
|
||||||
|
|
||||||
|
|
||||||
|
# Directory holding this module, and its parent directory. The parent is the
# base under which `quantize` looks for the `bin/quantize-<family>` tools.
_module_path = os.path.abspath(__file__)
dirname, _ = os.path.split(_module_path)
bin_dirname = os.path.dirname(dirname)

# Per-family mapping from quantization type name to the integer code handed
# to that family's quantize executable.
_llama_quantize_type = dict(q4_0=2, q4_1=3, q4_2=5)
_bloomz_quantize_type = dict(q4_0=2, q4_1=3)
_gptneox_quantize_type = dict(q4_0=2, q4_1=3, q4_2=5, q5_0=8, q5_1=9, q8_0=7)

# Model family name -> quantization type table for that family.
_quantize_type = dict(llama=_llama_quantize_type,
                      bloomz=_bloomz_quantize_type,
                      gptneox=_gptneox_quantize_type)

# Union of every type name accepted by at least one model family.
_valid_types = {type_name
                for family_types in (_llama_quantize_type,
                                     _bloomz_quantize_type,
                                     _gptneox_quantize_type)
                for type_name in family_types}
|
||||||
|
|
||||||
|
|
||||||
|
def quantize(input_path: str, output_path: str=None, model_family: str = 'llama', dtype: str='q4_0'):
    """
    Quantize ggml file to lower precision.

    :param input_path: Path of input ggml file, for example `./ggml-model-f16.bin`.
    :param output_path: Save path of output quantized model. Default to `None`.
            If you don't specify this parameter, quantized model will be saved in
            the same directory as the input and just replace precision with quantize_type
            like `./ggml-model-q4_0.bin`. In that case the input file name must contain
            `f16`; otherwise please pass `output_path` explicitly.
    :param model_family: Which model family your input model belongs to. Default to `llama`.
            Now only `llama`/`bloomz`/`gptneox` are supported.
    :param dtype: Quantization method which differs in the resulting model disk size and
            inference speed. Default to `q4_0`. Different model families may support
            different types, now the supported list is:
            llama : "q4_0", "q4_1", "q4_2"
            bloomz : "q4_0", "q4_1"
            gptneox : "q4_0", "q4_1", "q4_2", "q5_0", "q5_1", "q8_0"

    Raises RuntimeError (through `invalidInputError`) when an argument is invalid,
    when no output path can be derived, or when the quantize executable exits
    with a non-zero return code.
    """
    invalidInputError(model_family in ['llama', 'bloomz', 'gptneox'],
                      "Now we only support quantization of model family('llama', 'bloomz', 'gptneox')",
                      "{} is not in the list.".format(model_family))
    invalidInputError(os.path.isfile(input_path),
                      "The file {} was not found".format(input_path))

    # convert quantize type str into corresponding int value
    # (validated before the output name is derived, since dtype is part of that name)
    quantize_type_map = _quantize_type[model_family]
    invalidInputError(dtype in quantize_type_map,
                      "{0} model just accept {1} now, but you pass in {2}.".format(
                          model_family,
                          list(quantize_type_map.keys()),
                          dtype))
    quantize_type = quantize_type_map[dtype]

    # TODO : multi input model path
    if output_path is None:
        # Only rewrite the file name: a directory that happens to contain "f16"
        # must not be altered, the output stays next to the input.
        input_dir, input_name = os.path.split(input_path)
        output_name = input_name.replace("f16", dtype)
        # Without "f16" in the name the output would be the input file itself,
        # and the quantizer would overwrite the model it is reading.
        invalidInputError(output_name != input_name,
                          "Cannot derive an output path from {} because its file name "
                          "does not contain 'f16'.".format(input_path),
                          "Please specify output_path explicitly.")
        output_path = os.path.join(input_dir, output_name)

    # Pass the arguments as a list rather than splitting a formatted string,
    # so paths containing whitespace reach the executable intact.
    quantize_bin = os.path.join(bin_dirname, "bin", "quantize-{}".format(model_family))
    quantize_args = [quantize_bin,
                     str(input_path),
                     str(output_path),
                     str(quantize_type)]
    returncode = subprocess.call(quantize_args)
    invalidInputError(not returncode,
                      "Fail to quantize {}.".format(str(input_path)))
|
||||||
20
python/llm/src/bigdl/llm/utils/__init__.py
Normal file
20
python/llm/src/bigdl/llm/utils/__init__.py
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
#
|
||||||
|
# Copyright 2016 The BigDL Authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
# This makes sure Python is aware that there is more than one sub-package within bigdl,
|
||||||
|
# physically located elsewhere.
|
||||||
|
# Otherwise there would be module not found error in non-pip's setting as Python would
|
||||||
|
# only search the first bigdl package and end up finding only one sub-package.
|
||||||
22
python/llm/src/bigdl/llm/utils/common/__init__.py
Normal file
22
python/llm/src/bigdl/llm/utils/common/__init__.py
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
#
|
||||||
|
# Copyright 2016 The BigDL Authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
# This makes sure Python is aware that there is more than one sub-package within bigdl,
|
||||||
|
# physically located elsewhere.
|
||||||
|
# Otherwise there would be module not found error in non-pip's setting as Python would
|
||||||
|
# only search the first bigdl package and end up finding only one sub-package.
|
||||||
|
|
||||||
|
from .log4Error import invalidInputError
|
||||||
41
python/llm/src/bigdl/llm/utils/common/log4Error.py
Normal file
41
python/llm/src/bigdl/llm/utils/common/log4Error.py
Normal file
|
|
@ -0,0 +1,41 @@
|
||||||
|
#
|
||||||
|
# Copyright 2016 The BigDL Authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def outputUserMessage(errMsg, fixMsg=None):
    """
    Log a usage error through the module logger at error level.

    Emits a "Usage Error" banner followed by `errMsg`, then, when `fixMsg` is
    truthy, a "How to fix" banner followed by `fixMsg`, and finally a
    "Call Stack" banner.

    :param errMsg: Description of the error; concatenated to the banner string.
    :param fixMsg: Optional hint on how to fix the error. Default to `None`.
    """
    usage_banner = f"\n\n****************************Usage Error************************\n"
    fix_banner = f"\n\n**************************How to fix***********************\n"
    stack_banner = f"\n\n****************************Call Stack*************************"

    logger.error(usage_banner + errMsg)
    if fixMsg:
        logger.error(fix_banner + fixMsg)
    logger.error(stack_banner)
|
||||||
|
|
||||||
|
|
||||||
|
def invalidInputError(condition, errMsg, fixMsg=None):
    """
    Check an input condition; log and raise when it does not hold.

    :param condition: Value expected to be truthy. Nothing happens when it is.
    :param errMsg: Error description, logged and used as the exception message.
    :param fixMsg: Optional hint on how to fix the problem, logged only.
    :raises RuntimeError: with `errMsg` when `condition` is falsy.
    """
    if condition:
        return
    outputUserMessage(errMsg, fixMsg)
    raise RuntimeError(errMsg)
|
||||||
|
|
||||||
|
|
||||||
|
def invalidOperationError(condition, errMsg, fixMsg=None, cause=None):
    """
    Check an operation condition; log and raise when it does not hold.

    :param condition: Value expected to be truthy. Nothing happens when it is.
    :param errMsg: Error description, logged and used as the fallback exception message.
    :param fixMsg: Optional hint on how to fix the problem, logged only.
    :param cause: Optional exception to raise instead of a new RuntimeError.
    :raises RuntimeError: with `errMsg` when `condition` is falsy and `cause` is falsy;
            otherwise `cause` itself is raised.
    """
    if condition:
        return
    outputUserMessage(errMsg, fixMsg)
    # Prefer the caller-supplied exception when one was given.
    raise cause if cause else RuntimeError(errMsg)
|
||||||
1
python/llm/vendor/bloomz.cpp
vendored
Submodule
1
python/llm/vendor/bloomz.cpp
vendored
Submodule
|
|
@ -0,0 +1 @@
|
||||||
|
Subproject commit 6d2dee07626b7e8b6af1ada2bfb1bb17de4deaea
|
||||||
1
python/llm/vendor/llama.cpp
vendored
Submodule
1
python/llm/vendor/llama.cpp
vendored
Submodule
|
|
@ -0,0 +1 @@
|
||||||
|
Subproject commit f6ab8e2a03f537e853fef2deb36be89c4f698b05
|
||||||
1
python/llm/vendor/redpajama.cpp
vendored
Submodule
1
python/llm/vendor/redpajama.cpp
vendored
Submodule
|
|
@ -0,0 +1 @@
|
||||||
|
Subproject commit bec989c272fd733d1db1069fb47ad5008ae1cd7e
|
||||||
Loading…
Reference in a new issue