cmake_minimum_required(VERSION 3.4...3.22)

project(bigdl_llm)

# FORCE_CMAKE can be supplied either as a cache option (-DFORCE_CMAKE=ON) or
# through the environment. The environment value only takes effect when it is
# actually set to something; an unset/empty variable must not wipe out the
# option's value.
option(FORCE_CMAKE "Force CMake build of Related CPP project" OFF)
if(NOT "$ENV{FORCE_CMAKE}" STREQUAL "")
  set(FORCE_CMAKE "$ENV{FORCE_CMAKE}")
endif()

# The vendored projects are built by their own Makefiles, so a make program is
# required. Fail at configure time with a clear message instead of at build
# time with a cryptic "command not found".
find_program(BIGDL_LLM_MAKE_PROGRAM NAMES make gmake)
if(NOT BIGDL_LLM_MAKE_PROGRAM)
  message(FATAL_ERROR
    "bigdl_llm: a 'make' program is required to build the vendored C++ projects")
endif()

set(BIGDL_LLM_VENDOR_DIR "${CMAKE_CURRENT_SOURCE_DIR}/vendor")

# Every file produced by the vendored builds; the `run` target depends on it.
set(bigdl_llm_vendor_outputs "")

# bigdl_llm_add_vendor_build(<subdir> <library> <tool> <installed_tool>)
#
# Registers the build and install rules for one vendored project.
#   subdir         - directory below vendor/ that holds the project's Makefile
#   library        - make target / file name of the shared library
#   tool           - make target / file name of the quantize executable
#   installed_tool - file name the executable gets in src/bigdl/llm/bin
#
# The vendored Makefiles write their artifacts next to their sources, which is
# why OUTPUT points into the source tree rather than the binary tree.
#
# Example:
#   bigdl_llm_add_vendor_build(llama.cpp libllama.so quantize quantize-llama)
function(bigdl_llm_add_vendor_build subdir library tool installed_tool)
  set(project_dir "${BIGDL_LLM_VENDOR_DIR}/${subdir}")
  set(built_library "${project_dir}/${library}")
  set(built_tool "${project_dir}/${tool}")

  add_custom_command(
    OUTPUT "${built_library}" "${built_tool}"
    COMMAND "${BIGDL_LLM_MAKE_PROGRAM}" "${library}" "${tool}"
    WORKING_DIRECTORY "${project_dir}"
    COMMENT "Building ${library} and ${tool} in vendor/${subdir}"
    VERBATIM
  )

  install(
    FILES "${built_library}"
    DESTINATION src/bigdl/llm/lib
  )
  # The tools are renamed on install because several vendored projects ship an
  # executable that is simply called `quantize`.
  install(
    PROGRAMS "${built_tool}"
    RENAME "${installed_tool}"
    DESTINATION src/bigdl/llm/bin
  )

  list(APPEND bigdl_llm_vendor_outputs "${built_library}" "${built_tool}")
  set(bigdl_llm_vendor_outputs "${bigdl_llm_vendor_outputs}" PARENT_SCOPE)
endfunction()

bigdl_llm_add_vendor_build(llama.cpp libllama.so quantize quantize-llama)
bigdl_llm_add_vendor_build(redpajama.cpp libgptneox.so quantize-gptneox quantize-gptneox)
bigdl_llm_add_vendor_build(bloomz.cpp libbloom.so quantize quantize-bloomz)

# Part of ALL so a plain build produces every vendored artifact before the
# install rules above run.
add_custom_target(
  run ALL
  DEPENDS ${bigdl_llm_vendor_outputs}
)
#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os
import subprocess
from bigdl.llm.utils.common import invalidInputError


# Directory of this module (.../bigdl/llm/ggml) and its parent (.../bigdl/llm);
# the quantize executables are looked up in `<bin_dirname>/bin`.
dirname, _ = os.path.split(os.path.abspath(__file__))
bin_dirname = os.path.dirname(dirname)

# Integer codes passed to each family's quantize executable, keyed by dtype name.
_llama_quantize_type = {"q4_0": 2,
                        "q4_1": 3,
                        "q4_2": 5}
_bloomz_quantize_type = {"q4_0": 2,
                         "q4_1": 3}
_gptneox_quantize_type = {"q4_0": 2,
                          "q4_1": 3,
                          "q4_2": 5,
                          "q5_0": 8,
                          "q5_1": 9,
                          "q8_0": 7}

_quantize_type = {"llama": _llama_quantize_type,
                  "bloomz": _bloomz_quantize_type,
                  "gptneox": _gptneox_quantize_type}

# Union of the dtype names accepted by any family.
_valid_types = {name for type_map in _quantize_type.values() for name in type_map}


def quantize(input_path: str, output_path: str = None,
             model_family: str = 'llama', dtype: str = 'q4_0'):
    """
    Quantize ggml file to lower precision.

    :param input_path: Path of input ggml file, for example `./ggml-model-f16.bin`.
    :param output_path: Save path of output quantized model. Default to `None`.
            If you don't specify this parameter, quantized model will be saved in
            the same directory as the input, with `f16` in the file name replaced
            by `dtype`, like `./ggml-model-q4_0.bin`.
    :param model_family: Which model family your input model belongs to. Default to `llama`.
            Now only `llama`/`bloomz`/`gptneox` are supported.
    :param dtype: Quantization method which differs in the resulting model disk size and
            inference speed. Default to `q4_0`. Different model families may support
            different types, now the supported list is:
            llama : "q4_0", "q4_1", "q4_2"
            bloomz : "q4_0", "q4_1"
            gptneox : "q4_0", "q4_1", "q4_2", "q5_0", "q5_1", "q8_0"

    :raises RuntimeError: (through `invalidInputError`) if the model family or dtype
            is not supported, the input file does not exist, the output path would
            be the input file itself, or the quantize executable exits with a
            non-zero status.
    """
    invalidInputError(model_family in _quantize_type,
                      "Now we only support quantization of model family"
                      "('llama', 'bloomz', 'gptneox')",
                      "{} is not in the list.".format(model_family))
    invalidInputError(os.path.isfile(input_path),
                      "The file {} was not found".format(input_path))

    # convert quantize type str into corresponding int value
    quantize_type_map = _quantize_type[model_family]
    invalidInputError(dtype in quantize_type_map,
                      "{0} model just accept {1} now, but you pass in {2}.".format(
                          model_family,
                          list(quantize_type_map.keys()),
                          dtype))
    quantize_type = quantize_type_map[dtype]

    # TODO : multi input model path
    if output_path is None:
        # Only rewrite the file name: a directory that happens to contain
        # "f16" must stay untouched so the result lands next to the input.
        input_dir, input_name = os.path.split(input_path)
        output_path = os.path.join(input_dir, input_name.replace("f16", dtype))

    # The quantizer reads the input while writing the output, so pointing both
    # at the same file (e.g. an input name without "f16") would destroy the model.
    invalidInputError(os.path.abspath(output_path) != os.path.abspath(input_path),
                      "The output path {} is the same file as the input.".format(output_path),
                      "Please pass `output_path` explicitly.")

    # Pass the command as an argument list so that paths containing spaces
    # reach the executable as single arguments.
    quantize_bin = os.path.join(bin_dirname, "bin", "quantize-{}".format(model_family))
    quantize_args = [quantize_bin, input_path, output_path, str(quantize_type)]
    p = subprocess.run(quantize_args)
    invalidInputError(p.returncode == 0,
                      "Fail to quantize {}.".format(str(input_path)))
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This would makes sure Python is aware there is more than one sub-package within bigdl, +# physically located elsewhere. +# Otherwise there would be module not found error in non-pip's setting as Python would +# only search the first bigdl package and end up finding only one sub-package. diff --git a/python/llm/src/bigdl/llm/utils/common/__init__.py b/python/llm/src/bigdl/llm/utils/common/__init__.py new file mode 100644 index 00000000..7d318395 --- /dev/null +++ b/python/llm/src/bigdl/llm/utils/common/__init__.py @@ -0,0 +1,22 @@ +# +# Copyright 2016 The BigDL Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This would makes sure Python is aware there is more than one sub-package within bigdl, +# physically located elsewhere. +# Otherwise there would be module not found error in non-pip's setting as Python would +# only search the first bigdl package and end up finding only one sub-package. 
#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import logging

logger = logging.getLogger(__name__)


def outputUserMessage(errMsg, fixMsg=None):
    """
    Log a usage error, an optional hint on how to fix it, and a call-stack banner.

    :param errMsg: Description of what went wrong. Converted with `str()`, so
           exception objects and other non-string values are accepted.
    :param fixMsg: Optional description of how to fix the problem; the
           "How to fix" section is skipped when it is empty or `None`.
    """
    # str() keeps a non-string message from raising TypeError here and hiding
    # the error that is being reported.
    logger.error("\n\n****************************Usage Error************************\n"
                 + str(errMsg))
    if fixMsg:
        logger.error("\n\n**************************How to fix***********************\n"
                     + str(fixMsg))
    logger.error("\n\n****************************Call Stack*************************")


def invalidInputError(condition, errMsg, fixMsg=None):
    """
    Raise `RuntimeError(errMsg)` unless `condition` is truthy.

    :param condition: Value that must be truthy for the input to be accepted.
    :param errMsg: Message logged and used as the exception argument.
    :param fixMsg: Optional hint logged together with the error.
    :raises RuntimeError: when `condition` is falsy.
    """
    if not condition:
        outputUserMessage(errMsg, fixMsg)
        raise RuntimeError(errMsg)


def invalidOperationError(condition, errMsg, fixMsg=None, cause=None):
    """
    Raise an error unless `condition` is truthy.

    :param condition: Value that must be truthy for the operation to be accepted.
    :param errMsg: Message logged and used for the fallback `RuntimeError`.
    :param fixMsg: Optional hint logged together with the error.
    :param cause: Optional exception to raise instead of `RuntimeError(errMsg)`.
    :raises RuntimeError: when `condition` is falsy and no `cause` is given;
            otherwise `cause` itself is raised.
    """
    if not condition:
        outputUserMessage(errMsg, fixMsg)
        if cause:
            raise cause
        else:
            raise RuntimeError(errMsg)
/dev/null +++ b/python/llm/vendor/llama.cpp @@ -0,0 +1 @@ +Subproject commit f6ab8e2a03f537e853fef2deb36be89c4f698b05 diff --git a/python/llm/vendor/redpajama.cpp b/python/llm/vendor/redpajama.cpp new file mode 160000 index 00000000..bec989c2 --- /dev/null +++ b/python/llm/vendor/redpajama.cpp @@ -0,0 +1 @@ +Subproject commit bec989c272fd733d1db1069fb47ad5008ae1cd7e