[llm] Initial support of package and quantize (#8228)
* first commit of CMakeFiles.txt to include llama & gptneox * initial support of quantize * update cmake for only consider linux now * support quantize interface * update based on comment
This commit is contained in:
		
							parent
							
								
									ea22416525
								
							
						
					
					
						commit
						4638b85f3e
					
				
					 11 changed files with 234 additions and 20 deletions
				
			
		
							
								
								
									
										58
									
								
								python/llm/CMakeLists.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								python/llm/CMakeLists.txt
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,58 @@
 | 
			
		|||
# Minimum/maximum tested CMake versions; must stay ahead of project().
cmake_minimum_required(VERSION 3.4...3.22)

project(bigdl_llm)

# FORCE_CMAKE: cache option (default OFF) to force the CMake build of the
# related C++ projects.
option(FORCE_CMAKE "Force CMake build of Related CPP project" OFF)

# A non-empty FORCE_CMAKE environment variable, read at configure time,
# overrides the cached option for this configure run. The expansion is quoted
# and guarded so that a missing variable leaves the option untouched instead of
# silently turning the call into `set(FORCE_CMAKE)`, which is an unset.
if(NOT "$ENV{FORCE_CMAKE}" STREQUAL "")
    set(FORCE_CMAKE "$ENV{FORCE_CMAKE}")
endif()
 | 
			
		||||
 | 
			
		||||
# Build rules for the vendored native projects. Each rule runs `make` inside
# the project's own vendor/ directory, so the shared library and the quantize
# executable are produced in place, next to the vendored sources.
# VERBATIM keeps command-line escaping identical across platforms/generators.
# NOTE(review): no DEPENDS are listed, so a rule presumably only fires while
# one of its OUTPUT files is missing - confirm this is the intended behavior.

# llama.cpp -> libllama.so + quantize
add_custom_command(
    OUTPUT
        "${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/libllama.so"
        "${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/quantize"
    COMMAND make libllama.so quantize
    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp"
    COMMENT "Building libllama.so and quantize in vendor/llama.cpp"
    VERBATIM
)

# redpajama.cpp -> libgptneox.so + quantize-gptneox
add_custom_command(
    OUTPUT
        "${CMAKE_CURRENT_SOURCE_DIR}/vendor/redpajama.cpp/libgptneox.so"
        "${CMAKE_CURRENT_SOURCE_DIR}/vendor/redpajama.cpp/quantize-gptneox"
    COMMAND make libgptneox.so quantize-gptneox
    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/vendor/redpajama.cpp"
    COMMENT "Building libgptneox.so and quantize-gptneox in vendor/redpajama.cpp"
    VERBATIM
)

# bloomz.cpp -> libbloom.so + quantize
add_custom_command(
    OUTPUT
        "${CMAKE_CURRENT_SOURCE_DIR}/vendor/bloomz.cpp/libbloom.so"
        "${CMAKE_CURRENT_SOURCE_DIR}/vendor/bloomz.cpp/quantize"
    COMMAND make libbloom.so quantize
    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/vendor/bloomz.cpp"
    COMMENT "Building libbloom.so and quantize in vendor/bloomz.cpp"
    VERBATIM
)
 | 
			
		||||
# Aggregate target `run`, part of the default build (ALL). It has no command
# of its own; it only depends on every vendored artifact so that the rules
# producing those files are executed.
set(bigdl_llm_vendor_root ${CMAKE_CURRENT_SOURCE_DIR}/vendor)
add_custom_target(run ALL
    DEPENDS
        ${bigdl_llm_vendor_root}/llama.cpp/libllama.so
        ${bigdl_llm_vendor_root}/llama.cpp/quantize
        ${bigdl_llm_vendor_root}/redpajama.cpp/libgptneox.so
        ${bigdl_llm_vendor_root}/redpajama.cpp/quantize-gptneox
        ${bigdl_llm_vendor_root}/bloomz.cpp/libbloom.so
        ${bigdl_llm_vendor_root}/bloomz.cpp/quantize
)
 | 
			
		||||
# Shared libraries of all three vendored projects go to the package's lib
# directory (relative to the install prefix).
install(
    FILES
        ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/libllama.so
        ${CMAKE_CURRENT_SOURCE_DIR}/vendor/redpajama.cpp/libgptneox.so
        ${CMAKE_CURRENT_SOURCE_DIR}/vendor/bloomz.cpp/libbloom.so
    DESTINATION src/bigdl/llm/lib
)

# Quantize executables go to the package's bin directory. llama.cpp and
# bloomz.cpp both name their tool `quantize`, so each is renamed on install to
# keep them distinct inside the shared destination.
install(
    PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/quantize
    DESTINATION src/bigdl/llm/bin
    RENAME quantize-llama
)
install(
    PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/vendor/redpajama.cpp/quantize-gptneox
    DESTINATION src/bigdl/llm/bin
)
install(
    PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/vendor/bloomz.cpp/quantize
    DESTINATION src/bigdl/llm/bin
    RENAME quantize-bloomz
)
 | 
			
		||||
| 
						 | 
				
			
			@ -16,27 +16,9 @@
 | 
			
		|||
# limitations under the License.
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
#!/usr/bin/env python
 | 
			
		||||
 | 
			
		||||
#
 | 
			
		||||
# Copyright 2016 The BigDL Authors.
 | 
			
		||||
#
 | 
			
		||||
# Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
# you may not use this file except in compliance with the License.
 | 
			
		||||
# You may obtain a copy of the License at
 | 
			
		||||
#
 | 
			
		||||
#     http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
#
 | 
			
		||||
# Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
# distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
			
		||||
# See the License for the specific language governing permissions and
 | 
			
		||||
# limitations under the License.
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import fnmatch
 | 
			
		||||
from setuptools import setup
 | 
			
		||||
from skbuild import setup
 | 
			
		||||
 | 
			
		||||
long_description = '''
 | 
			
		||||
    BigDL LLM
 | 
			
		||||
| 
						 | 
				
			
			@ -75,7 +57,7 @@ def setup_package():
 | 
			
		|||
        url='https://github.com/intel-analytics/BigDL',
 | 
			
		||||
        packages=get_llm_packages(),
 | 
			
		||||
        package_dir={"": "src"},
 | 
			
		||||
        install_requires=[],
 | 
			
		||||
        install_requires=[""],
 | 
			
		||||
        include_package_data=True,
 | 
			
		||||
        classifiers=[
 | 
			
		||||
            'License :: OSI Approved :: Apache Software License',
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -18,3 +18,5 @@
 | 
			
		|||
# physically located elsewhere.
 | 
			
		||||
# Otherwise there would be module not found error in non-pip's setting as Python would
 | 
			
		||||
# only search the first bigdl package and end up finding only one sub-package.
 | 
			
		||||
 | 
			
		||||
from .quantize import quantize
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										86
									
								
								python/llm/src/bigdl/llm/ggml/quantize.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										86
									
								
								python/llm/src/bigdl/llm/ggml/quantize.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,86 @@
 | 
			
		|||
#
 | 
			
		||||
# Copyright 2016 The BigDL Authors.
 | 
			
		||||
#
 | 
			
		||||
# Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
# you may not use this file except in compliance with the License.
 | 
			
		||||
# You may obtain a copy of the License at
 | 
			
		||||
#
 | 
			
		||||
#     http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
#
 | 
			
		||||
# Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
# distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
			
		||||
# See the License for the specific language governing permissions and
 | 
			
		||||
# limitations under the License.
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import subprocess
 | 
			
		||||
from bigdl.llm.utils.common import invalidInputError
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Absolute directory holding this module, and that directory's parent.
# `quantize()` looks for its executables under `<bin_dirname>/bin`.
dirname, _ = os.path.split(os.path.abspath(__file__))
bin_dirname = os.path.dirname(dirname)

# Per-family maps from quantization type name to the integer code that is
# passed as the last argument to that family's quantize executable.
_llama_quantize_type = dict(q4_0=2, q4_1=3, q4_2=5)
_bloomz_quantize_type = dict(q4_0=2, q4_1=3)
_gptneox_quantize_type = dict(q4_0=2, q4_1=3, q4_2=5, q5_0=8, q5_1=9, q8_0=7)

# Model family name -> that family's type map.
_quantize_type = dict(llama=_llama_quantize_type,
                      bloomz=_bloomz_quantize_type,
                      gptneox=_gptneox_quantize_type)

# Every type name accepted by at least one model family.
_valid_types = (set(_llama_quantize_type)
                | set(_bloomz_quantize_type)
                | set(_gptneox_quantize_type))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def quantize(input_path: str, output_path: str=None, model_family: str = 'llama', dtype: str='q4_0'):
    """
    Quantize ggml file to lower precision.

    The work is done by the external `quantize-<model_family>` executable located
    in the `bin` directory next to this package directory; this function validates
    the arguments, runs that executable and checks its exit status.

    :param input_path: Path of input ggml file, for example `./ggml-model-f16.bin`.
    :param output_path: Save path of output quantized model. Default to `None`.
            If you don't specify this parameter, quantized model will be saved in
            the same directory as the input, with `f16` in the file name replaced
            by `dtype`, like `./ggml-model-q4_0.bin`.
    :param model_family: Which model family your input model belongs to. Default to `llama`.
            Now only `llama`/`bloomz`/`gptneox` are supported.
    :param dtype: Quantization method which differs in the resulting model disk size and
            inference speed. Default to `q4_0`. Different model families may support
            different types, now the supported list is:
            llama : "q4_0", "q4_1", "q4_2"
            bloomz : "q4_0", "q4_1"
            gptneox : "q4_0", "q4_1", "q4_2", "q5_0", "q5_1", "q8_0"
    :raises RuntimeError: (raised by `invalidInputError`) if the model family or dtype
            is unsupported, the input file does not exist, no distinct default output
            path can be derived, or the quantize executable exits with a non-zero status.
    """
    invalidInputError(model_family in ['llama', 'bloomz', 'gptneox'],
                      "Now we only support quantization of model family('llama', 'bloomz', 'gptneox')",
                      "{} is not in the list.".format(model_family))
    invalidInputError(os.path.isfile(input_path),
                      "The file {} was not found".format(input_path))
    # convert quantize type str into corresponding int value
    quantize_type_map = _quantize_type[model_family]
    invalidInputError(dtype in quantize_type_map,
                      "{0} model just accept {1} now, but you pass in {2}.".format(
                          model_family,
                          list(quantize_type_map.keys()),
                          dtype))
    quantize_type = quantize_type_map[dtype]
    # TODO : multi input model path
    if output_path is None:
        # Substitute the precision tag in the file name only, so that a parent
        # directory which happens to contain "f16" is left untouched.
        input_dir, input_name = os.path.split(str(input_path))
        output_path = os.path.join(input_dir, input_name.replace("f16", dtype))
        # Without an "f16" tag the derived path equals the input; refuse rather
        # than let the tool write over the file it is reading.
        invalidInputError(output_path != str(input_path),
                          "Cannot derive an output path from {}.".format(input_path),
                          "Please specify `output_path` explicitly.")
    quantize_exe = os.path.join(bin_dirname, "bin", "quantize-{}".format(model_family))
    # Pass an argument list (not a whitespace-split string) so that paths
    # containing spaces reach the executable as single arguments.
    quantize_args = [quantize_exe,
                     str(input_path),
                     str(output_path),
                     str(quantize_type)]
    p = subprocess.Popen(quantize_args)
    p.communicate()
    invalidInputError(not p.returncode,
                      "Fail to quantize {}.".format(str(input_path)))
 | 
			
		||||
							
								
								
									
										20
									
								
								python/llm/src/bigdl/llm/utils/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								python/llm/src/bigdl/llm/utils/__init__.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,20 @@
 | 
			
		|||
#
 | 
			
		||||
# Copyright 2016 The BigDL Authors.
 | 
			
		||||
#
 | 
			
		||||
# Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
# you may not use this file except in compliance with the License.
 | 
			
		||||
# You may obtain a copy of the License at
 | 
			
		||||
#
 | 
			
		||||
#     http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
#
 | 
			
		||||
# Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
# distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
			
		||||
# See the License for the specific language governing permissions and
 | 
			
		||||
# limitations under the License.
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
# This makes sure Python is aware there is more than one sub-package within bigdl,
 | 
			
		||||
# physically located elsewhere.
 | 
			
		||||
# Otherwise there would be module not found error in non-pip's setting as Python would
 | 
			
		||||
# only search the first bigdl package and end up finding only one sub-package.
 | 
			
		||||
							
								
								
									
										22
									
								
								python/llm/src/bigdl/llm/utils/common/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								python/llm/src/bigdl/llm/utils/common/__init__.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,22 @@
 | 
			
		|||
#
 | 
			
		||||
# Copyright 2016 The BigDL Authors.
 | 
			
		||||
#
 | 
			
		||||
# Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
# you may not use this file except in compliance with the License.
 | 
			
		||||
# You may obtain a copy of the License at
 | 
			
		||||
#
 | 
			
		||||
#     http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
#
 | 
			
		||||
# Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
# distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
			
		||||
# See the License for the specific language governing permissions and
 | 
			
		||||
# limitations under the License.
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
# This makes sure Python is aware there is more than one sub-package within bigdl,
 | 
			
		||||
# physically located elsewhere.
 | 
			
		||||
# Otherwise there would be module not found error in non-pip's setting as Python would
 | 
			
		||||
# only search the first bigdl package and end up finding only one sub-package.
 | 
			
		||||
 | 
			
		||||
from .log4Error import invalidInputError
 | 
			
		||||
							
								
								
									
										41
									
								
								python/llm/src/bigdl/llm/utils/common/log4Error.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										41
									
								
								python/llm/src/bigdl/llm/utils/common/log4Error.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,41 @@
 | 
			
		|||
#
 | 
			
		||||
# Copyright 2016 The BigDL Authors.
 | 
			
		||||
#
 | 
			
		||||
# Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
# you may not use this file except in compliance with the License.
 | 
			
		||||
# You may obtain a copy of the License at
 | 
			
		||||
#
 | 
			
		||||
#     http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
#
 | 
			
		||||
# Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
# distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
			
		||||
# See the License for the specific language governing permissions and
 | 
			
		||||
# limitations under the License.
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
import logging
 | 
			
		||||
 | 
			
		||||
logger = logging.getLogger(__name__)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def outputUserMessage(errMsg, fixMsg=None):
    """
    Log a usage-error report at ERROR level through this module's logger.

    Emits a "Usage Error" banner followed by `errMsg`, then, when `fixMsg` is
    truthy, a "How to fix" banner followed by `fixMsg`, and finally a
    "Call Stack" banner.

    :param errMsg: Description of the error. It is concatenated to a banner
            string, so it must be a `str`.
    :param fixMsg: Optional hint on how to fix the error. Default to `None`.
    """
    usage_banner = "\n\n****************************Usage Error************************\n"
    fix_banner = "\n\n**************************How to fix***********************\n"
    stack_banner = "\n\n****************************Call Stack*************************"

    logger.error(usage_banner + errMsg)
    if fixMsg:
        logger.error(fix_banner + fixMsg)
    logger.error(stack_banner)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def invalidInputError(condition, errMsg, fixMsg=None):
    """
    Check a precondition on user input and fail loudly when it does not hold.

    :param condition: Value tested for truthiness; nothing happens when it is truthy.
    :param errMsg: Error description, logged and used as the exception message.
    :param fixMsg: Optional hint on how to fix the problem, logged only. Default to `None`.
    :raises RuntimeError: with `errMsg` when `condition` is falsy.
    """
    if condition:
        return
    # Log the report first so it appears ahead of the traceback.
    outputUserMessage(errMsg, fixMsg)
    raise RuntimeError(errMsg)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def invalidOperationError(condition, errMsg, fixMsg=None, cause=None):
    """
    Check that an operation is valid and fail loudly when it is not.

    :param condition: Value tested for truthiness; nothing happens when it is truthy.
    :param errMsg: Error description, logged and used as the message of the
            fallback `RuntimeError`.
    :param fixMsg: Optional hint on how to fix the problem, logged only. Default to `None`.
    :param cause: Optional exception to raise instead of a `RuntimeError`.
            Default to `None`.
    :raises RuntimeError: with `errMsg` when `condition` is falsy and `cause` is falsy.
    :raises Exception: `cause` itself when `condition` is falsy and `cause` is truthy.
    """
    if condition:
        return
    # Log the report first so it appears ahead of the traceback.
    outputUserMessage(errMsg, fixMsg)
    raise cause if cause else RuntimeError(errMsg)
 | 
			
		||||
							
								
								
									
										1
									
								
								python/llm/vendor/bloomz.cpp
									
									
									
									
										vendored
									
									
										Submodule
									
								
							
							
						
						
									
										1
									
								
								python/llm/vendor/bloomz.cpp
									
									
									
									
										vendored
									
									
										Submodule
									
								
							| 
						 | 
				
			
			@ -0,0 +1 @@
 | 
			
		|||
Subproject commit 6d2dee07626b7e8b6af1ada2bfb1bb17de4deaea
 | 
			
		||||
							
								
								
									
										1
									
								
								python/llm/vendor/llama.cpp
									
									
									
									
										vendored
									
									
										Submodule
									
								
							
							
						
						
									
										1
									
								
								python/llm/vendor/llama.cpp
									
									
									
									
										vendored
									
									
										Submodule
									
								
							| 
						 | 
				
			
			@ -0,0 +1 @@
 | 
			
		|||
Subproject commit f6ab8e2a03f537e853fef2deb36be89c4f698b05
 | 
			
		||||
							
								
								
									
										1
									
								
								python/llm/vendor/redpajama.cpp
									
									
									
									
										vendored
									
									
										Submodule
									
								
							
							
						
						
									
										1
									
								
								python/llm/vendor/redpajama.cpp
									
									
									
									
										vendored
									
									
										Submodule
									
								
							| 
						 | 
				
			
			@ -0,0 +1 @@
 | 
			
		|||
Subproject commit bec989c272fd733d1db1069fb47ad5008ae1cd7e
 | 
			
		||||
		Loading…
	
		Reference in a new issue