LLM: Support package/quantize for llama.cpp/redpajama.cpp on Windows (#8236)

* support llama.cpp on Windows

* update quantize

* update version of llama.cpp submodule

* add gptneox.dll

* add quantize-gptneox.exe
This commit is contained in:
Ruonan Wang 2023-05-31 14:47:12 +08:00 committed by GitHub
parent fa0b0a4555
commit c890609d1e
5 changed files with 129 additions and 17 deletions

View file

@ -32,27 +32,27 @@ add_custom_target(
)
install(
FILES ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/libllama.so
DESTINATION src/bigdl/llm/lib
DESTINATION src/bigdl/llm/libs
)
install(
PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/quantize
RENAME quantize-llama
DESTINATION src/bigdl/llm/bin
DESTINATION src/bigdl/llm/libs
)
install(
FILES ${CMAKE_CURRENT_SOURCE_DIR}/vendor/redpajama.cpp/libgptneox.so
DESTINATION src/bigdl/llm/lib
DESTINATION src/bigdl/llm/libs
)
install(
PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/vendor/redpajama.cpp/quantize-gptneox
DESTINATION src/bigdl/llm/bin
DESTINATION src/bigdl/llm/libs
)
install(
FILES ${CMAKE_CURRENT_SOURCE_DIR}/vendor/bloomz.cpp/libbloom.so
DESTINATION src/bigdl/llm/lib
DESTINATION src/bigdl/llm/libs
)
install(
PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/vendor/bloomz.cpp/quantize
RENAME quantize-bloomz
DESTINATION src/bigdl/llm/bin
DESTINATION src/bigdl/llm/libs
)

74
python/llm/setup-linux.py Normal file
View file

@ -0,0 +1,74 @@
#!/usr/bin/env python
#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import fnmatch
from skbuild import setup
# Text passed to setup() as the package's long description.
long_description = '''
BigDL LLM
'''

# fnmatch patterns; any package whose dotted name matches one is left out.
exclude_patterns = ["*__pycache__*", "*ipynb_checkpoints*"]

# Directory one level above the directory containing this script.
BIGDL_PYTHON_HOME = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Read the version string with a context manager so the file handle is
# closed deterministically instead of being left to the garbage collector.
with open(os.path.join(BIGDL_PYTHON_HOME, 'version.txt'), 'r') as version_file:
    VERSION = version_file.read().strip()

# "src" directory next to this script; root of the package tree.
llm_home = os.path.join(os.path.dirname(os.path.abspath(__file__)), "src")
def get_llm_packages():
    """Collect the dotted package names found under ``<llm_home>/bigdl``.

    Walks every directory below ``llm_home/bigdl``, converts its path
    (relative to ``llm_home``) into a dotted name and keeps it unless the
    name matches one of ``exclude_patterns``. Each visited directory and
    each include/exclude decision is printed.

    Returns:
        list of str: the dotted package names that were kept, in walk order.
    """
    root_prefix = llm_home + os.sep
    llm_packages = []
    for dirpath, _subdirs, _files in os.walk(os.path.join(llm_home, "bigdl")):
        print(dirpath)
        # Strip the llm_home prefix and turn path separators into dots.
        relative_path = dirpath.split(root_prefix)[1]
        package = relative_path.replace(os.sep, '.')

        is_excluded = any(fnmatch.fnmatchcase(package, pat=pattern)
                          for pattern in exclude_patterns)
        if not is_excluded:
            llm_packages.append(package)
            print("including", package)
            continue
        print("excluding", package)
    return llm_packages
def setup_package():
    """Assemble the package metadata and hand it to ``setup()``.

    The package list comes from ``get_llm_packages()`` and the version from
    the module-level ``VERSION``.
    """
    metadata = dict(
        name='bigdl-llm',
        version=VERSION,
        description='Large Language Model Develop Toolkit',
        long_description=long_description,
        long_description_content_type="text/markdown",
        author='BigDL Authors',
        author_email='bigdl-user-group@googlegroups.com',
        license='Apache License, Version 2.0',
        url='https://github.com/intel-analytics/BigDL',
        packages=get_llm_packages(),
        package_dir={"": "src"},
        # No runtime dependencies are declared. An empty list states that
        # explicitly; the previous [""] passed an empty string as a
        # requirement specifier.
        install_requires=[],
        include_package_data=True,
        classifiers=[
            'License :: OSI Approved :: Apache Software License',
            'Programming Language :: Python :: 3',
            'Programming Language :: Python :: 3.9',
            'Programming Language :: Python :: Implementation :: CPython'],
        platforms=['linux']
    )

    setup(**metadata)


if __name__ == '__main__':
    setup_package()

View file

@ -18,7 +18,8 @@
import os
import fnmatch
from skbuild import setup
from setuptools import setup
import urllib.request
long_description = '''
BigDL LLM
@ -44,7 +45,36 @@ def get_llm_packages():
return llm_packages
# Common location of the downloadable native binaries.
_LIB_URL_PREFIX = "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/"

# Full download URL of every native library / executable to fetch.
lib_urls = [
    _LIB_URL_PREFIX + file_name
    for file_name in (
        "llama.dll",
        "quantize-llama.exe",
        "gptneox.dll",
        "quantize-gptneox.exe",
        # TODO: add bloomz
    )
]
def download_libs(url: str):
    """Download ``url`` into ``<llm_home>/bigdl/llm/libs`` if not already present.

    The target file name is the last ``/``-separated component of ``url``.
    Nothing is downloaded when a file with that name already exists.

    Args:
        url: address of the file to fetch.

    Raises:
        Whatever ``urllib.request.urlretrieve`` or the ``os`` file operations
        raise; a partially downloaded file is removed before the error
        propagates.
    """
    libs_dir = os.path.join(llm_home, "bigdl", "llm", "libs")
    # exist_ok=True already handles the "directory exists" case.
    os.makedirs(libs_dir, exist_ok=True)

    libso_file_name = url.split('/')[-1]
    libso_file = os.path.join(libs_dir, libso_file_name)
    if os.path.exists(libso_file):
        return

    # Download to a temporary name and rename on success, so an interrupted
    # transfer never leaves a truncated file that a later run would mistake
    # for a complete download.
    partial_file = libso_file + ".part"
    try:
        urllib.request.urlretrieve(url, partial_file)
        os.replace(partial_file, libso_file)
    finally:
        if os.path.exists(partial_file):
            os.remove(partial_file)
def setup_package():
# Files, relative to the bigdl.llm package, to ship as package data.
# Every file fetched via lib_urls must be listed here, otherwise it is
# downloaded but not packaged; quantize-gptneox.exe was missing.
package_data = [
    "libs/llama.dll",
    "libs/quantize-llama.exe",
    "libs/gptneox.dll",
    "libs/quantize-gptneox.exe",
]
for url in lib_urls:
download_libs(url)
metadata = dict(
name='bigdl-llm',
version=VERSION,
@ -57,14 +87,14 @@ def setup_package():
url='https://github.com/intel-analytics/BigDL',
packages=get_llm_packages(),
package_dir={"": "src"},
install_requires=[""],
package_data={"bigdl.llm": package_data},
include_package_data=True,
classifiers=[
'License :: OSI Approved :: Apache Software License',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: Implementation :: CPython'],
platforms=['linux', 'windows']
platforms=['windows']
)
setup(**metadata)

View file

@ -17,14 +17,17 @@
import os
import subprocess
from bigdl.llm.utils.common import invalidInputError
import platform
dirname, _ = os.path.split(os.path.abspath(__file__))
bin_dirname = os.path.dirname(dirname)
libs_dirname = os.path.dirname(dirname)
_llama_quantize_type = {"q4_0": 2,
"q4_1": 3,
"q4_2": 5}
"q5_0": 8,
"q5_1": 9,
"q8_0": 7}
_bloomz_quantize_type = {"q4_0": 2,
"q4_1": 3}
_gptneox_quantize_type = {"q4_0": 2,
@ -78,11 +81,16 @@ def quantize(input_path: str, output_path: str=None,
list(quantize_type_map.keys()),
dtype))
quantize_type = quantize_type_map[dtype]
quantize_args = "{0}/bin/quantize-{1} {2} {3} {4}".format(bin_dirname,
model_family,
input_path,
output_path,
str(quantize_type))
if platform.platform().startswith('Windows'):
suffix = '.exe'
else:
suffix = ''
quantize_args = "{0}/libs/quantize-{1}{2} {3} {4} {5}".format(libs_dirname,
model_family,
suffix,
input_path,
output_path,
str(quantize_type))
p = subprocess.Popen(quantize_args.split())
p.communicate()
invalidInputError(not p.returncode,

@ -1 +1 @@
Subproject commit f6ab8e2a03f537e853fef2deb36be89c4f698b05
Subproject commit 5026ad5e525e3bb8354d9169aaf9b147ef8b6b85