[LLM] Revert llm-cli to disable selecting executables on Windows (#8630)
* revert vnni file select
* revert setup.py
* add model-api.dll
This commit is contained in:
parent
3dbab9087b
commit
3361b66449
4 changed files with 75 additions and 83 deletions
8
.github/workflows/llm-binary-build.yml
vendored
8
.github/workflows/llm-binary-build.yml
vendored
|
|
@ -226,19 +226,19 @@ jobs:
|
||||||
run: |
|
run: |
|
||||||
if (Test-Path ./release) { rm -r -fo release }
|
if (Test-Path ./release) { rm -r -fo release }
|
||||||
mkdir release
|
mkdir release
|
||||||
mv build/Release/main-bloom.exe release/main-bloom_vnni.exe
|
# mv build/Release/main-bloom.exe release/main-bloom_vnni.exe
|
||||||
mv build/Release/quantize-bloom.exe release/quantize-bloom_vnni.exe
|
mv build/Release/quantize-bloom.exe release/quantize-bloom_vnni.exe
|
||||||
mv build/Release/bloom.dll release/libbloom_vnni.dll
|
mv build/Release/bloom.dll release/libbloom_vnni.dll
|
||||||
|
|
||||||
mv build/Release/main-llama.exe release/main-llama_vnni.exe
|
# mv build/Release/main-llama.exe release/main-llama_vnni.exe
|
||||||
mv build/Release/quantize-llama.exe release/quantize-llama_vnni.exe
|
mv build/Release/quantize-llama.exe release/quantize-llama_vnni.exe
|
||||||
mv build/Release/llama.dll release/libllama_vnni.dll
|
mv build/Release/llama.dll release/libllama_vnni.dll
|
||||||
|
|
||||||
mv build/Release/main-gptneox.exe release/main-gptneox_vnni.exe
|
# mv build/Release/main-gptneox.exe release/main-gptneox_vnni.exe
|
||||||
mv build/Release/quantize-gptneox.exe release/quantize-gptneox_vnni.exe
|
mv build/Release/quantize-gptneox.exe release/quantize-gptneox_vnni.exe
|
||||||
mv build/Release/gptneox.dll release/libgptneox_vnni.dll
|
mv build/Release/gptneox.dll release/libgptneox_vnni.dll
|
||||||
|
|
||||||
mv build/Release/main-starcoder.exe release/main-starcoder_vnni.exe
|
# mv build/Release/main-starcoder.exe release/main-starcoder_vnni.exe
|
||||||
mv build/Release/quantize-starcoder.exe release/quantize-starcoder_vnni.exe
|
mv build/Release/quantize-starcoder.exe release/quantize-starcoder_vnni.exe
|
||||||
mv build/Release/starcoder.dll release/libstarcoder_vnni.dll
|
mv build/Release/starcoder.dll release/libstarcoder_vnni.dll
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -43,11 +43,61 @@ long_description = '''
|
||||||
|
|
||||||
exclude_patterns = ["*__pycache__*", "*ipynb_checkpoints*"]
|
exclude_patterns = ["*__pycache__*", "*ipynb_checkpoints*"]
|
||||||
BIGDL_PYTHON_HOME = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
BIGDL_PYTHON_HOME = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
VERSION = open(os.path.join(BIGDL_PYTHON_HOME, 'version.txt'), 'r').read().strip()
|
VERSION = open(os.path.join(BIGDL_PYTHON_HOME,
|
||||||
|
'version.txt'), 'r').read().strip()
|
||||||
llm_home = os.path.join(os.path.dirname(os.path.abspath(__file__)), "src")
|
llm_home = os.path.join(os.path.dirname(os.path.abspath(__file__)), "src")
|
||||||
github_artifact_dir = os.path.join(llm_home, '../llm-binary')
|
github_artifact_dir = os.path.join(llm_home, '../llm-binary')
|
||||||
libs_dir = os.path.join(llm_home, "bigdl", "llm", "libs")
|
libs_dir = os.path.join(llm_home, "bigdl", "llm", "libs")
|
||||||
CONVERT_DEP = ['numpy >= 1.22', 'torch', 'transformers', 'sentencepiece', 'accelerate']
|
CONVERT_DEP = ['numpy >= 1.22', 'torch',
|
||||||
|
'transformers', 'sentencepiece', 'accelerate']
|
||||||
|
windows_binarys = [
|
||||||
|
"llama.dll",
|
||||||
|
"gptneox.dll",
|
||||||
|
"bloom.dll",
|
||||||
|
"starcoder.dll",
|
||||||
|
"llama-api.dll",
|
||||||
|
"gptneox-api.dll",
|
||||||
|
"bloom-api.dll",
|
||||||
|
"starcoder-api.dll",
|
||||||
|
"quantize-llama.exe",
|
||||||
|
"quantize-gptneox.exe",
|
||||||
|
"quantize-bloom.exe",
|
||||||
|
"quantize-starcoder.exe",
|
||||||
|
"main-llama.exe",
|
||||||
|
"main-gptneox.exe",
|
||||||
|
"main-bloom.exe",
|
||||||
|
"main-starcoder.exe",
|
||||||
|
"libllama_vnni.dll",
|
||||||
|
"libgptneox_vnni.dll",
|
||||||
|
"libbloom_vnni.dll",
|
||||||
|
"libstarcoder_vnni.dll",
|
||||||
|
"quantize-llama_vnni.exe",
|
||||||
|
"quantize-gptneox_vnni.exe",
|
||||||
|
"quantize-bloom_vnni.exe",
|
||||||
|
"quantize-starcoder_vnni.exe",
|
||||||
|
]
|
||||||
|
linux_binarys = [
|
||||||
|
"libllama_avx2.so",
|
||||||
|
"libgptneox_avx2.so",
|
||||||
|
"libbloom_avx2.so",
|
||||||
|
"libllama_avx512.so",
|
||||||
|
"libgptneox_avx512.so",
|
||||||
|
"libbloom_avx512.so",
|
||||||
|
"quantize-llama",
|
||||||
|
"quantize-gptneox",
|
||||||
|
"quantize-bloom",
|
||||||
|
"main-llama_avx2",
|
||||||
|
"main-gptneox_avx2",
|
||||||
|
"main-bloom_avx2",
|
||||||
|
"main-llama_avx512",
|
||||||
|
"main-gptneox_avx512",
|
||||||
|
"main-bloom_avx512",
|
||||||
|
"libstarcoder_avx512.so",
|
||||||
|
"main-starcoder_avx512",
|
||||||
|
"quantize-starcoder",
|
||||||
|
"libstarcoder_avx2.so",
|
||||||
|
"main-starcoder_avx2"
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def get_llm_packages():
|
def get_llm_packages():
|
||||||
|
|
@ -66,21 +116,6 @@ def get_llm_packages():
|
||||||
|
|
||||||
def obtain_lib_urls():
|
def obtain_lib_urls():
|
||||||
base_url = "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/"
|
base_url = "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/"
|
||||||
windows_binarys = ["llama.dll", "gptneox.dll", "bloom.dll",
|
|
||||||
"quantize-llama.exe", "quantize-gptneox.exe", "quantize-bloom.exe",
|
|
||||||
"main-llama.exe", "main-gptneox.exe", "main-bloom.exe",
|
|
||||||
"starcoder.dll", "quantize-starcoder.exe", "main-starcoder.exe",
|
|
||||||
"libllama_vnni.dll", "libgptneox_vnni.dll", "libbloom_vnni.dll",
|
|
||||||
"quantize-llama_vnni.exe", "quantize-gptneox_vnni.exe", "quantize-bloom_vnni.exe",
|
|
||||||
"main-llama_vnni.exe", "main-gptneox_vnni.exe", "main-bloom_vnni.exe",
|
|
||||||
"libstarcoder_vnni.dll", "quantize-starcoder_vnni.exe", "main-starcoder_vnni.exe"]
|
|
||||||
linux_binarys = ["libllama_avx2.so", "libgptneox_avx2.so", "libbloom_avx2.so",
|
|
||||||
"libllama_avx512.so", "libgptneox_avx512.so", "libbloom_avx512.so",
|
|
||||||
"quantize-llama", "quantize-gptneox", "quantize-bloom",
|
|
||||||
"main-llama_avx2", "main-gptneox_avx2", "main-bloom_avx2",
|
|
||||||
"main-llama_avx512", "main-gptneox_avx512", "main-bloom_avx512",
|
|
||||||
"libstarcoder_avx512.so", "main-starcoder_avx512", "quantize-starcoder",
|
|
||||||
"libstarcoder_avx2.so", "main-starcoder_avx2"]
|
|
||||||
|
|
||||||
def get_date_urls(base_url):
|
def get_date_urls(base_url):
|
||||||
# obtain all urls based on date(format: xxxx-xx-xx)
|
# obtain all urls based on date(format: xxxx-xx-xx)
|
||||||
|
|
@ -143,55 +178,10 @@ def download_libs(url: str, change_permission=False):
|
||||||
|
|
||||||
def setup_package():
|
def setup_package():
|
||||||
package_data = {}
|
package_data = {}
|
||||||
package_data["Windows"] = [
|
package_data["Windows"] = list(map(lambda x: os.path.join('libs', x),
|
||||||
"libs/llama.dll",
|
windows_binarys))
|
||||||
"libs/gptneox.dll",
|
package_data["Linux"] = list(map(lambda x: os.path.join('libs', x),
|
||||||
"libs/bloom.dll",
|
linux_binarys))
|
||||||
"libs/starcoder.dll",
|
|
||||||
"libs/quantize-llama.exe",
|
|
||||||
"libs/quantize-gptneox.exe",
|
|
||||||
"libs/quantize-bloom.exe",
|
|
||||||
"libs/quantize-starcoder.exe",
|
|
||||||
"libs/main-bloom.exe",
|
|
||||||
"libs/main-gptneox.exe",
|
|
||||||
"libs/main-llama.exe",
|
|
||||||
"libs/main-starcoder.exe",
|
|
||||||
"libs/libllama_vnni.dll",
|
|
||||||
"libs/libgptneox_vnni.dll",
|
|
||||||
"libs/libbloom_vnni.dll",
|
|
||||||
"libs/libstarcoder_vnni.dll",
|
|
||||||
"libs/quantize-llama_vnni.exe",
|
|
||||||
"libs/quantize-gptneox_vnni.exe",
|
|
||||||
"libs/quantize-bloom_vnni.exe",
|
|
||||||
"libs/quantize-starcoder_vnni.exe",
|
|
||||||
"libs/main-llama_vnni.exe",
|
|
||||||
"libs/main-gptneox_vnni.exe",
|
|
||||||
"libs/main-bloom_vnni.exe",
|
|
||||||
"libs/main-starcoder_vnni.exe"
|
|
||||||
]
|
|
||||||
package_data["Linux"] = [
|
|
||||||
"libs/libllama_avx2.so",
|
|
||||||
"libs/libllama_avx512.so",
|
|
||||||
"libs/quantize-llama",
|
|
||||||
"libs/libgptneox_avx2.so",
|
|
||||||
"libs/libgptneox_avx512.so",
|
|
||||||
"libs/quantize-gptneox",
|
|
||||||
"libs/libbloom_avx2.so",
|
|
||||||
"libs/libbloom_avx512.so",
|
|
||||||
"libs/quantize-bloom",
|
|
||||||
"libs/libstarcoder_avx512.so",
|
|
||||||
"libs/libstarcoder_avx2.so",
|
|
||||||
"libs/quantize-starcoder",
|
|
||||||
"libs/main-bloom_avx2",
|
|
||||||
"libs/main-bloom_avx512",
|
|
||||||
"libs/main-gptneox_avx2",
|
|
||||||
"libs/main-gptneox_avx512",
|
|
||||||
"libs/main-llama_avx2",
|
|
||||||
"libs/main-llama_avx512",
|
|
||||||
"libs/main-starcoder_avx512",
|
|
||||||
"libs/main-starcoder_avx2",
|
|
||||||
]
|
|
||||||
|
|
||||||
platform_name = None
|
platform_name = None
|
||||||
if "--win" in sys.argv:
|
if "--win" in sys.argv:
|
||||||
platform_name = "Windows"
|
platform_name = "Windows"
|
||||||
|
|
@ -229,7 +219,8 @@ def setup_package():
|
||||||
file_path = os.path.join(libs_dir, os.path.basename(file))
|
file_path = os.path.join(libs_dir, os.path.basename(file))
|
||||||
if not os.path.exists(file_path):
|
if not os.path.exists(file_path):
|
||||||
print(f'Could not find package dependency file: {file_path}')
|
print(f'Could not find package dependency file: {file_path}')
|
||||||
raise FileNotFoundError(f'Could not find package dependency file: {file_path}')
|
raise FileNotFoundError(
|
||||||
|
f'Could not find package dependency file: {file_path}')
|
||||||
|
|
||||||
all_requires = ['py-cpuinfo']
|
all_requires = ['py-cpuinfo']
|
||||||
all_requires += CONVERT_DEP
|
all_requires += CONVERT_DEP
|
||||||
|
|
@ -246,7 +237,8 @@ def setup_package():
|
||||||
url='https://github.com/intel-analytics/BigDL',
|
url='https://github.com/intel-analytics/BigDL',
|
||||||
packages=get_llm_packages(),
|
packages=get_llm_packages(),
|
||||||
package_dir={"": "src"},
|
package_dir={"": "src"},
|
||||||
package_data={"bigdl.llm": package_data[platform_name] + ["cli/prompts/*.txt"]},
|
package_data={
|
||||||
|
"bigdl.llm": package_data[platform_name] + ["cli/prompts/*.txt"]},
|
||||||
include_package_data=True,
|
include_package_data=True,
|
||||||
entry_points={
|
entry_points={
|
||||||
"console_scripts": [
|
"console_scripts": [
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@ function Display-Help
|
||||||
|
|
||||||
function llama
|
function llama
|
||||||
{
|
{
|
||||||
$exec_file = if ($vnni_enable) { "main-llama_vnni.exe" } else { "main-llama.exe" }
|
$exec_file = "main-llama.exe"
|
||||||
$prompt_file = Join-Path $prompt_dir "chat-with-llm.txt"
|
$prompt_file = Join-Path $prompt_dir "chat-with-llm.txt"
|
||||||
$command = "$lib_dir/$exec_file -t $threads -n $n_predict -f $prompt_file -i --color --reverse-prompt 'USER:' --in-prefix ' ' $filteredArguments"
|
$command = "$lib_dir/$exec_file -t $threads -n $n_predict -f $prompt_file -i --color --reverse-prompt 'USER:' --in-prefix ' ' $filteredArguments"
|
||||||
Write-Host "$command"
|
Write-Host "$command"
|
||||||
|
|
@ -33,7 +33,7 @@ function llama
|
||||||
|
|
||||||
function gptneox
|
function gptneox
|
||||||
{
|
{
|
||||||
$exec_file = if ($vnni_enable) { "main-gptneox_vnni.exe" } else { "main-gptneox.exe" }
|
$exec_file = "main-gptneox.exe"
|
||||||
$prompt = "A chat between a curious human and an artificial intelligence assistant.`
|
$prompt = "A chat between a curious human and an artificial intelligence assistant.`
|
||||||
The assistant gives helpful, detailed, and polite answers."
|
The assistant gives helpful, detailed, and polite answers."
|
||||||
$command = "$lib_dir/$exec_file -t $threads -n $n_predict --color --instruct -p '$prompt' $filteredArguments"
|
$command = "$lib_dir/$exec_file -t $threads -n $n_predict --color --instruct -p '$prompt' $filteredArguments"
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@ function Display-Help
|
||||||
|
|
||||||
function llama
|
function llama
|
||||||
{
|
{
|
||||||
$exec_file = if ($vnni_enable) { "main-llama_vnni.exe" } else { "main-llama.exe" }
|
$exec_file = "main-llama.exe"
|
||||||
$command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments"
|
$command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments"
|
||||||
Write-Host "$command"
|
Write-Host "$command"
|
||||||
Invoke-Expression $command
|
Invoke-Expression $command
|
||||||
|
|
@ -31,7 +31,7 @@ function llama
|
||||||
|
|
||||||
function bloom
|
function bloom
|
||||||
{
|
{
|
||||||
$exec_file = if ($vnni_enable) { "main-bloom_vnni.exe" } else { "main-bloom.exe" }
|
$exec_file = "main-bloom.exe"
|
||||||
$command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments"
|
$command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments"
|
||||||
Write-Host "$command"
|
Write-Host "$command"
|
||||||
Invoke-Expression $command
|
Invoke-Expression $command
|
||||||
|
|
@ -39,7 +39,7 @@ function bloom
|
||||||
|
|
||||||
function gptneox
|
function gptneox
|
||||||
{
|
{
|
||||||
$exec_file = if ($vnni_enable) { "main-gptneox_vnni.exe" } else { "main-gptneox.exe" }
|
$exec_file = "main-gptneox.exe"
|
||||||
$command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments"
|
$command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments"
|
||||||
Write-Host "$command"
|
Write-Host "$command"
|
||||||
Invoke-Expression $command
|
Invoke-Expression $command
|
||||||
|
|
@ -47,7 +47,7 @@ function gptneox
|
||||||
|
|
||||||
function starcoder
|
function starcoder
|
||||||
{
|
{
|
||||||
$exec_file = if ($vnni_enable) { "main-starcoder_vnni.exe" } else { "main-starcoder.exe" }
|
$exec_file = "main-starcoder.exe"
|
||||||
$command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments"
|
$command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments"
|
||||||
Write-Host "$command"
|
Write-Host "$command"
|
||||||
Invoke-Expression $command
|
Invoke-Expression $command
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue