[LLM] Setup.py & llm-cli update for windows vnni binary files (#8537)

* update setup.py
* update llm-cli
This commit is contained in:
xingyuan li 2023-07-17 12:28:38 +09:00 committed by GitHub
parent f56b5ade4c
commit e57db777e0
2 changed files with 27 additions and 5 deletions

View file

@@ -69,7 +69,11 @@ def obtain_lib_urls():
windows_binarys = ["llama.dll", "gptneox.dll", "bloom.dll", windows_binarys = ["llama.dll", "gptneox.dll", "bloom.dll",
"quantize-llama.exe", "quantize-gptneox.exe", "quantize-bloom.exe", "quantize-llama.exe", "quantize-gptneox.exe", "quantize-bloom.exe",
"main-llama.exe", "main-gptneox.exe", "main-bloom.exe", "main-llama.exe", "main-gptneox.exe", "main-bloom.exe",
"starcoder.dll", "quantize-starcoder.exe", "main-starcoder.exe"] "starcoder.dll", "quantize-starcoder.exe", "main-starcoder.exe",
"libllama_vnni.dll", "libgptneox_vnni.dll", "libbloom_vnni.dll",
"quantize-llama_vnni.exe", "quantize-gptneox_vnni.exe", "quantize-bloom_vnni.exe",
"main-llama_vnni.exe", "main-gptneox_vnni.exe", "main-bloom_vnni.exe",
"starcoder_vnni.dll", "quantize-starcoder_vnni.exe", "main-starcoder_vnni.exe"]
linux_binarys = ["libllama_avx2.so", "libgptneox_avx2.so", "libbloom_avx2.so", linux_binarys = ["libllama_avx2.so", "libgptneox_avx2.so", "libbloom_avx2.so",
"libllama_avx512.so", "libgptneox_avx512.so", "libbloom_avx512.so", "libllama_avx512.so", "libgptneox_avx512.so", "libbloom_avx512.so",
"quantize-llama", "quantize-gptneox", "quantize-bloom", "quantize-llama", "quantize-gptneox", "quantize-bloom",
@@ -152,6 +156,18 @@ def setup_package():
"libs/main-starcoder.exe", "libs/main-starcoder.exe",
"libs/starcoder.dll", "libs/starcoder.dll",
"libs/quantize-starcoder.exe", "libs/quantize-starcoder.exe",
"libs/libllama_vnni.dll",
"libs/libgptneox_vnni.dll",
"libs/libbloom_vnni.dll",
"libs/quantize-llama_vnni.exe",
"libs/quantize-gptneox_vnni.exe",
"libs/quantize-bloom_vnni.exe",
"libs/main-llama_vnni.exe",
"libs/main-gptneox_vnni.exe",
"libs/main-bloom_vnni.exe",
"libs/starcoder_vnni.dll",
"libs/quantize-starcoder_vnni.exe",
"libs/main-starcoder_vnni.exe"
] ]
package_data["Linux"] = [ package_data["Linux"] = [
"libs/libllama_avx2.so", "libs/libllama_avx2.so",

View file

@@ -1,6 +1,8 @@
$llm_dir = (Split-Path -Parent (python -c "import bigdl.llm;print(bigdl.llm.__file__)")) $llm_dir = (Split-Path -Parent (python -c "import bigdl.llm;print(bigdl.llm.__file__)"))
$lib_dir = Join-Path $llm_dir "libs" $lib_dir = Join-Path $llm_dir "libs"
$vnni_enable = ((python -c "from bigdl.llm.utils.isa_checker import check_avx_vnni;print(check_avx_vnni())").ToLower() -eq "true")
$model_family = "" $model_family = ""
$threads = 8 $threads = 8
$n_predict = 128 $n_predict = 128
@@ -21,28 +23,32 @@ function Display-Help
function llama function llama
{ {
- $command = "$lib_dir/main-llama.exe -t $threads -n $n_predict $filteredArguments"
+ $exec_file = if ($vnni_enable) { "main-llama_vnni.exe" } else { "main-llama.exe" }
+ $command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments"
Write-Host "$command" Write-Host "$command"
Invoke-Expression $command Invoke-Expression $command
} }
function bloom function bloom
{ {
- $command = "$lib_dir/main-bloom.exe -t $threads -n $n_predict $filteredArguments"
+ $exec_file = if ($vnni_enable) { "main-bloom_vnni.exe" } else { "main-bloom.exe" }
+ $command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments"
Write-Host "$command" Write-Host "$command"
Invoke-Expression $command Invoke-Expression $command
} }
function gptneox function gptneox
{ {
- $command = "$lib_dir/main-gptneox.exe -t $threads -n $n_predict $filteredArguments"
+ $exec_file = if ($vnni_enable) { "main-gptneox_vnni.exe" } else { "main-gptneox.exe" }
+ $command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments"
Write-Host "$command" Write-Host "$command"
Invoke-Expression $command Invoke-Expression $command
} }
function starcoder function starcoder
{ {
- $command = "$lib_dir/main-starcoder.exe -t $threads -n $n_predict $filteredArguments"
+ $exec_file = if ($vnni_enable) { "main-starcoder_vnni.exe" } else { "main-starcoder.exe" }
+ $command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments"
Write-Host "$command" Write-Host "$command"
Invoke-Expression $command Invoke-Expression $command
} }