[LLM] Setup.py & llm-cli update for windows vnni binary files (#8537)
* update setup.py * update llm-cli
This commit is contained in:
parent
f56b5ade4c
commit
e57db777e0
2 changed files with 27 additions and 5 deletions
|
|
@ -69,7 +69,11 @@ def obtain_lib_urls():
|
||||||
windows_binarys = ["llama.dll", "gptneox.dll", "bloom.dll",
|
windows_binarys = ["llama.dll", "gptneox.dll", "bloom.dll",
|
||||||
"quantize-llama.exe", "quantize-gptneox.exe", "quantize-bloom.exe",
|
"quantize-llama.exe", "quantize-gptneox.exe", "quantize-bloom.exe",
|
||||||
"main-llama.exe", "main-gptneox.exe", "main-bloom.exe",
|
"main-llama.exe", "main-gptneox.exe", "main-bloom.exe",
|
||||||
"starcoder.dll", "quantize-starcoder.exe", "main-starcoder.exe"]
|
"starcoder.dll", "quantize-starcoder.exe", "main-starcoder.exe",
|
||||||
|
"libllama_vnni.dll", "libgptneox_vnni.dll", "libbloom_vnni.dll",
|
||||||
|
"quantize-llama_vnni.exe", "quantize-gptneox_vnni.exe", "quantize-bloom_vnni.exe",
|
||||||
|
"main-llama_vnni.exe", "main-gptneox_vnni.exe", "main-bloom_vnni.exe",
|
||||||
|
"starcoder_vnni.dll", "quantize-starcoder_vnni.exe", "main-starcoder_vnni.exe"]
|
||||||
linux_binarys = ["libllama_avx2.so", "libgptneox_avx2.so", "libbloom_avx2.so",
|
linux_binarys = ["libllama_avx2.so", "libgptneox_avx2.so", "libbloom_avx2.so",
|
||||||
"libllama_avx512.so", "libgptneox_avx512.so", "libbloom_avx512.so",
|
"libllama_avx512.so", "libgptneox_avx512.so", "libbloom_avx512.so",
|
||||||
"quantize-llama", "quantize-gptneox", "quantize-bloom",
|
"quantize-llama", "quantize-gptneox", "quantize-bloom",
|
||||||
|
|
@ -152,6 +156,18 @@ def setup_package():
|
||||||
"libs/main-starcoder.exe",
|
"libs/main-starcoder.exe",
|
||||||
"libs/starcoder.dll",
|
"libs/starcoder.dll",
|
||||||
"libs/quantize-starcoder.exe",
|
"libs/quantize-starcoder.exe",
|
||||||
|
"libs/libllama_vnni.dll",
|
||||||
|
"libs/libgptneox_vnni.dll",
|
||||||
|
"libs/libbloom_vnni.dll",
|
||||||
|
"libs/quantize-llama_vnni.exe",
|
||||||
|
"libs/quantize-gptneox_vnni.exe",
|
||||||
|
"libs/quantize-bloom_vnni.exe",
|
||||||
|
"libs/main-llama_vnni.exe",
|
||||||
|
"libs/main-gptneox_vnni.exe",
|
||||||
|
"libs/main-bloom_vnni.exe",
|
||||||
|
"libs/starcoder_vnni.dll",
|
||||||
|
"libs/quantize-starcoder_vnni.exe",
|
||||||
|
"libs/main-starcoder_vnni.exe"
|
||||||
]
|
]
|
||||||
package_data["Linux"] = [
|
package_data["Linux"] = [
|
||||||
"libs/libllama_avx2.so",
|
"libs/libllama_avx2.so",
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,8 @@
|
||||||
$llm_dir = (Split-Path -Parent (python -c "import bigdl.llm;print(bigdl.llm.__file__)"))
|
$llm_dir = (Split-Path -Parent (python -c "import bigdl.llm;print(bigdl.llm.__file__)"))
|
||||||
$lib_dir = Join-Path $llm_dir "libs"
|
$lib_dir = Join-Path $llm_dir "libs"
|
||||||
|
|
||||||
|
|
||||||
|
$vnni_enable = ((python -c "from bigdl.llm.utils.isa_checker import check_avx_vnni;print(check_avx_vnni())").ToLower() -eq "true")
|
||||||
$model_family = ""
|
$model_family = ""
|
||||||
$threads = 8
|
$threads = 8
|
||||||
$n_predict = 128
|
$n_predict = 128
|
||||||
|
|
@ -21,28 +23,32 @@ function Display-Help
|
||||||
|
|
||||||
function llama
|
function llama
|
||||||
{
|
{
|
||||||
$command = "$lib_dir/main-llama.exe -t $threads -n $n_predict $filteredArguments"
|
$exec_file = if ($vnni_enable) { "main-llama_vnni.exe" } else { "main-llama.exe" }
|
||||||
|
$command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments"
|
||||||
Write-Host "$command"
|
Write-Host "$command"
|
||||||
Invoke-Expression $command
|
Invoke-Expression $command
|
||||||
}
|
}
|
||||||
|
|
||||||
function bloom
|
function bloom
|
||||||
{
|
{
|
||||||
$command = "$lib_dir/main-bloom.exe -t $threads -n $n_predict $filteredArguments"
|
$exec_file = if ($vnni_enable) { "main-bloom_vnni.exe" } else { "main-bloom.exe" }
|
||||||
|
$command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments"
|
||||||
Write-Host "$command"
|
Write-Host "$command"
|
||||||
Invoke-Expression $command
|
Invoke-Expression $command
|
||||||
}
|
}
|
||||||
|
|
||||||
function gptneox
|
function gptneox
|
||||||
{
|
{
|
||||||
$command = "$lib_dir/main-gptneox.exe -t $threads -n $n_predict $filteredArguments"
|
$exec_file = if ($vnni_enable) { "main-gptneox_vnni.exe" } else { "main-gptneox.exe" }
|
||||||
|
$command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments"
|
||||||
Write-Host "$command"
|
Write-Host "$command"
|
||||||
Invoke-Expression $command
|
Invoke-Expression $command
|
||||||
}
|
}
|
||||||
|
|
||||||
function starcoder
|
function starcoder
|
||||||
{
|
{
|
||||||
$command = "$lib_dir/main-starcoder.exe -t $threads -n $n_predict $filteredArguments"
|
$exec_file = if ($vnni_enable) { "main-starcoder_vnni.exe" } else { "main-starcoder.exe" }
|
||||||
|
$command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments"
|
||||||
Write-Host "$command"
|
Write-Host "$command"
|
||||||
Invoke-Expression $command
|
Invoke-Expression $command
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue