[LLM] Update setup.py and llm-cli for Windows VNNI binary files (#8537)
* update setup.py
* update llm-cli
This commit is contained in:
parent
f56b5ade4c
commit
e57db777e0
2 changed files with 27 additions and 5 deletions
|
|
@ -69,7 +69,11 @@ def obtain_lib_urls():
|
|||
windows_binarys = ["llama.dll", "gptneox.dll", "bloom.dll",
|
||||
"quantize-llama.exe", "quantize-gptneox.exe", "quantize-bloom.exe",
|
||||
"main-llama.exe", "main-gptneox.exe", "main-bloom.exe",
|
||||
"starcoder.dll", "quantize-starcoder.exe", "main-starcoder.exe"]
|
||||
"starcoder.dll", "quantize-starcoder.exe", "main-starcoder.exe",
|
||||
"libllama_vnni.dll", "libgptneox_vnni.dll", "libbloom_vnni.dll",
|
||||
"quantize-llama_vnni.exe", "quantize-gptneox_vnni.exe", "quantize-bloom_vnni.exe",
|
||||
"main-llama_vnni.exe", "main-gptneox_vnni.exe", "main-bloom_vnni.exe",
|
||||
"starcoder_vnni.dll", "quantize-starcoder_vnni.exe", "main-starcoder_vnni.exe"]
|
||||
linux_binarys = ["libllama_avx2.so", "libgptneox_avx2.so", "libbloom_avx2.so",
|
||||
"libllama_avx512.so", "libgptneox_avx512.so", "libbloom_avx512.so",
|
||||
"quantize-llama", "quantize-gptneox", "quantize-bloom",
|
||||
|
|
@ -152,6 +156,18 @@ def setup_package():
|
|||
"libs/main-starcoder.exe",
|
||||
"libs/starcoder.dll",
|
||||
"libs/quantize-starcoder.exe",
|
||||
"libs/libllama_vnni.dll",
|
||||
"libs/libgptneox_vnni.dll",
|
||||
"libs/libbloom_vnni.dll",
|
||||
"libs/quantize-llama_vnni.exe",
|
||||
"libs/quantize-gptneox_vnni.exe",
|
||||
"libs/quantize-bloom_vnni.exe",
|
||||
"libs/main-llama_vnni.exe",
|
||||
"libs/main-gptneox_vnni.exe",
|
||||
"libs/main-bloom_vnni.exe",
|
||||
"libs/starcoder_vnni.dll",
|
||||
"libs/quantize-starcoder_vnni.exe",
|
||||
"libs/main-starcoder_vnni.exe"
|
||||
]
|
||||
package_data["Linux"] = [
|
||||
"libs/libllama_avx2.so",
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
$llm_dir = (Split-Path -Parent (python -c "import bigdl.llm;print(bigdl.llm.__file__)"))
|
||||
$lib_dir = Join-Path $llm_dir "libs"
|
||||
|
||||
|
||||
$vnni_enable = ((python -c "from bigdl.llm.utils.isa_checker import check_avx_vnni;print(check_avx_vnni())").ToLower() -eq "true")
|
||||
$model_family = ""
|
||||
$threads = 8
|
||||
$n_predict = 128
|
||||
|
|
@ -21,28 +23,32 @@ function Display-Help
|
|||
|
||||
function llama
|
||||
{
|
||||
$command = "$lib_dir/main-llama.exe -t $threads -n $n_predict $filteredArguments"
|
||||
$exec_file = if ($vnni_enable) { "main-llama_vnni.exe" } else { "main-llama.exe" }
|
||||
$command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments"
|
||||
Write-Host "$command"
|
||||
Invoke-Expression $command
|
||||
}
|
||||
|
||||
function bloom
|
||||
{
|
||||
$command = "$lib_dir/main-bloom.exe -t $threads -n $n_predict $filteredArguments"
|
||||
$exec_file = if ($vnni_enable) { "main-bloom_vnni.exe" } else { "main-bloom.exe" }
|
||||
$command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments"
|
||||
Write-Host "$command"
|
||||
Invoke-Expression $command
|
||||
}
|
||||
|
||||
function gptneox
|
||||
{
|
||||
$command = "$lib_dir/main-gptneox.exe -t $threads -n $n_predict $filteredArguments"
|
||||
$exec_file = if ($vnni_enable) { "main-gptneox_vnni.exe" } else { "main-gptneox.exe" }
|
||||
$command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments"
|
||||
Write-Host "$command"
|
||||
Invoke-Expression $command
|
||||
}
|
||||
|
||||
function starcoder
|
||||
{
|
||||
$command = "$lib_dir/main-starcoder.exe -t $threads -n $n_predict $filteredArguments"
|
||||
$exec_file = if ($vnni_enable) { "main-starcoder_vnni.exe" } else { "main-starcoder.exe" }
|
||||
$command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments"
|
||||
Write-Host "$command"
|
||||
Invoke-Expression $command
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue