diff --git a/python/llm/setup.py b/python/llm/setup.py index 198fc063..f5605ef7 100644 --- a/python/llm/setup.py +++ b/python/llm/setup.py @@ -69,7 +69,11 @@ def obtain_lib_urls(): windows_binarys = ["llama.dll", "gptneox.dll", "bloom.dll", "quantize-llama.exe", "quantize-gptneox.exe", "quantize-bloom.exe", "main-llama.exe", "main-gptneox.exe", "main-bloom.exe", - "starcoder.dll", "quantize-starcoder.exe", "main-starcoder.exe"] + "starcoder.dll", "quantize-starcoder.exe", "main-starcoder.exe", + "libllama_vnni.dll", "libgptneox_vnni.dll", "libbloom_vnni.dll", + "quantize-llama_vnni.exe", "quantize-gptneox_vnni.exe", "quantize-bloom_vnni.exe", + "main-llama_vnni.exe", "main-gptneox_vnni.exe", "main-bloom_vnni.exe", + "starcoder_vnni.dll", "quantize-starcoder_vnni.exe", "main-starcoder_vnni.exe"] linux_binarys = ["libllama_avx2.so", "libgptneox_avx2.so", "libbloom_avx2.so", "libllama_avx512.so", "libgptneox_avx512.so", "libbloom_avx512.so", "quantize-llama", "quantize-gptneox", "quantize-bloom", @@ -152,6 +156,18 @@ def setup_package(): "libs/main-starcoder.exe", "libs/starcoder.dll", "libs/quantize-starcoder.exe", + "libs/libllama_vnni.dll", + "libs/libgptneox_vnni.dll", + "libs/libbloom_vnni.dll", + "libs/quantize-llama_vnni.exe", + "libs/quantize-gptneox_vnni.exe", + "libs/quantize-bloom_vnni.exe", + "libs/main-llama_vnni.exe", + "libs/main-gptneox_vnni.exe", + "libs/main-bloom_vnni.exe", + "libs/starcoder_vnni.dll", + "libs/quantize-starcoder_vnni.exe", + "libs/main-starcoder_vnni.exe" ] package_data["Linux"] = [ "libs/libllama_avx2.so", diff --git a/python/llm/src/bigdl/llm/cli/llm-cli.ps1 b/python/llm/src/bigdl/llm/cli/llm-cli.ps1 index cef9ca76..f541d54b 100755 --- a/python/llm/src/bigdl/llm/cli/llm-cli.ps1 +++ b/python/llm/src/bigdl/llm/cli/llm-cli.ps1 @@ -1,6 +1,8 @@ $llm_dir = (Split-Path -Parent (python -c "import bigdl.llm;print(bigdl.llm.__file__)")) $lib_dir = Join-Path $llm_dir "libs" + +$vnni_enable = ((python -c "from bigdl.llm.utils.isa_checker import check_avx_vnni;print(check_avx_vnni())").ToLower() -eq "true") $model_family = "" $threads = 8 $n_predict = 128 @@ -21,28 +23,32 @@ function Display-Help function llama { - $command = "$lib_dir/main-llama.exe -t $threads -n $n_predict $filteredArguments" + $exec_file = if ($vnni_enable) { "main-llama_vnni.exe" } else { "main-llama.exe" } + $command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments" Write-Host "$command" Invoke-Expression $command } function bloom { - $command = "$lib_dir/main-bloom.exe -t $threads -n $n_predict $filteredArguments" + $exec_file = if ($vnni_enable) { "main-bloom_vnni.exe" } else { "main-bloom.exe" } + $command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments" Write-Host "$command" Invoke-Expression $command } function gptneox { - $command = "$lib_dir/main-gptneox.exe -t $threads -n $n_predict $filteredArguments" + $exec_file = if ($vnni_enable) { "main-gptneox_vnni.exe" } else { "main-gptneox.exe" } + $command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments" Write-Host "$command" Invoke-Expression $command } function starcoder { - $command = "$lib_dir/main-starcoder.exe -t $threads -n $n_predict $filteredArguments" + $exec_file = if ($vnni_enable) { "main-starcoder_vnni.exe" } else { "main-starcoder.exe" } + $command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments" Write-Host "$command" Invoke-Expression $command }