diff --git a/python/llm/setup.py b/python/llm/setup.py
index 536ae5c4..2293acc5 100644
--- a/python/llm/setup.py
+++ b/python/llm/setup.py
@@ -80,26 +80,30 @@ windows_binarys = [
     "main-chatglm_vnni.exe",
 ]
 linux_binarys = [
+    # "libllama_avx.so",
+    # "libgptneox_avx.so",
+    # "libbloom_avx.so",
+    # "libstarcoder_avx.so",
     "libllama_avx2.so",
     "libgptneox_avx2.so",
     "libbloom_avx2.so",
+    "libstarcoder_avx2.so",
     "libllama_avx512.so",
     "libgptneox_avx512.so",
     "libbloom_avx512.so",
+    "libstarcoder_avx512.so",
     "quantize-llama",
     "quantize-gptneox",
     "quantize-bloom",
-    "main-llama_avx2",
-    "main-gptneox_avx2",
-    "main-bloom_avx2",
-    "main-llama_avx512",
-    "main-gptneox_avx512",
-    "main-bloom_avx512",
-    "libstarcoder_avx512.so",
-    "main-starcoder_avx512",
     "quantize-starcoder",
-    "libstarcoder_avx2.so",
-    "main-starcoder_avx2",
+    "libllama-api.so",
+    "libgptneox-api.so",
+    "libbloom-api.so",
+    "libstarcoder-api.so",
+    "main-llama",
+    "main-gptneox",
+    "main-bloom",
+    "main-starcoder",
     "main-chatglm_vnni",
 ]
 
diff --git a/python/llm/src/bigdl/llm/cli/llm-chat b/python/llm/src/bigdl/llm/cli/llm-chat
index 385acf51..0b84171d 100755
--- a/python/llm/src/bigdl/llm/cli/llm-chat
+++ b/python/llm/src/bigdl/llm/cli/llm-chat
@@ -12,19 +12,6 @@ llm_dir="$(dirname "$(python -c "import bigdl.llm;print(bigdl.llm.__file__)")")"
 lib_dir="$llm_dir/libs"
 prompts_dir="$llm_dir/cli/prompts"
 
-function get_avx_flags() {
-  avx="avx2"
-  if command -v lscpu &>/dev/null; then
-    msg=$(lscpu)
-    if [[ $msg == *"avx512_vnni"* ]]; then
-      avx="avx512"
-    fi
-  else
-    echo "lscpu command not found. Please make sure it is installed."
-  fi
-  echo $avx
-}
-
 # Function to display help message
 function display_help {
   echo "usage: ./llm-chat -x MODEL_FAMILY [-h] [args]"
@@ -41,7 +28,7 @@ function display_help {
 function llama {
   PROMPT_TEMPLATE="$prompts_dir/chat-with-llm.txt"
   EXTRA_ARGS+=('-i' '--file' "'$PROMPT_TEMPLATE'" '--reverse-prompt' "'USER:'" '--in-prefix' "' '")
-  command="$lib_dir/main-llama_$avx_flag -t $threads -n $n_predict ${filteredArguments[*]} ${EXTRA_ARGS[*]}"
+  command="$lib_dir/main-llama -t $threads -n $n_predict ${filteredArguments[*]} ${EXTRA_ARGS[*]}"
   echo "$command"
   eval "$command"
 }
@@ -50,7 +37,7 @@ function gptneox {
   PROMPT="A chat between a curious human and an artificial intelligence assistant .\
 The assistant gives helpful, detailed, and polite answers to the human's questions."
   EXTRA_ARGS+=('--instruct' '-p' '"$PROMPT"')
-  command="$lib_dir/main-gptneox_$avx_flag -t $threads -n $n_predict ${filteredArguments[*]} ${EXTRA_ARGS[*]}"
+  command="$lib_dir/main-gptneox -t $threads -n $n_predict ${filteredArguments[*]} ${EXTRA_ARGS[*]}"
   echo "$command"
   eval "$command"
 }
@@ -82,9 +69,6 @@ while [[ $# -gt 0 ]]; do
   esac
 done
 
-avx_flag=$(get_avx_flags)
-echo "AVX Flags: $avx_flag"
-
 # Perform actions based on the model_family
 if [[ "$model_family" == "llama" ]]; then
   llama
diff --git a/python/llm/src/bigdl/llm/cli/llm-cli b/python/llm/src/bigdl/llm/cli/llm-cli
index 2e753642..06a62b16 100755
--- a/python/llm/src/bigdl/llm/cli/llm-cli
+++ b/python/llm/src/bigdl/llm/cli/llm-cli
@@ -36,25 +36,25 @@ function display_help {
 }
 
 function llama {
-  command="$lib_dir/main-llama_$avx_flag -t $threads -n $n_predict ${filteredArguments[*]}"
+  command="$lib_dir/main-llama -t $threads -n $n_predict ${filteredArguments[*]}"
   echo "$command"
   eval "$command"
 }
 
 function bloom {
-  command="$lib_dir/main-bloom_$avx_flag -t $threads -n $n_predict ${filteredArguments[*]}"
+  command="$lib_dir/main-bloom -t $threads -n $n_predict ${filteredArguments[*]}"
   echo "$command"
   eval "$command"
 }
 
 function gptneox {
-  command="$lib_dir/main-gptneox_$avx_flag -t $threads -n $n_predict ${filteredArguments[*]}"
+  command="$lib_dir/main-gptneox -t $threads -n $n_predict ${filteredArguments[*]}"
   echo "$command"
   eval "$command"
 }
 
 function starcoder {
-  command="$lib_dir/main-starcoder_$avx_flag -t $threads -n $n_predict ${filteredArguments[*]}"
+  command="$lib_dir/main-starcoder -t $threads -n $n_predict ${filteredArguments[*]}"
   echo "$command"
   eval "$command"
 }
diff --git a/python/llm/src/bigdl/llm/utils/utils.py b/python/llm/src/bigdl/llm/utils/utils.py
index 8c0b5721..1bfde572 100644
--- a/python/llm/src/bigdl/llm/utils/utils.py
+++ b/python/llm/src/bigdl/llm/utils/utils.py
@@ -44,16 +44,14 @@ def get_shared_lib_info(lib_base_name: str):
     else:
         invalidInputError(False, "Unsupported platform.")
 
-    cpuflags = get_cpu_flags()
-
     # Construct the paths to the possible shared library names (python/llm/src/bigdl/llm/libs)
     _base_path = pathlib.Path(__file__).parent.parent.resolve()
     _base_path = _base_path / 'libs'
     # Searching for the library in the current directory under the name "lib{lib_base_name}"
     # (default name for llmcpp) and "{lib_base_name}" (default name for this repo)
     _lib_paths = [
-        _base_path / f"lib{lib_base_name}{cpuflags}{lib_ext}",
-        _base_path / f"{lib_base_name}{cpuflags}{lib_ext}",
+        _base_path / f"lib{lib_base_name}-api{lib_ext}",
+        _base_path / f"{lib_base_name}-api{lib_ext}",
     ]
 
     return _base_path, _lib_paths
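The net effect of this patch is that the wrappers stop probing the CPU (via `lscpu`) to pick an `_avx2`/`_avx512` binary at launch: each model family now ships one `main-*` executable and one `lib*-api.so` shared library. The sketch below illustrates the new lookup convention that `get_shared_lib_info` implements; `find_api_lib` and its arguments are hypothetical names used for illustration, not part of the patch.

```python
import pathlib
import sys


def find_api_lib(libs_dir: pathlib.Path, lib_base_name: str) -> pathlib.Path:
    # Hypothetical helper mirroring the patched get_shared_lib_info():
    # one "-api" shared library per model family, with no CPU-flag
    # suffix (e.g. libllama-api.so instead of libllama_avx512.so).
    lib_ext = ".so" if sys.platform.startswith("linux") else ".dylib"
    candidates = [
        libs_dir / f"lib{lib_base_name}-api{lib_ext}",  # default name for llmcpp
        libs_dir / f"{lib_base_name}-api{lib_ext}",     # default name for this repo
    ]
    for path in candidates:
        if path.is_file():
            return path
    raise FileNotFoundError(f"no '-api' library for {lib_base_name!r} in {libs_dir}")


# Hypothetical usage: resolve the unified llama library, then load it,
# e.g. with ctypes.CDLL(str(path)).
# path = find_api_lib(pathlib.Path("libs"), "llama")
```

Because the file name no longer encodes the instruction set, any AVX2-versus-AVX512 dispatch has to happen somewhere other than this path lookup; the wrapper scripts and `utils.py` simply no longer care.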