[LLM] Support linux cpp dynamic load .so (#8655)
* support linux cpp dynamic load .so
* update cli
parent ca998cc6f2
commit 119bf6d710

4 changed files with 22 additions and 36 deletions
@@ -80,26 +80,30 @@ windows_binarys = [
     "main-chatglm_vnni.exe",
 ]
 linux_binarys = [
+    # "libllama_avx.so",
+    # "libgptneox_avx.so",
+    # "libbloom_avx.so",
+    # "libstarcoder_avx.so",
     "libllama_avx2.so",
     "libgptneox_avx2.so",
     "libbloom_avx2.so",
+    "libstarcoder_avx2.so",
     "libllama_avx512.so",
     "libgptneox_avx512.so",
     "libbloom_avx512.so",
+    "libstarcoder_avx512.so",
     "quantize-llama",
     "quantize-gptneox",
     "quantize-bloom",
-    "main-llama_avx2",
-    "main-gptneox_avx2",
-    "main-bloom_avx2",
-    "main-llama_avx512",
-    "main-gptneox_avx512",
-    "main-bloom_avx512",
-    "libstarcoder_avx512.so",
-    "main-starcoder_avx512",
     "quantize-starcoder",
-    "libstarcoder_avx2.so",
-    "main-starcoder_avx2",
+    "libllama-api.so",
+    "libgptneox-api.so",
+    "libbloom-api.so",
+    "libstarcoder-api.so",
+    "main-llama",
+    "main-gptneox",
+    "main-bloom",
+    "main-starcoder",
 
     "main-chatglm_vnni",
 ]
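With this change the Linux package ships plain main-llama/main-gptneox/main-bloom/main-starcoder launchers and lib*-api.so libraries next to the _avx2 and _avx512 .so builds, so the AVX-specific variant is presumably resolved when a library is loaded rather than by the wrapper scripts. Below is a minimal Python analogue of that run-time dispatch; it is an illustration only (not the C++ implementation), reusing the avx512_vnni check from the shell helper removed further down and the library names from the list above.

# Illustration only: pick between the packaged _avx2 and _avx512 builds at
# run time, mirroring the avx512_vnni check the removed shell helper did via lscpu.
import ctypes
import pathlib


def cpu_has_avx512_vnni() -> bool:
    # On Linux, /proc/cpuinfo lists the CPU feature flags.
    return "avx512_vnni" in pathlib.Path("/proc/cpuinfo").read_text()


def load_family_backend(lib_dir: pathlib.Path, family: str = "llama") -> ctypes.CDLL:
    suffix = "_avx512" if cpu_has_avx512_vnni() else "_avx2"
    # e.g. libs/libllama_avx512.so or libs/libllama_avx2.so from the list above
    return ctypes.CDLL(str(lib_dir / f"lib{family}{suffix}.so"))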
@@ -12,19 +12,6 @@ llm_dir="$(dirname "$(python -c "import bigdl.llm;print(bigdl.llm.__file__)")")"
 lib_dir="$llm_dir/libs"
 prompts_dir="$llm_dir/cli/prompts"
-
-function get_avx_flags() {
-  avx="avx2"
-  if command -v lscpu &>/dev/null; then
-    msg=$(lscpu)
-    if [[ $msg == *"avx512_vnni"* ]]; then
-      avx="avx512"
-    fi
-  else
-    echo "lscpu command not found. Please make sure it is installed."
-  fi
-  echo $avx
-}
 
 # Function to display help message
 function display_help {
   echo "usage: ./llm-chat -x MODEL_FAMILY [-h] [args]"
@@ -41,7 +28,7 @@ function display_help {
 function llama {
   PROMPT_TEMPLATE="$prompts_dir/chat-with-llm.txt"
   EXTRA_ARGS+=('-i' '--file' "'$PROMPT_TEMPLATE'" '--reverse-prompt' "'USER:'" '--in-prefix' "' '")
-  command="$lib_dir/main-llama_$avx_flag -t $threads -n $n_predict ${filteredArguments[*]} ${EXTRA_ARGS[*]}"
+  command="$lib_dir/main-llama -t $threads -n $n_predict ${filteredArguments[*]} ${EXTRA_ARGS[*]}"
   echo "$command"
   eval "$command"
 }
@@ -50,7 +37,7 @@ function gptneox {
   PROMPT="A chat between a curious human <human> and an artificial intelligence assistant <bot>.\
   The assistant gives helpful, detailed, and polite answers to the human's questions."
   EXTRA_ARGS+=('--instruct' '-p' '"$PROMPT"')
-  command="$lib_dir/main-gptneox_$avx_flag -t $threads -n $n_predict ${filteredArguments[*]} ${EXTRA_ARGS[*]}"
+  command="$lib_dir/main-gptneox -t $threads -n $n_predict ${filteredArguments[*]} ${EXTRA_ARGS[*]}"
   echo "$command"
   eval "$command"
 }
@@ -82,9 +69,6 @@ while [[ $# -gt 0 ]]; do
   esac
 done
 
-avx_flag=$(get_avx_flags)
-echo "AVX Flags: $avx_flag"
-
 # Perform actions based on the model_family
 if [[ "$model_family" == "llama" ]]; then
   llama
@@ -36,25 +36,25 @@ function display_help {
 }
 
 function llama {
-  command="$lib_dir/main-llama_$avx_flag -t $threads -n $n_predict ${filteredArguments[*]}"
+  command="$lib_dir/main-llama -t $threads -n $n_predict ${filteredArguments[*]}"
   echo "$command"
   eval "$command"
 }
 
 function bloom {
-  command="$lib_dir/main-bloom_$avx_flag -t $threads -n $n_predict ${filteredArguments[*]}"
+  command="$lib_dir/main-bloom -t $threads -n $n_predict ${filteredArguments[*]}"
   echo "$command"
   eval "$command"
 }
 
 function gptneox {
-  command="$lib_dir/main-gptneox_$avx_flag -t $threads -n $n_predict ${filteredArguments[*]}"
+  command="$lib_dir/main-gptneox -t $threads -n $n_predict ${filteredArguments[*]}"
   echo "$command"
   eval "$command"
 }
 
 function starcoder {
-  command="$lib_dir/main-starcoder_$avx_flag -t $threads -n $n_predict ${filteredArguments[*]}"
+  command="$lib_dir/main-starcoder -t $threads -n $n_predict ${filteredArguments[*]}"
   echo "$command"
   eval "$command"
 }
@@ -44,16 +44,14 @@ def get_shared_lib_info(lib_base_name: str):
     else:
         invalidInputError(False, "Unsupported platform.")
 
-    cpuflags = get_cpu_flags()
-
     # Construct the paths to the possible shared library names (python/llm/src/bigdl/llm/libs)
     _base_path = pathlib.Path(__file__).parent.parent.resolve()
     _base_path = _base_path / 'libs'
     # Searching for the library in the current directory under the name "lib{lib_base_name}"
     # (default name for llmcpp) and "{lib_base_name}" (default name for this repo)
     _lib_paths = [
-        _base_path / f"lib{lib_base_name}{cpuflags}{lib_ext}",
-        _base_path / f"{lib_base_name}{cpuflags}{lib_ext}",
+        _base_path / f"lib{lib_base_name}-api{lib_ext}",
+        _base_path / f"{lib_base_name}-api{lib_ext}",
     ]
 
     return _base_path, _lib_paths
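For context, a minimal sketch of how the candidate paths returned by get_shared_lib_info are typically consumed: try each path in order and keep the first shared library that opens, which on Linux amounts to a dlopen() of lib{lib_base_name}-api.so. This is an assumption about typical usage, not the repo's actual loader.

# Hypothetical consumer of get_shared_lib_info(); illustration only.
import ctypes
import pathlib


def load_first_available(base_path: pathlib.Path, lib_paths) -> ctypes.CDLL:
    for path in lib_paths:
        if not path.exists():
            continue
        try:
            # ctypes.CDLL wraps dlopen() on Linux, so this is where the
            # dynamically loaded .so from this commit actually gets opened.
            return ctypes.CDLL(str(path))
        except OSError:
            continue
    raise FileNotFoundError(f"no loadable shared library found under {base_path}")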