diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/CMakeLists.txt b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/CMakeLists.txt
index c1b5a788..ecc6c84e 100644
--- a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/CMakeLists.txt
+++ b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/CMakeLists.txt
@@ -18,20 +18,20 @@ endif()
 add_library(npu_llm STATIC IMPORTED)
 set_target_properties(npu_llm PROPERTIES IMPORTED_LOCATION ${LIBRARY_DIR}/npu_llm.lib)
 
-set(TARGET llama-cli-npu)
-add_executable(${TARGET} llama-cli-npu.cpp)
+set(TARGET llm-cli)
+add_executable(${TARGET} llm-cli.cpp)
 install(TARGETS ${TARGET} RUNTIME)
 target_link_libraries(${TARGET} PRIVATE npu_llm)
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
 
-add_custom_command(TARGET llama-cli-npu POST_BUILD
+add_custom_command(TARGET llm-cli POST_BUILD
     COMMAND ${CMAKE_COMMAND} -E copy_if_different ${LIBRARY_DIR}/npu_llm.dll ${CMAKE_BINARY_DIR}/Release/
     COMMENT "Copying npu_llm.dll to build/Release\n"
 )
 
-add_custom_command(TARGET llama-cli-npu POST_BUILD
+add_custom_command(TARGET llm-cli POST_BUILD
     COMMAND ${CMAKE_COMMAND} -E copy_directory ${DLL_DIR}/ ${CMAKE_BINARY_DIR}/Release/
diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/README.md b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/README.md
index b7443017..e15f4193 100644
--- a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/README.md
+++ b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/README.md
@@ -88,9 +88,9 @@ Arguments info:
 - `--max-prompt-len MAX_PROMPT_LEN`: argument defining the maximum number of tokens that the input prompt can contain. It is default to be `512`.
 - `--low-bit LOW_BIT`: argument defining the low bit optimizations that will be applied to the model. Current available options are `"sym_int4"`, `"asym_int4"` and `"sym_int8"`, with `"sym_int4"` as the default.
 
-## 3. Build C++ Example `llama-cli-npu`(Optional)
+## 3. Build C++ Example `llm-cli`(Optional)
 
-- You can run below cmake script in cmd to build `llama-cli-npu` by yourself, don't forget to replace below with your own path.
+- You can run below cmake script in cmd to build `llm-cli` by yourself, don't forget to replace below with your own path.
 
 ```cmd
 :: under current directory
@@ -103,21 +103,21 @@ cmake --build . --config Release -j
 cd Release
 ```
 
-- You can also directly use our released `llama-cli-npu.exe` which has the same usage as this example `llama-cli-npu.cpp`
+- You can also directly use our released `llm-cli.exe` which has the same usage as this example `llm-cli.cpp`
 
 > [!NOTE]
-> Our released `llama-cli-npu.exe` can be found at \bigdl-core-npu
+> Our released `llm-cli.exe` can be found at \bigdl-core-npu
 
-## 4. Run `llama-cli-npu`
+## 4. Run `llm-cli`
 
-With built `llama-cli-npu`, you can run the example with specified paramaters. For example,
+With built `llm-cli`, you can run the example with specified paramaters. For example,
 
 ```cmd
 # Run simple text completion
-llama-cli-npu.exe -m -n 64 "AI是什么?"
+llm-cli.exe -m -n 64 "AI是什么?"
 
 # Run in conversation mode
-llama-cli-npu.exe -m -cnv
+llm-cli.exe -m -cnv
 ```
 
 Arguments info:
diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/llama-cli-npu.cpp b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/llm-cli.cpp
similarity index 100%
rename from python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/llama-cli-npu.cpp
rename to python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/llm-cli.cpp
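
For reference, below is a minimal sketch of how the renamed `llm-cli` target would be built and invoked after this patch, based on the README hunks above. The directory layout, the bare `cmake ..` configure call, and the `<converted_model_path>` placeholder are assumptions for illustration (the configure step and model path in the full README are not shown in this diff excerpt); follow the complete README for the authoritative commands.

```cmd
:: Sketch only: configure flags and the model path below are assumptions,
:: not taken from this diff; see the full README for the exact steps.
cd python\llm\example\NPU\HF-Transformers-AutoModels\LLM\CPP_Examples
mkdir build && cd build
cmake ..
cmake --build . --config Release -j
cd Release

:: Run the renamed binary (formerly llama-cli-npu.exe)
:: <converted_model_path> is a hypothetical placeholder for your converted model directory
llm-cli.exe -m <converted_model_path> -n 64 "AI是什么?"
llm-cli.exe -m <converted_model_path> -cnv
```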