diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/CMakeLists.txt b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/CMakeLists.txt
index c1b5a788..ecc6c84e 100644
--- a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/CMakeLists.txt
+++ b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/CMakeLists.txt
@@ -18,20 +18,20 @@ endif()
 add_library(npu_llm STATIC IMPORTED)
 set_target_properties(npu_llm PROPERTIES IMPORTED_LOCATION ${LIBRARY_DIR}/npu_llm.lib)
 
-set(TARGET llama-cli-npu)
-add_executable(${TARGET} llama-cli-npu.cpp)
+set(TARGET llm-cli)
+add_executable(${TARGET} llm-cli.cpp)
 install(TARGETS ${TARGET} RUNTIME)
 target_link_libraries(${TARGET} PRIVATE npu_llm)
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
 
-add_custom_command(TARGET llama-cli-npu POST_BUILD
+add_custom_command(TARGET llm-cli POST_BUILD
     COMMAND ${CMAKE_COMMAND} -E copy_if_different ${LIBRARY_DIR}/npu_llm.dll ${CMAKE_BINARY_DIR}/Release/
     COMMENT "Copying npu_llm.dll to build/Release\n"
 )
 
-add_custom_command(TARGET llama-cli-npu POST_BUILD
+add_custom_command(TARGET llm-cli POST_BUILD
     COMMAND ${CMAKE_COMMAND} -E copy_directory ${DLL_DIR}/ ${CMAKE_BINARY_DIR}/Release/
diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/README.md b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/README.md
index b7443017..e15f4193 100644
--- a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/README.md
+++ b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/README.md
@@ -88,9 +88,9 @@ Arguments info:
 - `--max-prompt-len MAX_PROMPT_LEN`: argument defining the maximum number of tokens that the input prompt can contain. It is default to be `512`.
 - `--low-bit LOW_BIT`: argument defining the low bit optimizations that will be applied to the model. Current available options are `"sym_int4"`, `"asym_int4"` and `"sym_int8"`, with `"sym_int4"` as the default.
 
-## 3. Build C++ Example `llama-cli-npu`(Optional)
+## 3. Build C++ Example `llm-cli`(Optional)
 
-- You can run below cmake script in cmd to build `llama-cli-npu` by yourself, don't forget to replace below with your own path.
+- You can run below cmake script in cmd to build `llm-cli` by yourself, don't forget to replace below with your own path.
 
 ```cmd
 :: under current directory
@@ -103,21 +103,21 @@ cmake --build . --config Release -j
 cd Release
 ```
 
-- You can also directly use our released `llama-cli-npu.exe` which has the same usage as this example `llama-cli-npu.cpp`
+- You can also directly use our released `llm-cli.exe` which has the same usage as this example `llm-cli.cpp`
 
 > [!NOTE]
-> Our released `llama-cli-npu.exe` can be found at \bigdl-core-npu
+> Our released `llm-cli.exe` can be found at \bigdl-core-npu
 
-## 4. Run `llama-cli-npu`
+## 4. Run `llm-cli`
 
-With built `llama-cli-npu`, you can run the example with specified paramaters. For example,
+With built `llm-cli`, you can run the example with specified paramaters. For example,
 
 ```cmd
 # Run simple text completion
-llama-cli-npu.exe -m -n 64 "AI是什么?"
+llm-cli.exe -m -n 64 "AI是什么?"
 
 # Run in conversation mode
-llama-cli-npu.exe -m -cnv
+llm-cli.exe -m -cnv
 ```
 
 Arguments info:
diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/llama-cli-npu.cpp b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/llm-cli.cpp
similarity index 100%
rename from python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/llama-cli-npu.cpp
rename to python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/llm-cli.cpp
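
For reference, below is a minimal sketch of how the renamed `llm-cli` target would be built and invoked after this patch, based on the README hunks above. The directory layout, the bare `cmake ..` configure call, and the `<converted_model_path>` placeholder are assumptions for illustration (the configure step and model path in the full README are not shown in this diff excerpt); follow the complete README for the authoritative commands.

```cmd
:: Sketch only: configure flags and the model path below are assumptions,
:: not taken from this diff; see the full README for the exact steps.
cd python\llm\example\NPU\HF-Transformers-AutoModels\LLM\CPP_Examples
mkdir build && cd build
cmake ..
cmake --build . --config Release -j
cd Release

:: Run the renamed binary (formerly llama-cli-npu.exe)
:: <converted_model_path> is a hypothetical placeholder for your converted model directory
llm-cli.exe -m <converted_model_path> -n 64 "AI是什么?"
llm-cli.exe -m <converted_model_path> -cnv
```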