From 9a96af42322980f8fbbfc553c3846325a1e2aeae Mon Sep 17 00:00:00 2001
From: Jin Qiao <89779290+JinBridger@users.noreply.github.com>
Date: Thu, 16 May 2024 10:46:29 +0800
Subject: [PATCH] Remove oneAPI pip install command in related examples
 (#11030)

* Remove pip install command in windows installation guide

* fix chatglm3 installation guide

* Fix gemma cpu example

* Apply on other examples

* fix
---
 .../Model/gemma/README.md                     | 132 +++++-------------
 .../Model/aquila/README.md                    |   2 -
 .../Model/aquila2/README.md                   |   2 -
 .../Model/baichuan/README.md                  |   2 -
 .../Model/baichuan2/README.md                 |   2 -
 .../Model/bluelm/README.md                    |   2 -
 .../Model/chatglm2/README.md                  |  43 +++---
 .../Model/chatglm3/README.md                  |   4 -
 .../Model/chinese-llama2/README.md            |   2 -
 .../Model/codegemma/README.md                 |   2 -
 .../Model/codellama/readme.md                 |   2 -
 .../Model/cohere/README.md                    |   2 -
 .../Model/deciLM-7b/README.md                 |   2 -
 .../Model/deepseek/README.md                  |   2 -
 .../Model/distil-whisper/README.md            |   2 -
 .../Model/dolly-v1/README.md                  |   2 -
 .../Model/dolly-v2/README.md                  |   2 -
 .../Model/falcon/README.md                    |   2 -
 .../Model/flan-t5/README.md                   |   2 -
 .../Model/gemma/README.md                     |   2 -
 .../Model/gpt-j/readme.md                     |   2 -
 .../Model/internlm/README.md                  |   2 -
 .../Model/internlm2/README.md                 |   2 -
 .../Model/llama2/README.md                    |   2 -
 .../Model/llama3/README.md                    |   2 -
 .../Model/mistral/README.md                   |   2 -
 .../Model/mixtral/README.md                   |   2 -
 .../Model/mpt/README.md                       |   2 -
 .../Model/phi-1_5/README.md                   |   2 -
 .../Model/phi-2/README.md                     |   2 -
 .../Model/phi-3/README.md                     |   2 -
 .../Model/phixtral/README.md                  |   2 -
 .../Model/qwen-vl/README.md                   |   2 -
 .../Model/qwen/README.md                      |   2 -
 .../Model/qwen1.5/README.md                   |   2 -
 .../Model/redpajama/README.md                 |   2 -
 .../Model/replit/README.md                    |   2 -
 .../Model/rwkv4/README.md                     |   2 -
 .../Model/rwkv5/README.md                     |   2 -
 .../Model/solar/README.md                     |   2 -
 .../Model/stablelm/README.md                  |   2 -
 .../Model/starcoder/readme.md                 |   2 -
 .../Model/vicuna/README.md                    |   2 -
 .../Model/voiceassistant/README.md            |   2 -
 .../Model/whisper/readme.md                   |   2 -
 .../Model/yi/README.md                        |   2 -
 .../Model/yuan2/README.md                     |   2 -
 .../Save-Load/README.md                       |  45 +++---
 python/llm/example/GPU/LangChain/README.md    |  41 +++---
 python/llm/example/GPU/LlamaIndex/README.md   |  41 +++---
 .../GPU/Long-Context/Chatglm3-32K/README.md   |  41 +++---
 .../GPU/Long-Context/LLaMA2-32K/README.md     |  41 +++---
 .../example/GPU/ModelScope-Models/README.md   |  41 +++---
 .../GPU/ModelScope-Models/Save-Load/README.md |  45 +++---
 .../PyTorch-Models/Model/aquila2/README.md    |   2 -
 .../PyTorch-Models/Model/baichuan/README.md   |   2 -
 .../PyTorch-Models/Model/baichuan2/README.md  |   2 -
 .../GPU/PyTorch-Models/Model/bark/README.md   |   2 -
 .../GPU/PyTorch-Models/Model/bluelm/README.md |   2 -
 .../PyTorch-Models/Model/chatglm2/README.md   |   4 -
 .../PyTorch-Models/Model/chatglm3/README.md   |   4 -
 .../PyTorch-Models/Model/codegemma/README.md  |   2 -
 .../PyTorch-Models/Model/codellama/README.md  |   2 -
 .../GPU/PyTorch-Models/Model/cohere/README.md |   2 -
 .../PyTorch-Models/Model/deciLM-7b/README.md  |   2 -
 .../PyTorch-Models/Model/deepseek/README.md   |   2 -
 .../Model/distil-whisper/README.md            |   2 -
 .../PyTorch-Models/Model/dolly-v1/README.md   |   2 -
 .../PyTorch-Models/Model/dolly-v2/README.md   |   2 -
 .../PyTorch-Models/Model/flan-t5/README.md    |   2 -
 .../PyTorch-Models/Model/internlm2/README.md  |   2 -
 .../GPU/PyTorch-Models/Model/llama2/README.md |   2 -
 .../GPU/PyTorch-Models/Model/llama3/README.md |   2 -
 .../GPU/PyTorch-Models/Model/llava/README.md  |   2 -
 .../GPU/PyTorch-Models/Model/mamba/README.md  |   2 -
 .../PyTorch-Models/Model/mistral/README.md    |   2 -
 .../PyTorch-Models/Model/mixtral/README.md    |   2 -
 .../PyTorch-Models/Model/phi-1_5/README.md    |   2 -
 .../GPU/PyTorch-Models/Model/phi-2/README.md  |   2 -
 .../GPU/PyTorch-Models/Model/phi-3/README.md  |   2 -
 .../PyTorch-Models/Model/phixtral/README.md   |   2 -
 .../PyTorch-Models/Model/qwen-vl/README.md    |   2 -
 .../PyTorch-Models/Model/qwen1.5/README.md    |   2 -
 .../GPU/PyTorch-Models/Model/replit/README.md |   2 -
 .../GPU/PyTorch-Models/Model/solar/README.md  |   2 -
 .../PyTorch-Models/Model/speech-t5/README.md  |   2 -
 .../PyTorch-Models/Model/stablelm/README.md   |   2 -
 .../PyTorch-Models/Model/starcoder/README.md  |   2 -
 .../GPU/PyTorch-Models/Model/yi/README.md     |   2 -
 .../GPU/PyTorch-Models/Model/yuan2/README.md  |   2 -
 90 files changed, 231 insertions(+), 407 deletions(-)

diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/gemma/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/gemma/README.md
index c8572e04..f00acd8a 100644
--- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/gemma/README.md
+++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/gemma/README.md
@@ -1,122 +1,60 @@
 # Gemma
-In this directory, you will find examples on how you could apply IPEX-LLM INT4 optimizations on Google Gemma models on [Intel GPUs](../../../README.md). For illustration purposes, we utilize the [google/gemma-7b-it ](https://huggingface.co/google/gemma-7b-it) and [google/gemma-2b-it](https://huggingface.co/google/gemma-2b-it) as reference Gemma models.
 
-## Requirements
-To run these examples with IPEX-LLM on Intel GPUs, we have some recommended requirements for your machine, please refer to [here](../../../README.md#requirements) for more information.
+In this directory, you will find examples on how you could apply IPEX-LLM INT4 optimizations on Gemma models. For illustration purposes, we utilize the [google/gemma-7b-it](https://huggingface.co/google/gemma-7b-it) and [google/gemma-2b-it](https://huggingface.co/google/gemma-2b-it) as reference Gemma models.
 
-**Important: According to Gemma's requirement, please make sure you have installed `transformers==4.38.1` to run the example.**
+## 0. Requirements
+To run these examples with IPEX-LLM, we have some recommended requirements for your machine; please refer to [here](../README.md#recommended-requirements) for more information.
 
 ## Example: Predict Tokens using `generate()` API
-In the example [generate.py](./generate.py), we show a basic use case for a Gemma model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations on Intel GPUs.
+In the example [generate.py](./generate.py), we show a basic use case for a Gemma model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations.
 ### 1. Install
-#### 1.1 Installation on Linux
-We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#).
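+We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#).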
 
 After installing conda, create a Python environment for IPEX-LLM:
 ```bash
 conda create -n llm python=3.11 # recommend to use Python 3.11
 conda activate llm
 
-# below command will install intel_extension_for_pytorch==2.1.10+xpu as default
-pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option
 
 # According to Gemma's requirement, please make sure you are using a stable version of Transformers, 4.38.1 or newer.
 pip install transformers==4.38.1
 ```
 
-#### 1.2 Installation on Windows
-We suggest using conda to manage environment:
-```bash
-conda create -n llm python=3.11 libuv
-conda activate llm
-# below command will install intel_extension_for_pytorch==2.1.10+xpu as default
-pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-
-# According to Gemma's requirement, please make sure you are using a stable version of Transformers, 4.38.1 or newer.
-pip install transformers==4.38.1
+### 2. Run
+```
+python ./generate.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --prompt PROMPT --n-predict N_PREDICT
 ```
 
-### 2. Configures OneAPI environment variables
-#### 2.1 Configurations for Linux
-```bash
-source /opt/intel/oneapi/setvars.sh
-```
-
-#### 2.2 Configurations for Windows
-```cmd
-call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
-```
-> Note: Please make sure you are using **CMD** (**Anaconda Prompt** if using conda) to run the command as PowerShell is not supported.
-### 3. Runtime Configurations
-For optimal performance, it is recommended to set several environment variables. Please check out the suggestions based on your device.
-#### 3.1 Configurations for Linux
-<details>
-
-<summary>For Intel Arc™ A-Series Graphics and Intel Data Center GPU Flex Series</summary>
-
-```bash
-export USE_XETLA=OFF
-export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
-```
-
-</details>
-
-<details>
-
-<summary>For Intel Data Center GPU Max Series</summary>
-
-```bash
-export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so
-export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
-export ENABLE_SDP_FUSION=1
-```
-> Note: Please note that `libtcmalloc.so` can be installed by `conda install -c conda-forge -y gperftools=2.10`.
-</details>
-
-#### 3.2 Configurations for Windows
-<details>
-
-<summary>For Intel iGPU</summary>
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-</details>
-
-<details>
-
-<summary>For Intel Arc™ A300-Series or Pro A60</summary>
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-</details>
-
-<details>
-
-<summary>For other Intel dGPU Series</summary>
-
-There is no need to set further environment variables.
-
-</details>
-
-> Note: For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
-### 4. Running examples
-
-```bash
-python ./generate.py --prompt 'What is AI?'
-```
-
-In the example, several arguments can be passed to satisfy your requirements:
-
-- `--repo-id-or-model-path REPO_ID_OR_MODEL_PATH`: argument defining the huggingface repo id for the Gemma model (e.g. `google/gemma-7b-it` and `google/gemma-2b-it`) to be downloaded, or the path to the huggingface checkpoint folder. It is default to be `'google/gemma-7b-it'`.
+Arguments info:
+- `--repo-id-or-model-path REPO_ID_OR_MODEL_PATH`: argument defining the huggingface repo id for the Gemma model (e.g. `google/gemma-7b-it`) to be downloaded, or the path to the huggingface checkpoint folder. It is default to be `'google/gemma-7b-it'`.
 - `--prompt PROMPT`: argument defining the prompt to be infered (with integrated prompt format for chat). It is default to be `'What is AI?'`.
 - `--n-predict N_PREDICT`: argument defining the max number of tokens to predict. It is default to be `32`.
 
-#### 2.3 Sample Output
+> **Note**: When loading the model in 4-bit, IPEX-LLM converts linear layers in the model into INT4 format. In theory, an *X*B model saved in 16-bit will require approximately 2*X* GB of memory for loading, and ~0.5*X* GB of memory for further inference.
+>
+> Please select the appropriate size of the Gemma model based on the capabilities of your machine.
+
+#### 2.1 Client
+On a client Windows machine, it is recommended to run directly with full utilization of all cores:
+```powershell
+python ./generate.py 
+```
+
+#### 2.2 Server
+For optimal performance on a server, it is recommended to set several environment variables (refer to [here](../README.md#best-known-configuration-on-linux) for more information), and run the example with all the physical cores of a single socket.
+
+E.g. on Linux,
+```bash
+# set IPEX-LLM env variables
+source ipex-llm-init
+
+# e.g. for a server with 48 cores per socket
+export OMP_NUM_THREADS=48
+numactl -C 0-47 -m 0 python ./generate.py
+```
+
+#### 2.3 Sample Output
 #### [google/gemma-7b-it](https://huggingface.co/google/gemma-7b-it)
 ```log
 Inference time: xxxx s
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/aquila/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/aquila/README.md
index 316a9652..c8f4e701 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/aquila/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/aquila/README.md
@@ -27,8 +27,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/aquila2/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/aquila2/README.md
index 3cdfd4b7..84bd3a20 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/aquila2/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/aquila2/README.md
@@ -27,8 +27,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/baichuan/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/baichuan/README.md
index b56e272d..9d3b4d78 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/baichuan/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/baichuan/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/baichuan2/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/baichuan2/README.md
index 3ecfa959..2cd38f57 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/baichuan2/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/baichuan2/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/bluelm/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/bluelm/README.md
index e6181f47..3cf8120d 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/bluelm/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/bluelm/README.md
@@ -21,8 +21,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/chatglm2/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/chatglm2/README.md
index bf01503c..dab814ee 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/chatglm2/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/chatglm2/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
@@ -161,16 +159,17 @@ conda activate llm
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 ```
 
-### 2. Configures OneAPI environment variables
-#### 2.1 Configurations for Linux
+
+### 2. Configure oneAPI environment variables for Linux
+
+> [!NOTE]
+> Skip this step if you are running on Windows.
+
+This is a required step on Linux for APT- or offline-installed oneAPI. Skip this step for pip-installed oneAPI.
+
 ```bash
 source /opt/intel/oneapi/setvars.sh
 ```
-#### 2.2 Configurations for Windows
-```cmd
-call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
-```
-> Note: Please make sure you are using **CMD** (**Anaconda Prompt** if using conda) to run the command as PowerShell is not supported.
 
 ### 3. Runtime Configurations
 For optimal performance, it is recommended to set several environment variables. Please check out the suggestions based on your device.
@@ -182,6 +181,7 @@ For optimal performance, it is recommended to set several environment variables.
 ```bash
 export USE_XETLA=OFF
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+export SYCL_CACHE_PERSISTENT=1
 ```
 
 </details>
@@ -193,11 +193,23 @@ export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 ```bash
 export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+export SYCL_CACHE_PERSISTENT=1
 export ENABLE_SDP_FUSION=1
 ```
 > Note: Please note that `libtcmalloc.so` can be installed by `conda install -c conda-forge -y gperftools=2.10`.
 </details>
 
+<details>
+
+<summary>For Intel iGPU</summary>
+
+```bash
+export SYCL_CACHE_PERSISTENT=1
+export BIGDL_LLM_XMX_DISABLED=1
+```
+
+</details>
+
 #### 3.2 Configurations for Windows
 <details>
 
@@ -212,7 +224,7 @@ set BIGDL_LLM_XMX_DISABLED=1
 
 <details>
 
-<summary>For Intel Arc™ A300-Series or Pro A60</summary>
+<summary>For Intel Arc™ A-Series Graphics</summary>
 
 ```cmd
 set SYCL_CACHE_PERSISTENT=1
@@ -220,15 +232,8 @@ set SYCL_CACHE_PERSISTENT=1
 
 </details>
 
-<details>
-
-<summary>For other Intel dGPU Series</summary>
-
-There is no need to set further environment variables.
-
-</details>
-
-> Note: For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
+> [!NOTE]
+> The first time each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
 
 ### 4. Running examples
 **Stream Chat using `stream_chat()` API**:
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/chatglm3/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/chatglm3/README.md
index 8d235b12..c4aca0e9 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/chatglm3/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/chatglm3/README.md
@@ -22,8 +22,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
@@ -158,8 +156,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/chinese-llama2/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/chinese-llama2/README.md
index 86ea6ddb..974bca27 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/chinese-llama2/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/chinese-llama2/README.md
@@ -21,8 +21,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/codegemma/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/codegemma/README.md
index 606284da..b0564824 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/codegemma/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/codegemma/README.md
@@ -28,8 +28,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/codellama/readme.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/codellama/readme.md
index 4c6a6903..f977a09b 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/codellama/readme.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/codellama/readme.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/cohere/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/cohere/README.md
index 8ab61799..ab6947d4 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/cohere/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/cohere/README.md
@@ -26,8 +26,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/deciLM-7b/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/deciLM-7b/README.md
index 8822ff45..885cf792 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/deciLM-7b/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/deciLM-7b/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/deepseek/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/deepseek/README.md
index 03446bd6..04ef2859 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/deepseek/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/deepseek/README.md
@@ -21,8 +21,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/distil-whisper/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/distil-whisper/README.md
index 70c07725..954e7460 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/distil-whisper/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/distil-whisper/README.md
@@ -24,8 +24,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/dolly-v1/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/dolly-v1/README.md
index cdf488a6..cc45b258 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/dolly-v1/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/dolly-v1/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/dolly-v2/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/dolly-v2/README.md
index f515d6e8..ace4489b 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/dolly-v2/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/dolly-v2/README.md
@@ -21,8 +21,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/falcon/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/falcon/README.md
index b1472e47..0749d294 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/falcon/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/falcon/README.md
@@ -24,8 +24,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/flan-t5/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/flan-t5/README.md
index 16a0f106..a309c8e5 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/flan-t5/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/flan-t5/README.md
@@ -21,8 +21,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/gemma/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/gemma/README.md
index a65bba80..14aa69db 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/gemma/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/gemma/README.md
@@ -26,8 +26,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/gpt-j/readme.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/gpt-j/readme.md
index 74405ad0..25b3fa86 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/gpt-j/readme.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/gpt-j/readme.md
@@ -21,8 +21,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/internlm/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/internlm/README.md
index 114fcdad..72dfccb6 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/internlm/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/internlm/README.md
@@ -21,8 +21,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/internlm2/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/internlm2/README.md
index ea314a1d..4612052e 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/internlm2/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/internlm2/README.md
@@ -21,8 +21,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/llama2/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/llama2/README.md
index 62f582e6..cdd06484 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/llama2/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/llama2/README.md
@@ -21,8 +21,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/llama3/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/llama3/README.md
index 87a021fc..3280ead6 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/llama3/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/llama3/README.md
@@ -24,8 +24,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/mistral/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/mistral/README.md
index 53c0d4f6..4de40cab 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/mistral/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/mistral/README.md
@@ -26,8 +26,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/mixtral/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/mixtral/README.md
index a93c8bf8..5e944968 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/mixtral/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/mixtral/README.md
@@ -26,8 +26,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/mpt/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/mpt/README.md
index 0f1caed7..2bafc0e3 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/mpt/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/mpt/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/phi-1_5/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/phi-1_5/README.md
index e7522b85..a9d4a59f 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/phi-1_5/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/phi-1_5/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/phi-2/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/phi-2/README.md
index 12ff1653..d8c37adb 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/phi-2/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/phi-2/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/phi-3/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/phi-3/README.md
index a05ab6d2..cb5b2557 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/phi-3/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/phi-3/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/phixtral/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/phixtral/README.md
index 4f6861ce..ab690af3 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/phixtral/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/phixtral/README.md
@@ -21,8 +21,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/qwen-vl/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/qwen-vl/README.md
index f7b9cb12..fb02816b 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/qwen-vl/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/qwen-vl/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/qwen/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/qwen/README.md
index 127ece18..500e2b0f 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/qwen/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/qwen/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/qwen1.5/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/qwen1.5/README.md
index e0fadcc1..681f035a 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/qwen1.5/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/qwen1.5/README.md
@@ -27,8 +27,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/redpajama/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/redpajama/README.md
index 5af141a5..1376e42d 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/redpajama/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/redpajama/README.md
@@ -21,8 +21,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/replit/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/replit/README.md
index adb93547..7c12b977 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/replit/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/replit/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/rwkv4/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/rwkv4/README.md
index cc3a6210..55d94f6d 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/rwkv4/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/rwkv4/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/rwkv5/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/rwkv5/README.md
index 62ac598c..63cff014 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/rwkv5/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/rwkv5/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/solar/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/solar/README.md
index ed66eca3..811d712e 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/solar/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/solar/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/stablelm/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/stablelm/README.md
index e000b7a3..5b5f18dd 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/stablelm/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/stablelm/README.md
@@ -24,8 +24,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/starcoder/readme.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/starcoder/readme.md
index 3b6f4053..2787a13d 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/starcoder/readme.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/starcoder/readme.md
@@ -21,8 +21,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/vicuna/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/vicuna/README.md
index 81f91d4e..852c29de 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/vicuna/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/vicuna/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/voiceassistant/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/voiceassistant/README.md
index 44fdfff2..67c0fb26 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/voiceassistant/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/voiceassistant/README.md
@@ -29,8 +29,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/whisper/readme.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/whisper/readme.md
index 3925bc44..29a4dc46 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/whisper/readme.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/whisper/readme.md
@@ -24,8 +24,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/yi/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/yi/README.md
index 9bfa9637..1cda8888 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/yi/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/yi/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/yuan2/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/yuan2/README.md
index 98bca49e..dd42cec1 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/yuan2/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/yuan2/README.md
@@ -26,8 +26,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Save-Load/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Save-Load/README.md
index f9849ff8..9aae6c8c 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Save-Load/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Save-Load/README.md
@@ -26,20 +26,19 @@ conda activate llm
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 ```
 
-### 2. Configures OneAPI environment variables
-#### 2.1 Configurations for Linux
+### 2. Configure oneAPI environment variables for Linux
+
+> [!NOTE]
+> Skip this step if you are running on Windows.
+
+This step is required on Linux if oneAPI was installed via APT or the offline installer. Skip this step if oneAPI was installed via pip.
+
 ```bash
 source /opt/intel/oneapi/setvars.sh
 ```
 
-#### 2.2 Configurations for Windows
-```cmd
-call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
-```
-> Note: Please make sure you are using **CMD** (**Anaconda Prompt** if using conda) to run the command as PowerShell is not supported.
-
-
-### 3. Run
+### 3. Runtime Configurations
+For optimal performance, it is recommended to set several environment variables. Please check out the suggestions based on your device.
 #### 3.1 Configurations for Linux
 <details>
 
@@ -48,6 +47,7 @@ call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
 ```bash
 export USE_XETLA=OFF
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+export SYCL_CACHE_PERSISTENT=1
 ```
 
 </details>
@@ -59,11 +59,23 @@ export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 ```bash
 export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+export SYCL_CACHE_PERSISTENT=1
 export ENABLE_SDP_FUSION=1
 ```
 > Note: Please note that `libtcmalloc.so` can be installed by `conda install -c conda-forge -y gperftools=2.10`.
 </details>
 
+<details>
+
+<summary>For Intel iGPU</summary>
+
+```bash
+export SYCL_CACHE_PERSISTENT=1
+export BIGDL_LLM_XMX_DISABLED=1
+```
+
+</details>
+
 #### 3.2 Configurations for Windows
 <details>
 
@@ -78,7 +90,7 @@ set BIGDL_LLM_XMX_DISABLED=1
 
 <details>
 
-<summary>For Intel Arc™ A300-Series or Pro A60</summary>
+<summary>For Intel Arc™ A-Series Graphics</summary>
 
 ```cmd
 set SYCL_CACHE_PERSISTENT=1
@@ -86,15 +98,8 @@ set SYCL_CACHE_PERSISTENT=1
 
 </details>
 
-<details>
-
-<summary>For other Intel dGPU Series</summary>
-
-There is no need to set further environment variables.
-
-</details>
-
-> Note: For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
+> [!NOTE]
+> The first time each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
 
 ### 4. Running examples
 
diff --git a/python/llm/example/GPU/LangChain/README.md b/python/llm/example/GPU/LangChain/README.md
index 3115bdaf..ea5638cf 100644
--- a/python/llm/example/GPU/LangChain/README.md
+++ b/python/llm/example/GPU/LangChain/README.md
@@ -13,16 +13,17 @@ pip install -U chromadb==0.3.25
 pip install -U pandas==2.0.3
 ```
 
-### 3. Configures OneAPI environment variables
-#### 3.1 Configurations for Linux
+### 3. Configure oneAPI environment variables for Linux
+
+> [!NOTE]
+> Skip this step if you are running on Windows.
+
+This step is required on Linux if oneAPI was installed via APT or the offline installer. Skip this step if oneAPI was installed via pip.
+
 ```bash
 source /opt/intel/oneapi/setvars.sh
 ```
-#### 3.2 Configurations for Windows
-```cmd
-call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
-```
-> Note: Please make sure you are using **CMD** (**Anaconda Prompt** if using conda) to run the command as PowerShell is not supported.
+
 ### 4. Runtime Configurations
 For optimal performance, it is recommended to set several environment variables. Please check out the suggestions based on your device.
 #### 4.1 Configurations for Linux
@@ -33,6 +34,7 @@ For optimal performance, it is recommended to set several environment variables.
 ```bash
 export USE_XETLA=OFF
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+export SYCL_CACHE_PERSISTENT=1
 ```
 
 </details>
@@ -44,11 +46,23 @@ export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 ```bash
 export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+export SYCL_CACHE_PERSISTENT=1
 export ENABLE_SDP_FUSION=1
 ```
 > Note: Please note that `libtcmalloc.so` can be installed by `conda install -c conda-forge -y gperftools=2.10`.
 </details>
 
+<details>
+
+<summary>For Intel iGPU</summary>
+
+```bash
+export SYCL_CACHE_PERSISTENT=1
+export BIGDL_LLM_XMX_DISABLED=1
+```
+
+</details>
+
 #### 4.2 Configurations for Windows
 <details>
 
@@ -63,7 +77,7 @@ set BIGDL_LLM_XMX_DISABLED=1
 
 <details>
 
-<summary>For Intel Arc™ A300-Series or Pro A60</summary>
+<summary>For Intel Arc™ A-Series Graphics</summary>
 
 ```cmd
 set SYCL_CACHE_PERSISTENT=1
@@ -71,15 +85,8 @@ set SYCL_CACHE_PERSISTENT=1
 
 </details>
 
-<details>
-
-<summary>For other Intel dGPU Series</summary>
-
-There is no need to set further environment variables.
-
-</details>
-
-> Note: For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
+> [!NOTE]
+> The first time each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
 
 ### 5. Run the examples
 
diff --git a/python/llm/example/GPU/LlamaIndex/README.md b/python/llm/example/GPU/LlamaIndex/README.md
index b01d4c47..53d5d7dd 100644
--- a/python/llm/example/GPU/LlamaIndex/README.md
+++ b/python/llm/example/GPU/LlamaIndex/README.md
@@ -71,16 +71,17 @@ The RAG example ([rag.py](./rag.py)) is adapted from the [Official llama index R
     wget --user-agent "Mozilla" "https://arxiv.org/pdf/2307.09288.pdf" -O "data/llama2.pdf"
     ```
 
-### 2. Configures OneAPI environment variables
-#### 2.1 Configurations for Linux
+### 2. Configure oneAPI environment variables for Linux
+
+> [!NOTE]
+> Skip this step if you are running on Windows.
+
+This step is required on Linux if oneAPI was installed via APT or the offline installer. Skip this step if oneAPI was installed via pip.
+
 ```bash
 source /opt/intel/oneapi/setvars.sh
 ```
-#### 2.2 Configurations for Windows
-```cmd
-call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
-```
-> Note: Please make sure you are using **CMD** (**Anaconda Prompt** if using conda) to run the command as PowerShell is not supported.
+
 ### 3. Runtime Configurations
 For optimal performance, it is recommended to set several environment variables. Please check out the suggestions based on your device.
 #### 3.1 Configurations for Linux
@@ -91,6 +92,7 @@ For optimal performance, it is recommended to set several environment variables.
 ```bash
 export USE_XETLA=OFF
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+export SYCL_CACHE_PERSISTENT=1
 ```
 
 </details>
@@ -102,11 +104,23 @@ export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 ```bash
 export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+export SYCL_CACHE_PERSISTENT=1
 export ENABLE_SDP_FUSION=1
 ```
 > Note: Please note that `libtcmalloc.so` can be installed by `conda install -c conda-forge -y gperftools=2.10`.
 </details>
 
+<details>
+
+<summary>For Intel iGPU</summary>
+
+```bash
+export SYCL_CACHE_PERSISTENT=1
+export BIGDL_LLM_XMX_DISABLED=1
+```
+
+</details>
+
 #### 3.2 Configurations for Windows
 <details>
 
@@ -121,7 +135,7 @@ set BIGDL_LLM_XMX_DISABLED=1
 
 <details>
 
-<summary>For Intel Arc™ A300-Series or Pro A60</summary>
+<summary>For Intel Arc™ A-Series Graphics</summary>
 
 ```cmd
 set SYCL_CACHE_PERSISTENT=1
@@ -129,15 +143,8 @@ set SYCL_CACHE_PERSISTENT=1
 
 </details>
 
-<details>
-
-<summary>For other Intel dGPU Series</summary>
-
-There is no need to set further environment variables.
-
-</details>
-
-> Note: For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
+> [!NOTE]
+> The first time each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
 
 
 ### 4. Running the RAG example
diff --git a/python/llm/example/GPU/Long-Context/Chatglm3-32K/README.md b/python/llm/example/GPU/Long-Context/Chatglm3-32K/README.md
index 7d3590b9..b030eb9e 100644
--- a/python/llm/example/GPU/Long-Context/Chatglm3-32K/README.md
+++ b/python/llm/example/GPU/Long-Context/Chatglm3-32K/README.md
@@ -21,20 +21,22 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
+
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 ```
 
-### 2. Configures OneAPI environment variables
-#### 2.1 Configurations for Linux
+### 2. Configure oneAPI environment variables for Linux
+
+> [!NOTE]
+> Skip this step if you are running on Windows.
+
+This step is required on Linux if oneAPI was installed via APT or the offline installer. Skip this step if oneAPI was installed via pip.
+
 ```bash
 source /opt/intel/oneapi/setvars.sh
 ```
-#### 2.2 Configurations for Windows
-```cmd
-call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
-```
-> Note: Please make sure you are using **CMD** (**Anaconda Prompt** if using conda) to run the command as PowerShell is not supported.
+
 ### 3. Runtime Configurations
 For optimal performance, it is recommended to set several environment variables. Please check out the suggestions based on your device.
 #### 3.1 Configurations for Linux
@@ -45,6 +47,7 @@ For optimal performance, it is recommended to set several environment variables.
 ```bash
 export USE_XETLA=OFF
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+export SYCL_CACHE_PERSISTENT=1
 ```
 
 </details>
@@ -56,11 +59,23 @@ export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 ```bash
 export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+export SYCL_CACHE_PERSISTENT=1
 export ENABLE_SDP_FUSION=1
 ```
 > Note: Please note that `libtcmalloc.so` can be installed by `conda install -c conda-forge -y gperftools=2.10`.
 </details>
 
+<details>
+
+<summary>For Intel iGPU</summary>
+
+```bash
+export SYCL_CACHE_PERSISTENT=1
+export BIGDL_LLM_XMX_DISABLED=1
+```
+
+</details>
+
 #### 3.2 Configurations for Windows
 <details>
 
@@ -75,7 +90,7 @@ set BIGDL_LLM_XMX_DISABLED=1
 
 <details>
 
-<summary>For Intel Arc™ A300-Series or Pro A60</summary>
+<summary>For Intel Arc™ A-Series Graphics</summary>
 
 ```cmd
 set SYCL_CACHE_PERSISTENT=1
@@ -83,15 +98,9 @@ set SYCL_CACHE_PERSISTENT=1
 
 </details>
 
-<details>
+> [!NOTE]
+> The first time each model runs on an Intel iGPU, Intel Arc™ A300-Series, or Pro A60, it may take several minutes to compile.
 
-<summary>For other Intel dGPU Series</summary>
-
-There is no need to set further environment variables.
-
-</details>
-
-> Note: For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
 ### 4. Running examples
 #### 4.1 Using simple prompt
 ```
diff --git a/python/llm/example/GPU/Long-Context/LLaMA2-32K/README.md b/python/llm/example/GPU/Long-Context/LLaMA2-32K/README.md
index d4a0fc34..20529a0d 100644
--- a/python/llm/example/GPU/Long-Context/LLaMA2-32K/README.md
+++ b/python/llm/example/GPU/Long-Context/LLaMA2-32K/README.md
@@ -21,20 +21,22 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
+
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 ```
 
-### 2. Configures OneAPI environment variables
-#### 2.1 Configurations for Linux
+### 2. Configure oneAPI environment variables for Linux
+
+> [!NOTE]
+> Skip this step if you are running on Windows.
+
+This step is required on Linux if oneAPI was installed via APT or the offline installer. Skip this step if oneAPI was installed via pip.
+
 ```bash
 source /opt/intel/oneapi/setvars.sh
 ```
-#### 2.2 Configurations for Windows
-```cmd
-call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
-```
-> Note: Please make sure you are using **CMD** (**Anaconda Prompt** if using conda) to run the command as PowerShell is not supported.
+
 ### 3. Runtime Configurations
 For optimal performance, it is recommended to set several environment variables. Please check out the suggestions based on your device.
 #### 3.1 Configurations for Linux
@@ -45,6 +47,7 @@ For optimal performance, it is recommended to set several environment variables.
 ```bash
 export USE_XETLA=OFF
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+export SYCL_CACHE_PERSISTENT=1
 ```
 
 </details>
@@ -56,11 +59,23 @@ export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 ```bash
 export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+export SYCL_CACHE_PERSISTENT=1
 export ENABLE_SDP_FUSION=1
 ```
 > Note: Please note that `libtcmalloc.so` can be installed by `conda install -c conda-forge -y gperftools=2.10`.
 </details>
 
+<details>
+
+<summary>For Intel iGPU</summary>
+
+```bash
+export SYCL_CACHE_PERSISTENT=1
+export BIGDL_LLM_XMX_DISABLED=1
+```
+
+</details>
+
 #### 3.2 Configurations for Windows
 <details>
 
@@ -75,7 +90,7 @@ set BIGDL_LLM_XMX_DISABLED=1
 
 <details>
 
-<summary>For Intel Arc™ A300-Series or Pro A60</summary>
+<summary>For Intel Arc™ A-Series Graphics</summary>
 
 ```cmd
 set SYCL_CACHE_PERSISTENT=1
@@ -83,15 +98,9 @@ set SYCL_CACHE_PERSISTENT=1
 
 </details>
 
-<details>
+> [!NOTE]
+> The first time each model runs on an Intel iGPU, Intel Arc™ A300-Series, or Pro A60, it may take several minutes to compile.
 
-<summary>For other Intel dGPU Series</summary>
-
-There is no need to set further environment variables.
-
-</details>
-
-> Note: For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
 ### 4. Running examples
 #### 4.1 Using simple prompt
 ```
diff --git a/python/llm/example/GPU/ModelScope-Models/README.md b/python/llm/example/GPU/ModelScope-Models/README.md
index fe3227c2..2b5ecacb 100644
--- a/python/llm/example/GPU/ModelScope-Models/README.md
+++ b/python/llm/example/GPU/ModelScope-Models/README.md
@@ -29,27 +29,28 @@ pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-exte
 pip install modelscope
 ```
 
-### 2. Configures OneAPI environment variables
-#### 2.1 Configurations for Linux
+### 2. Configure oneAPI environment variables for Linux
+
+> [!NOTE]
+> Skip this step if you are running on Windows.
+
+This step is required on Linux if oneAPI was installed via APT or the offline installer. Skip this step if oneAPI was installed via pip.
+
 ```bash
 source /opt/intel/oneapi/setvars.sh
 ```
 
-#### 2.2 Configurations for Windows
-```cmd
-call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
-```
-> Note: Please make sure you are using **CMD** (**Anaconda Prompt** if using conda) to run the command as PowerShell is not supported.
-
 ### 3. Runtime Configurations
 For optimal performance, it is recommended to set several environment variables. Please check out the suggestions based on your device.
 #### 3.1 Configurations for Linux
 <details>
 
 <summary>For Intel Arc™ A-Series Graphics and Intel Data Center GPU Flex Series</summary>
+
 ```bash
 export USE_XETLA=OFF
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+export SYCL_CACHE_PERSISTENT=1
 ```
 
 </details>
@@ -61,11 +62,23 @@ export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 ```bash
 export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+export SYCL_CACHE_PERSISTENT=1
 export ENABLE_SDP_FUSION=1
 ```
 > Note: Please note that `libtcmalloc.so` can be installed by `conda install -c conda-forge -y gperftools=2.10`.
 </details>
 
+<details>
+
+<summary>For Intel iGPU</summary>
+
+```bash
+export SYCL_CACHE_PERSISTENT=1
+export BIGDL_LLM_XMX_DISABLED=1
+```
+
+</details>
+
 #### 3.2 Configurations for Windows
 <details>
 
@@ -80,7 +93,7 @@ set BIGDL_LLM_XMX_DISABLED=1
 
 <details>
 
-<summary>For Intel Arc™ A300-Series or Pro A60</summary>
+<summary>For Intel Arc™ A-Series Graphics</summary>
 
 ```cmd
 set SYCL_CACHE_PERSISTENT=1
@@ -88,15 +101,9 @@ set SYCL_CACHE_PERSISTENT=1
 
 </details>
 
-<details>
+> [!NOTE]
+> The first time each model runs on an Intel iGPU, Intel Arc™ A300-Series, or Pro A60, it may take several minutes to compile.
 
-<summary>For other Intel dGPU Series</summary>
-
-There is no need to set further environment variables.
-
-</details>
-
-> Note: For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
 ### 4. Running examples
 
 ```
diff --git a/python/llm/example/GPU/ModelScope-Models/Save-Load/README.md b/python/llm/example/GPU/ModelScope-Models/Save-Load/README.md
index 33b1b900..ccd3a115 100644
--- a/python/llm/example/GPU/ModelScope-Models/Save-Load/README.md
+++ b/python/llm/example/GPU/ModelScope-Models/Save-Load/README.md
@@ -28,20 +28,19 @@ pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-exte
 pip install modelscope==1.11.0
 ```
 
-### 2. Configures OneAPI environment variables
-#### 2.1 Configurations for Linux
+### 2. Configure oneAPI environment variables for Linux
+
+> [!NOTE]
+> Skip this step if you are running on Windows.
+
+This step is required on Linux if oneAPI was installed via APT or the offline installer. Skip this step if oneAPI was installed via pip.
+
 ```bash
 source /opt/intel/oneapi/setvars.sh
 ```
 
-#### 2.2 Configurations for Windows
-```cmd
-call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
-```
-> Note: Please make sure you are using **CMD** (**Anaconda Prompt** if using conda) to run the command as PowerShell is not supported.
-
-
-### 3. Run
+### 3. Runtime Configurations
+For optimal performance, it is recommended to set several environment variables. Please check out the suggestions based on your device.
 #### 3.1 Configurations for Linux
 <details>
 
@@ -50,6 +49,7 @@ call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
 ```bash
 export USE_XETLA=OFF
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+export SYCL_CACHE_PERSISTENT=1
 ```
 
 </details>
@@ -61,11 +61,23 @@ export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 ```bash
 export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+export SYCL_CACHE_PERSISTENT=1
 export ENABLE_SDP_FUSION=1
 ```
 > Note: Please note that `libtcmalloc.so` can be installed by `conda install -c conda-forge -y gperftools=2.10`.
 </details>
 
+<details>
+
+<summary>For Intel iGPU</summary>
+
+```bash
+export SYCL_CACHE_PERSISTENT=1
+export BIGDL_LLM_XMX_DISABLED=1
+```
+
+</details>
+
 #### 3.2 Configurations for Windows
 <details>
 
@@ -80,7 +92,7 @@ set BIGDL_LLM_XMX_DISABLED=1
 
 <details>
 
-<summary>For Intel Arc™ A300-Series or Pro A60</summary>
+<summary>For Intel Arc™ A-Series Graphics</summary>
 
 ```cmd
 set SYCL_CACHE_PERSISTENT=1
@@ -88,15 +100,8 @@ set SYCL_CACHE_PERSISTENT=1
 
 </details>
 
-<details>
-
-<summary>For other Intel dGPU Series</summary>
-
-There is no need to set further environment variables.
-
-</details>
-
-> Note: For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
+> [!NOTE]
+> The first time each model runs on an Intel iGPU, Intel Arc™ A300-Series, or Pro A60, it may take several minutes to compile.
 
 ### 4. Running examples
 
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/aquila2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/aquila2/README.md
index c81fc6d7..da481b37 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/aquila2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/aquila2/README.md
@@ -21,8 +21,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/baichuan/README.md b/python/llm/example/GPU/PyTorch-Models/Model/baichuan/README.md
index 41a986e5..d786bc7f 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/baichuan/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/baichuan/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/baichuan2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/baichuan2/README.md
index 9f62c2fd..d4f120f5 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/baichuan2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/baichuan2/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/bark/README.md b/python/llm/example/GPU/PyTorch-Models/Model/bark/README.md
index 31eb9afc..5455050e 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/bark/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/bark/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/bluelm/README.md b/python/llm/example/GPU/PyTorch-Models/Model/bluelm/README.md
index 301e3525..db198645 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/bluelm/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/bluelm/README.md
@@ -21,8 +21,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/chatglm2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/chatglm2/README.md
index 6ad1407d..ace1a38b 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/chatglm2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/chatglm2/README.md
@@ -21,8 +21,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
@@ -151,8 +149,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/chatglm3/README.md b/python/llm/example/GPU/PyTorch-Models/Model/chatglm3/README.md
index a7077195..e85d070a 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/chatglm3/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/chatglm3/README.md
@@ -21,8 +21,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
@@ -150,8 +148,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/codegemma/README.md b/python/llm/example/GPU/PyTorch-Models/Model/codegemma/README.md
index fa7363b3..df37bf83 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/codegemma/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/codegemma/README.md
@@ -28,8 +28,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/codellama/README.md b/python/llm/example/GPU/PyTorch-Models/Model/codellama/README.md
index 617b8bf5..497a6828 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/codellama/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/codellama/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/cohere/README.md b/python/llm/example/GPU/PyTorch-Models/Model/cohere/README.md
index 8ab61799..ab6947d4 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/cohere/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/cohere/README.md
@@ -26,8 +26,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/deciLM-7b/README.md b/python/llm/example/GPU/PyTorch-Models/Model/deciLM-7b/README.md
index 3bbdc081..ff8eab5a 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/deciLM-7b/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/deciLM-7b/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/deepseek/README.md b/python/llm/example/GPU/PyTorch-Models/Model/deepseek/README.md
index 3f777467..4c1a9898 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/deepseek/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/deepseek/README.md
@@ -21,8 +21,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/distil-whisper/README.md b/python/llm/example/GPU/PyTorch-Models/Model/distil-whisper/README.md
index f5407486..8268a0c0 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/distil-whisper/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/distil-whisper/README.md
@@ -24,8 +24,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/dolly-v1/README.md b/python/llm/example/GPU/PyTorch-Models/Model/dolly-v1/README.md
index 006c68dd..c35fbd4f 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/dolly-v1/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/dolly-v1/README.md
@@ -21,8 +21,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/dolly-v2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/dolly-v2/README.md
index 8de52c9c..2c88b488 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/dolly-v2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/dolly-v2/README.md
@@ -21,8 +21,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/flan-t5/README.md b/python/llm/example/GPU/PyTorch-Models/Model/flan-t5/README.md
index d4a45ec4..f7a3c9ad 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/flan-t5/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/flan-t5/README.md
@@ -21,8 +21,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/internlm2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/internlm2/README.md
index ea314a1d..4612052e 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/internlm2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/internlm2/README.md
@@ -21,8 +21,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/llama2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/llama2/README.md
index 05f24e86..bde37043 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/llama2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/llama2/README.md
@@ -21,8 +21,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/llama3/README.md b/python/llm/example/GPU/PyTorch-Models/Model/llama3/README.md
index 32c45dd5..545a6288 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/llama3/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/llama3/README.md
@@ -24,8 +24,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/llava/README.md b/python/llm/example/GPU/PyTorch-Models/Model/llava/README.md
index 06449d2b..461ae53a 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/llava/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/llava/README.md
@@ -29,8 +29,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/mamba/README.md b/python/llm/example/GPU/PyTorch-Models/Model/mamba/README.md
index d6c4f53a..2efc010d 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/mamba/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/mamba/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/mistral/README.md b/python/llm/example/GPU/PyTorch-Models/Model/mistral/README.md
index 9423f514..4fc017e1 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/mistral/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/mistral/README.md
@@ -26,8 +26,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/mixtral/README.md b/python/llm/example/GPU/PyTorch-Models/Model/mixtral/README.md
index c54a257f..3179f431 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/mixtral/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/mixtral/README.md
@@ -26,8 +26,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/phi-1_5/README.md b/python/llm/example/GPU/PyTorch-Models/Model/phi-1_5/README.md
index 945d2665..3a4c88dc 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/phi-1_5/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/phi-1_5/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/phi-2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/phi-2/README.md
index 7c0707f9..0ae7e51b 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/phi-2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/phi-2/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/phi-3/README.md b/python/llm/example/GPU/PyTorch-Models/Model/phi-3/README.md
index ed8051b6..cc4542c9 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/phi-3/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/phi-3/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/phixtral/README.md b/python/llm/example/GPU/PyTorch-Models/Model/phixtral/README.md
index 37068e0d..1458f2ab 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/phixtral/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/phixtral/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/qwen-vl/README.md b/python/llm/example/GPU/PyTorch-Models/Model/qwen-vl/README.md
index 6d0bc5b5..5f9a617a 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/qwen-vl/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/qwen-vl/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/qwen1.5/README.md b/python/llm/example/GPU/PyTorch-Models/Model/qwen1.5/README.md
index 110b1f31..801fdd59 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/qwen1.5/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/qwen1.5/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/replit/README.md b/python/llm/example/GPU/PyTorch-Models/Model/replit/README.md
index 3627c58e..4938682a 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/replit/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/replit/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/solar/README.md b/python/llm/example/GPU/PyTorch-Models/Model/solar/README.md
index bd549640..2b718cd4 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/solar/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/solar/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/speech-t5/README.md b/python/llm/example/GPU/PyTorch-Models/Model/speech-t5/README.md
index bec5410a..171ff392 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/speech-t5/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/speech-t5/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/stablelm/README.md b/python/llm/example/GPU/PyTorch-Models/Model/stablelm/README.md
index bb68e00a..3c195295 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/stablelm/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/stablelm/README.md
@@ -24,8 +24,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/starcoder/README.md b/python/llm/example/GPU/PyTorch-Models/Model/starcoder/README.md
index aec53ec9..e1ffd7d6 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/starcoder/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/starcoder/README.md
@@ -21,8 +21,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/yi/README.md b/python/llm/example/GPU/PyTorch-Models/Model/yi/README.md
index b3f751af..b48b9532 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/yi/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/yi/README.md
@@ -23,8 +23,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/yuan2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/yuan2/README.md
index 8c562b22..87c1ccd9 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/yuan2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/yuan2/README.md
@@ -26,8 +26,6 @@ We suggest using conda to manage environment:
 ```bash
 conda create -n llm python=3.11 libuv
 conda activate llm
-# below command will use pip to install the Intel oneAPI Base Toolkit 2024.0
-pip install dpcpp-cpp-rt==2024.0.2 mkl-dpcpp==2024.0.0 onednn==2024.0.0
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/