diff --git a/docker/llm/serving/cpu/docker/Dockerfile b/docker/llm/serving/cpu/docker/Dockerfile
index 29fa58c7..7da76bbd 100644
--- a/docker/llm/serving/cpu/docker/Dockerfile
+++ b/docker/llm/serving/cpu/docker/Dockerfile
@@ -75,7 +75,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     pip install Jinja2==3.1.3 && \
     pip install torch==2.2.0 torchvision==0.17.0 torchaudio==2.2.0 --index-url https://download.pytorch.org/whl/cpu && \
     pip install intel-extension-for-pytorch==2.2.0 && \
-    pip install oneccl_bind_pt==2.2.0 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/cn/ && \
+    pip install oneccl_bind_pt==2.2.0 --index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/cn/ && \
     pip install transformers==4.36.2 && \
 # Install vllm dependencies
     pip install --upgrade fastapi && \
diff --git a/docs/mddocs/Quickstart/deepspeed_autotp_fastapi_quickstart.md b/docs/mddocs/Quickstart/deepspeed_autotp_fastapi_quickstart.md
index 0fa9888b..70185b86 100644
--- a/docs/mddocs/Quickstart/deepspeed_autotp_fastapi_quickstart.md
+++ b/docs/mddocs/Quickstart/deepspeed_autotp_fastapi_quickstart.md
@@ -20,7 +20,7 @@ conda create -n llm python=3.11
 conda activate llm
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+pip install oneccl_bind_pt==2.1.100 --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 # configures OneAPI environment variables
 source /opt/intel/oneapi/setvars.sh
 pip install git+https://github.com/microsoft/DeepSpeed.git@ed8aed5
diff --git a/python/llm/example/CPU/QLoRA-FineTuning/alpaca-qlora/README.md b/python/llm/example/CPU/QLoRA-FineTuning/alpaca-qlora/README.md
index 899f7d8b..dc1cc6f7 100644
--- a/python/llm/example/CPU/QLoRA-FineTuning/alpaca-qlora/README.md
+++ b/python/llm/example/CPU/QLoRA-FineTuning/alpaca-qlora/README.md
@@ -53,7 +53,7 @@ python ./alpaca_qlora_finetuning_cpu.py \
 ```bash
 # need to run the alpaca stand-alone version first
 # for using mpirun
-pip install oneccl_bind_pt --extra-index-url https://developer.intel.com/ipex-whl-stable
+pip install oneccl_bind_pt --index-url https://developer.intel.com/ipex-whl-stable
 ```
 
 2. modify conf in `finetune_one_node_two_sockets.sh` and run
diff --git a/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/baichuan2/README.md b/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/baichuan2/README.md
index ece64c34..26a3ba91 100644
--- a/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/baichuan2/README.md
+++ b/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/baichuan2/README.md
@@ -69,7 +69,7 @@ To accelerate speculative decoding on CPU, optionally, you can install our valid
 ```bash
 python -m pip install torch==2.2.0 torchvision==0.17.0 torchaudio==2.2.0 --index-url https://download.pytorch.org/whl/cpu
 python -m pip install intel-extension-for-pytorch==2.2.0
-python -m pip install oneccl_bind_pt==2.2.0 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
+python -m pip install oneccl_bind_pt==2.2.0 --index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
 # if there is any installation problem for oneccl_binding, you can also find suitable index url at "https://pytorch-extension.intel.com/release-whl/stable/cpu/cn/" or "https://developer.intel.com/ipex-whl-stable-cpu" according to your environment.
 
 # Install other dependencies
diff --git a/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/llama2/README.md b/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/llama2/README.md
index e5702c12..04750429 100644
--- a/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/llama2/README.md
+++ b/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/llama2/README.md
@@ -104,7 +104,7 @@ To accelerate speculative decoding on CPU, you can install our validated version
 # Install IPEX 2.2.0+cpu
 python -m pip install torch==2.2.0 torchvision==0.17.0 torchaudio==2.2.0 --index-url https://download.pytorch.org/whl/cpu
 python -m pip install intel-extension-for-pytorch==2.2.0
-python -m pip install oneccl_bind_pt==2.2.0 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
+python -m pip install oneccl_bind_pt==2.2.0 --index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
 # if there is any installation problem for oneccl_binding, you can also find suitable index url at "https://pytorch-extension.intel.com/release-whl/stable/cpu/cn/" or "https://developer.intel.com/ipex-whl-stable-cpu" according to your environment.
 
 # Update transformers
diff --git a/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/llama3/README.md b/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/llama3/README.md
index 84a0df2b..01f21473 100644
--- a/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/llama3/README.md
+++ b/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/llama3/README.md
@@ -81,7 +81,7 @@ To accelerate speculative decoding on CPU, you can install our validated version
 # Install IPEX 2.2.0+cpu
 python -m pip install torch==2.2.0 torchvision==0.17.0 torchaudio==2.2.0 --index-url https://download.pytorch.org/whl/cpu
 python -m pip install intel-extension-for-pytorch==2.2.0
-python -m pip install oneccl_bind_pt==2.2.0 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
+python -m pip install oneccl_bind_pt==2.2.0 --index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
 # if there is any installation problem for oneccl_binding, you can also find suitable index url at "https://pytorch-extension.intel.com/release-whl/stable/cpu/cn/" or "https://developer.intel.com/ipex-whl-stable-cpu" according to your environment.
 
 # Update transformers
diff --git a/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/mistral/README.md b/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/mistral/README.md
index b58007a6..5d43aae9 100644
--- a/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/mistral/README.md
+++ b/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/mistral/README.md
@@ -90,7 +90,7 @@ To accelerate speculative decoding on CPU, you can install our validated version
 # Install IPEX 2.2.0+cpu
 python -m pip install torch==2.2.0 torchvision==0.17.0 torchaudio==2.2.0 --index-url https://download.pytorch.org/whl/cpu
 python -m pip install intel-extension-for-pytorch==2.2.0
-python -m pip install oneccl_bind_pt==2.2.0 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
+python -m pip install oneccl_bind_pt==2.2.0 --index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
 # if there is any installation problem for oneccl_binding, you can also find suitable index url at "https://pytorch-extension.intel.com/release-whl/stable/cpu/cn/" or "https://developer.intel.com/ipex-whl-stable-cpu" according to your environment.
 
 # Update transformers
diff --git a/python/llm/example/GPU/Deepspeed-AutoTP-FastAPI/README.md b/python/llm/example/GPU/Deepspeed-AutoTP-FastAPI/README.md
index 437c23d4..a84072fc 100644
--- a/python/llm/example/GPU/Deepspeed-AutoTP-FastAPI/README.md
+++ b/python/llm/example/GPU/Deepspeed-AutoTP-FastAPI/README.md
@@ -15,7 +15,7 @@ conda create -n llm python=3.11
 conda activate llm
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+pip install oneccl_bind_pt==2.1.100 --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 # configures OneAPI environment variables
 source /opt/intel/oneapi/setvars.sh
 pip install git+https://github.com/microsoft/DeepSpeed.git@ed8aed5
diff --git a/python/llm/example/GPU/LLM-Finetuning/HF-PEFT/README.md b/python/llm/example/GPU/LLM-Finetuning/HF-PEFT/README.md
index b0078690..e1cbcf6b 100644
--- a/python/llm/example/GPU/LLM-Finetuning/HF-PEFT/README.md
+++ b/python/llm/example/GPU/LLM-Finetuning/HF-PEFT/README.md
@@ -17,7 +17,7 @@ pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-exte
 pip install transformers==4.45.0 "trl<0.12.0" datasets
 pip install bitsandbytes==0.45.1 scipy
 pip install fire peft==0.10.0
-pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ # necessary to run distributed finetuning
+pip install oneccl_bind_pt==2.1.100 --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ # necessary to run distributed finetuning
 ```
 
 ### 2. Configures OneAPI environment variables
diff --git a/python/llm/example/GPU/LLM-Finetuning/LoRA/README.md b/python/llm/example/GPU/LLM-Finetuning/LoRA/README.md
index 0c5dfc10..db87a07d 100644
--- a/python/llm/example/GPU/LLM-Finetuning/LoRA/README.md
+++ b/python/llm/example/GPU/LLM-Finetuning/LoRA/README.md
@@ -15,7 +15,7 @@ pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-exte
 pip install transformers==4.45.0 "trl<0.12.0" datasets
 pip install fire peft==0.10.0
 pip install bitsandbytes==0.45.1 scipy
-pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ # necessary to run distributed finetuning
+pip install oneccl_bind_pt==2.1.100 --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ # necessary to run distributed finetuning
 ```
 
 ### 2. Configures OneAPI environment variables
diff --git a/python/llm/example/GPU/LLM-Finetuning/LoRA/chatglm_finetune/README.md b/python/llm/example/GPU/LLM-Finetuning/LoRA/chatglm_finetune/README.md
index cad1665a..77d55667 100644
--- a/python/llm/example/GPU/LLM-Finetuning/LoRA/chatglm_finetune/README.md
+++ b/python/llm/example/GPU/LLM-Finetuning/LoRA/chatglm_finetune/README.md
@@ -21,7 +21,7 @@ pip install "deepspeed==0.13.1"
 pip install "mpi4py>=3.1.5"
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+pip install oneccl_bind_pt==2.1.100 --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 ```
 
 ### 2. Configures OneAPI Environment Variables
diff --git a/python/llm/example/GPU/LLM-Finetuning/QA-LoRA/README.md b/python/llm/example/GPU/LLM-Finetuning/QA-LoRA/README.md
index 08be0bc8..9cc311d2 100644
--- a/python/llm/example/GPU/LLM-Finetuning/QA-LoRA/README.md
+++ b/python/llm/example/GPU/LLM-Finetuning/QA-LoRA/README.md
@@ -15,7 +15,7 @@ pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-exte
 pip install transformers==4.45.0 "trl<0.12.0" datasets
 pip install fire peft==0.10.0
 pip install bitsandbytes==0.45.1 scipy
-pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ # necessary to run distributed finetuning
+pip install oneccl_bind_pt==2.1.100 --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ # necessary to run distributed finetuning
 ```
 
 ### 2. Configures OneAPI environment variables
diff --git a/python/llm/example/GPU/LLM-Finetuning/QLoRA/alpaca-qlora/README.md b/python/llm/example/GPU/LLM-Finetuning/QLoRA/alpaca-qlora/README.md
index c859ca5a..8e7a326e 100644
--- a/python/llm/example/GPU/LLM-Finetuning/QLoRA/alpaca-qlora/README.md
+++ b/python/llm/example/GPU/LLM-Finetuning/QLoRA/alpaca-qlora/README.md
@@ -19,7 +19,7 @@ conda activate llm
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 pip install transformers==4.36.1 datasets
 pip install fire peft==0.10.0 accelerate==0.23.0
-pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ # necessary to run distributed finetuning
+pip install oneccl_bind_pt==2.1.100 --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ # necessary to run distributed finetuning
 pip install bitsandbytes scipy
 # configures OneAPI environment variables
 source /opt/intel/oneapi/setvars.sh # necessary to run before installing deepspeed
diff --git a/python/llm/example/GPU/LLM-Finetuning/ReLora/README.md b/python/llm/example/GPU/LLM-Finetuning/ReLora/README.md
index 77a252d2..aed2c04d 100644
--- a/python/llm/example/GPU/LLM-Finetuning/ReLora/README.md
+++ b/python/llm/example/GPU/LLM-Finetuning/ReLora/README.md
@@ -15,7 +15,7 @@ pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-exte
 pip install transformers==4.45.0 "trl<0.12.0" datasets
 pip install fire peft==0.10.0
 pip install bitsandbytes==0.45.1 scipy
-pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ # necessary to run distributed finetuning
+pip install oneccl_bind_pt==2.1.100 --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ # necessary to run distributed finetuning
 ```
 
 ### 2. Configures OneAPI environment variables
diff --git a/python/llm/example/GPU/Pipeline-Parallel-Inference/README.md b/python/llm/example/GPU/Pipeline-Parallel-Inference/README.md
index c350be36..decc0e10 100644
--- a/python/llm/example/GPU/Pipeline-Parallel-Inference/README.md
+++ b/python/llm/example/GPU/Pipeline-Parallel-Inference/README.md
@@ -48,7 +48,7 @@ conda create -n llm python=3.11
 conda activate llm
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+pip install oneccl_bind_pt==2.1.100 --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 ```
 
 ### 2. Run pipeline parallel inference on multiple GPUs
diff --git a/python/llm/example/GPU/Pipeline-Parallel-Serving/README.md b/python/llm/example/GPU/Pipeline-Parallel-Serving/README.md
index 32745cea..dcb999c4 100644
--- a/python/llm/example/GPU/Pipeline-Parallel-Serving/README.md
+++ b/python/llm/example/GPU/Pipeline-Parallel-Serving/README.md
@@ -36,7 +36,7 @@ conda create -n llm python=3.11
 conda activate llm
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+pip install oneccl_bind_pt==2.1.100 --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 # configures OneAPI environment variables
 source /opt/intel/oneapi/setvars.sh
 pip install mpi4py fastapi uvicorn openai