diff --git a/docker/llm/serving/cpu/docker/Dockerfile b/docker/llm/serving/cpu/docker/Dockerfile index 29fa58c7..7da76bbd 100644 --- a/docker/llm/serving/cpu/docker/Dockerfile +++ b/docker/llm/serving/cpu/docker/Dockerfile @@ -75,7 +75,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ pip install Jinja2==3.1.3 && \ pip install torch==2.2.0 torchvision==0.17.0 torchaudio==2.2.0 --index-url https://download.pytorch.org/whl/cpu && \ pip install intel-extension-for-pytorch==2.2.0 && \ - pip install oneccl_bind_pt==2.2.0 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/cn/ && \ + pip install oneccl_bind_pt==2.2.0 --index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/cn/ && \ pip install transformers==4.36.2 && \ # Install vllm dependencies pip install --upgrade fastapi && \ diff --git a/docs/mddocs/Quickstart/deepspeed_autotp_fastapi_quickstart.md b/docs/mddocs/Quickstart/deepspeed_autotp_fastapi_quickstart.md index 0fa9888b..70185b86 100644 --- a/docs/mddocs/Quickstart/deepspeed_autotp_fastapi_quickstart.md +++ b/docs/mddocs/Quickstart/deepspeed_autotp_fastapi_quickstart.md @@ -20,7 +20,7 @@ conda create -n llm python=3.11 conda activate llm # below command will install intel_extension_for_pytorch==2.1.10+xpu as default pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ +pip install oneccl_bind_pt==2.1.100 --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ # configures OneAPI environment variables source /opt/intel/oneapi/setvars.sh pip install git+https://github.com/microsoft/DeepSpeed.git@ed8aed5 diff --git a/python/llm/example/CPU/QLoRA-FineTuning/alpaca-qlora/README.md b/python/llm/example/CPU/QLoRA-FineTuning/alpaca-qlora/README.md index 899f7d8b..dc1cc6f7 100644 --- a/python/llm/example/CPU/QLoRA-FineTuning/alpaca-qlora/README.md +++ b/python/llm/example/CPU/QLoRA-FineTuning/alpaca-qlora/README.md @@ -53,7 +53,7 @@ python ./alpaca_qlora_finetuning_cpu.py \ ```bash # need to run the alpaca stand-alone version first # for using mpirun -pip install oneccl_bind_pt --extra-index-url https://developer.intel.com/ipex-whl-stable +pip install oneccl_bind_pt --index-url https://developer.intel.com/ipex-whl-stable ``` 2. modify conf in `finetune_one_node_two_sockets.sh` and run diff --git a/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/baichuan2/README.md b/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/baichuan2/README.md index ece64c34..26a3ba91 100644 --- a/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/baichuan2/README.md +++ b/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/baichuan2/README.md @@ -69,7 +69,7 @@ To accelerate speculative decoding on CPU, optionally, you can install our valid ```bash python -m pip install torch==2.2.0 torchvision==0.17.0 torchaudio==2.2.0 --index-url https://download.pytorch.org/whl/cpu python -m pip install intel-extension-for-pytorch==2.2.0 -python -m pip install oneccl_bind_pt==2.2.0 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ +python -m pip install oneccl_bind_pt==2.2.0 --index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ # if there is any installation problem for oneccl_binding, you can also find suitable index url at "https://pytorch-extension.intel.com/release-whl/stable/cpu/cn/" or "https://developer.intel.com/ipex-whl-stable-cpu" according to your environment. # Install other dependencies diff --git a/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/llama2/README.md b/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/llama2/README.md index e5702c12..04750429 100644 --- a/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/llama2/README.md +++ b/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/llama2/README.md @@ -104,7 +104,7 @@ To accelerate speculative decoding on CPU, you can install our validated version # Install IPEX 2.2.0+cpu python -m pip install torch==2.2.0 torchvision==0.17.0 torchaudio==2.2.0 --index-url https://download.pytorch.org/whl/cpu python -m pip install intel-extension-for-pytorch==2.2.0 -python -m pip install oneccl_bind_pt==2.2.0 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ +python -m pip install oneccl_bind_pt==2.2.0 --index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ # if there is any installation problem for oneccl_binding, you can also find suitable index url at "https://pytorch-extension.intel.com/release-whl/stable/cpu/cn/" or "https://developer.intel.com/ipex-whl-stable-cpu" according to your environment. # Update transformers diff --git a/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/llama3/README.md b/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/llama3/README.md index 84a0df2b..01f21473 100644 --- a/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/llama3/README.md +++ b/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/llama3/README.md @@ -81,7 +81,7 @@ To accelerate speculative decoding on CPU, you can install our validated version # Install IPEX 2.2.0+cpu python -m pip install torch==2.2.0 torchvision==0.17.0 torchaudio==2.2.0 --index-url https://download.pytorch.org/whl/cpu python -m pip install intel-extension-for-pytorch==2.2.0 -python -m pip install oneccl_bind_pt==2.2.0 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ +python -m pip install oneccl_bind_pt==2.2.0 --index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ # if there is any installation problem for oneccl_binding, you can also find suitable index url at "https://pytorch-extension.intel.com/release-whl/stable/cpu/cn/" or "https://developer.intel.com/ipex-whl-stable-cpu" according to your environment. # Update transformers diff --git a/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/mistral/README.md b/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/mistral/README.md index b58007a6..5d43aae9 100644 --- a/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/mistral/README.md +++ b/python/llm/example/CPU/Speculative-Decoding/Self-Speculation/mistral/README.md @@ -90,7 +90,7 @@ To accelerate speculative decoding on CPU, you can install our validated version # Install IPEX 2.2.0+cpu python -m pip install torch==2.2.0 torchvision==0.17.0 torchaudio==2.2.0 --index-url https://download.pytorch.org/whl/cpu python -m pip install intel-extension-for-pytorch==2.2.0 -python -m pip install oneccl_bind_pt==2.2.0 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ +python -m pip install oneccl_bind_pt==2.2.0 --index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ # if there is any installation problem for oneccl_binding, you can also find suitable index url at "https://pytorch-extension.intel.com/release-whl/stable/cpu/cn/" or "https://developer.intel.com/ipex-whl-stable-cpu" according to your environment. # Update transformers diff --git a/python/llm/example/GPU/Deepspeed-AutoTP-FastAPI/README.md b/python/llm/example/GPU/Deepspeed-AutoTP-FastAPI/README.md index 437c23d4..a84072fc 100644 --- a/python/llm/example/GPU/Deepspeed-AutoTP-FastAPI/README.md +++ b/python/llm/example/GPU/Deepspeed-AutoTP-FastAPI/README.md @@ -15,7 +15,7 @@ conda create -n llm python=3.11 conda activate llm # below command will install intel_extension_for_pytorch==2.1.10+xpu as default pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ +pip install oneccl_bind_pt==2.1.100 --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ # configures OneAPI environment variables source /opt/intel/oneapi/setvars.sh pip install git+https://github.com/microsoft/DeepSpeed.git@ed8aed5 diff --git a/python/llm/example/GPU/LLM-Finetuning/HF-PEFT/README.md b/python/llm/example/GPU/LLM-Finetuning/HF-PEFT/README.md index b0078690..e1cbcf6b 100644 --- a/python/llm/example/GPU/LLM-Finetuning/HF-PEFT/README.md +++ b/python/llm/example/GPU/LLM-Finetuning/HF-PEFT/README.md @@ -17,7 +17,7 @@ pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-exte pip install transformers==4.45.0 "trl<0.12.0" datasets pip install bitsandbytes==0.45.1 scipy pip install fire peft==0.10.0 -pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ # necessary to run distributed finetuning +pip install oneccl_bind_pt==2.1.100 --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ # necessary to run distributed finetuning ``` ### 2. Configures OneAPI environment variables diff --git a/python/llm/example/GPU/LLM-Finetuning/LoRA/README.md b/python/llm/example/GPU/LLM-Finetuning/LoRA/README.md index 0c5dfc10..db87a07d 100644 --- a/python/llm/example/GPU/LLM-Finetuning/LoRA/README.md +++ b/python/llm/example/GPU/LLM-Finetuning/LoRA/README.md @@ -15,7 +15,7 @@ pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-exte pip install transformers==4.45.0 "trl<0.12.0" datasets pip install fire peft==0.10.0 pip install bitsandbytes==0.45.1 scipy -pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ # necessary to run distributed finetuning +pip install oneccl_bind_pt==2.1.100 --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ # necessary to run distributed finetuning ``` ### 2. Configures OneAPI environment variables diff --git a/python/llm/example/GPU/LLM-Finetuning/LoRA/chatglm_finetune/README.md b/python/llm/example/GPU/LLM-Finetuning/LoRA/chatglm_finetune/README.md index cad1665a..77d55667 100644 --- a/python/llm/example/GPU/LLM-Finetuning/LoRA/chatglm_finetune/README.md +++ b/python/llm/example/GPU/LLM-Finetuning/LoRA/chatglm_finetune/README.md @@ -21,7 +21,7 @@ pip install "deepspeed==0.13.1" pip install "mpi4py>=3.1.5" # below command will install intel_extension_for_pytorch==2.1.10+xpu as default pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ +pip install oneccl_bind_pt==2.1.100 --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ ``` ### 2. Configures OneAPI Environment Variables diff --git a/python/llm/example/GPU/LLM-Finetuning/QA-LoRA/README.md b/python/llm/example/GPU/LLM-Finetuning/QA-LoRA/README.md index 08be0bc8..9cc311d2 100644 --- a/python/llm/example/GPU/LLM-Finetuning/QA-LoRA/README.md +++ b/python/llm/example/GPU/LLM-Finetuning/QA-LoRA/README.md @@ -15,7 +15,7 @@ pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-exte pip install transformers==4.45.0 "trl<0.12.0" datasets pip install fire peft==0.10.0 pip install bitsandbytes==0.45.1 scipy -pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ # necessary to run distributed finetuning +pip install oneccl_bind_pt==2.1.100 --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ # necessary to run distributed finetuning ``` ### 2. Configures OneAPI environment variables diff --git a/python/llm/example/GPU/LLM-Finetuning/QLoRA/alpaca-qlora/README.md b/python/llm/example/GPU/LLM-Finetuning/QLoRA/alpaca-qlora/README.md index c859ca5a..8e7a326e 100644 --- a/python/llm/example/GPU/LLM-Finetuning/QLoRA/alpaca-qlora/README.md +++ b/python/llm/example/GPU/LLM-Finetuning/QLoRA/alpaca-qlora/README.md @@ -19,7 +19,7 @@ conda activate llm pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ pip install transformers==4.36.1 datasets pip install fire peft==0.10.0 accelerate==0.23.0 -pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ # necessary to run distributed finetuning +pip install oneccl_bind_pt==2.1.100 --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ # necessary to run distributed finetuning pip install bitsandbytes scipy # configures OneAPI environment variables source /opt/intel/oneapi/setvars.sh # necessary to run before installing deepspeed diff --git a/python/llm/example/GPU/LLM-Finetuning/ReLora/README.md b/python/llm/example/GPU/LLM-Finetuning/ReLora/README.md index 77a252d2..aed2c04d 100644 --- a/python/llm/example/GPU/LLM-Finetuning/ReLora/README.md +++ b/python/llm/example/GPU/LLM-Finetuning/ReLora/README.md @@ -15,7 +15,7 @@ pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-exte pip install transformers==4.45.0 "trl<0.12.0" datasets pip install fire peft==0.10.0 pip install bitsandbytes==0.45.1 scipy -pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ # necessary to run distributed finetuning +pip install oneccl_bind_pt==2.1.100 --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ # necessary to run distributed finetuning ``` ### 2. Configures OneAPI environment variables diff --git a/python/llm/example/GPU/Pipeline-Parallel-Inference/README.md b/python/llm/example/GPU/Pipeline-Parallel-Inference/README.md index c350be36..decc0e10 100644 --- a/python/llm/example/GPU/Pipeline-Parallel-Inference/README.md +++ b/python/llm/example/GPU/Pipeline-Parallel-Inference/README.md @@ -48,7 +48,7 @@ conda create -n llm python=3.11 conda activate llm # below command will install intel_extension_for_pytorch==2.1.10+xpu as default pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ +pip install oneccl_bind_pt==2.1.100 --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ ``` ### 2. Run pipeline parallel inference on multiple GPUs diff --git a/python/llm/example/GPU/Pipeline-Parallel-Serving/README.md b/python/llm/example/GPU/Pipeline-Parallel-Serving/README.md index 32745cea..dcb999c4 100644 --- a/python/llm/example/GPU/Pipeline-Parallel-Serving/README.md +++ b/python/llm/example/GPU/Pipeline-Parallel-Serving/README.md @@ -36,7 +36,7 @@ conda create -n llm python=3.11 conda activate llm # below command will install intel_extension_for_pytorch==2.1.10+xpu as default pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ +pip install oneccl_bind_pt==2.1.100 --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ # configures OneAPI environment variables source /opt/intel/oneapi/setvars.sh pip install mpi4py fastapi uvicorn openai