From 56cb992497895eeff2dc83a0a226b7ec222d4836 Mon Sep 17 00:00:00 2001 From: ZehuaCao <47251317+Romanticoseu@users.noreply.github.com> Date: Fri, 17 May 2024 15:52:20 +0800 Subject: [PATCH] LLM: Modify CPU Installation Command for most examples (#11049) * init * refine * refine * refine * modify hf-agent example * modify all CPU model example * remove readthedoc modify * replace powershell with cmd * fix repo * fix repo * update * remove comment on windows code block * update * update * update * update --------- Co-authored-by: xiangyuT --- README.md | 2 +- .../source/doc/LLM/Overview/install_cpu.md | 2 +- docs/readthedocs/source/index.rst | 2 +- .../CPU/Applications/hf-agent/README.md | 16 +++++++-- .../CPU/Applications/streaming-llm/README.md | 10 ++++++ .../example/CPU/Deepspeed-AutoTP/install.sh | 2 +- .../Advanced-Quantizations/AWQ/README.md | 19 ++++++++-- .../Advanced-Quantizations/GGUF/README.md | 17 +++++++-- .../Advanced-Quantizations/GPTQ/README.md | 19 ++++++++-- .../Model/README.md | 2 +- .../Model/aquila/README.md | 17 +++++++-- .../Model/aquila2/README.md | 17 +++++++-- .../Model/baichuan/README.md | 4 ++- .../Model/baichuan2/README.md | 18 ++++++++-- .../Model/bluelm/README.md | 17 +++++++-- .../Model/chatglm/README.md | 18 ++++++++-- .../Model/chatglm2/README.md | 34 +++++++++++++++--- .../Model/chatglm3/README.md | 34 +++++++++++++++--- .../Model/codegemma/README.md | 18 ++++++++-- .../Model/codellama/README.md | 18 ++++++++-- .../Model/codeshell/README.md | 17 +++++++-- .../Model/cohere/README.md | 20 +++++++++-- .../Model/deciLM-7b/README.md | 18 ++++++++-- .../Model/deepseek-moe/README.md | 18 ++++++++-- .../Model/deepseek/README.md | 17 +++++++-- .../Model/distil-whisper/README.md | 18 ++++++++-- .../Model/dolly_v1/README.md | 17 +++++++-- .../Model/dolly_v2/README.md | 18 ++++++++-- .../Model/falcon/README.md | 18 ++++++++-- .../Model/flan-t5/README.md | 17 +++++++-- .../Model/fuyu/README.md | 18 ++++++++-- .../Model/gemma/README.md | 19 ++++++++-- .../Model/internlm-xcomposer/README.md | 20 +++++++++-- .../Model/internlm/README.md | 17 +++++++-- .../Model/internlm2/README.md | 17 +++++++-- .../Model/llama2/README.md | 18 ++++++++-- .../Model/llama3/README.md | 18 ++++++++-- .../Model/mistral/README.md | 19 ++++++++-- .../Model/mixtral/README.md | 18 ++++++++-- .../Model/moss/README.md | 17 +++++++-- .../Model/mpt/README.md | 17 +++++++-- .../Model/phi-1_5/README.md | 18 ++++++++-- .../Model/phi-2/README.md | 17 +++++++-- .../Model/phi-3/README.md | 18 ++++++++-- .../Model/phixtral/README.md | 18 ++++++++-- .../Model/phoenix/README.md | 17 +++++++-- .../Model/qwen-vl/README.md | 20 +++++++++-- .../Model/qwen/README.md | 17 +++++++-- .../Model/qwen1.5/README.md | 22 ++++++++++-- .../Model/redpajama/README.md | 17 +++++++-- .../Model/replit/README.md | 18 ++++++++-- .../Model/skywork/README.md | 18 ++++++++-- .../Model/solar/README.md | 18 ++++++++-- .../Model/stablelm/README.md | 19 ++++++++-- .../Model/starcoder/README.md | 17 +++++++-- .../Model/vicuna/README.md | 17 +++++++-- .../Model/whisper/readme.md | 36 ++++++++++++++++--- .../Model/wizardcoder-python/README.md | 17 +++++++-- .../Model/yi/README.md | 18 ++++++++-- .../Model/yuan2/README.md | 19 ++++++++-- .../Model/ziya/README.md | 18 ++++++++-- .../More-Data-Types/README.md | 2 +- .../Save-Load/README.md | 2 +- .../example/CPU/ModelScope-Models/README.md | 18 ++++++++-- .../llm/example/CPU/Native-Models/README.md | 13 +++++++ .../PyTorch-Models/Model/aquila2/README.md | 17 +++++++-- 
.../CPU/PyTorch-Models/Model/bark/README.md | 19 ++++++++-- .../CPU/PyTorch-Models/Model/bert/README.md | 16 +++++++-- .../CPU/PyTorch-Models/Model/bluelm/README.md | 17 +++++++-- .../PyTorch-Models/Model/chatglm/README.md | 21 +++++++++-- .../PyTorch-Models/Model/chatglm3/README.md | 17 +++++++-- .../PyTorch-Models/Model/codegemma/README.md | 21 ++++++++--- .../PyTorch-Models/Model/codellama/README.md | 19 ++++++++-- .../PyTorch-Models/Model/codeshell/README.md | 17 +++++++-- .../CPU/PyTorch-Models/Model/cohere/README.md | 21 +++++++++-- .../PyTorch-Models/Model/deciLM-7b/README.md | 18 ++++++++-- .../Model/deepseek-moe/README.md | 18 ++++++++-- .../PyTorch-Models/Model/deepseek/README.md | 17 +++++++-- .../Model/distil-whisper/README.md | 20 +++++++++-- .../PyTorch-Models/Model/flan-t5/README.md | 17 +++++++-- .../CPU/PyTorch-Models/Model/fuyu/README.md | 19 ++++++++-- .../Model/internlm-xcomposer/README.md | 18 ++++++++-- .../PyTorch-Models/Model/internlm2/README.md | 17 +++++++-- .../CPU/PyTorch-Models/Model/llama2/README.md | 17 +++++++-- .../CPU/PyTorch-Models/Model/llama3/README.md | 19 ++++++++-- .../CPU/PyTorch-Models/Model/llava/README.md | 24 +++++++++++-- .../CPU/PyTorch-Models/Model/mamba/README.md | 18 ++++++++-- .../PyTorch-Models/Model/meta-llama/README.md | 23 ++++++++++-- .../PyTorch-Models/Model/mistral/README.md | 19 ++++++++-- .../PyTorch-Models/Model/mixtral/README.md | 19 ++++++++-- .../Model/openai-whisper/readme.md | 19 ++++++++-- .../PyTorch-Models/Model/phi-1_5/README.md | 18 ++++++++-- .../CPU/PyTorch-Models/Model/phi-2/README.md | 18 ++++++++-- .../CPU/PyTorch-Models/Model/phi-3/README.md | 19 ++++++++-- .../PyTorch-Models/Model/phixtral/README.md | 18 ++++++++-- .../PyTorch-Models/Model/qwen-vl/README.md | 18 ++++++++-- .../PyTorch-Models/Model/qwen1.5/README.md | 22 ++++++++++-- .../PyTorch-Models/Model/skywork/README.md | 17 +++++++-- .../CPU/PyTorch-Models/Model/solar/README.md | 18 ++++++++-- .../PyTorch-Models/Model/stablelm/README.md | 19 ++++++++-- .../Model/wizardcoder-python/README.md | 17 +++++++-- .../CPU/PyTorch-Models/Model/yi/README.md | 18 ++++++++-- .../CPU/PyTorch-Models/Model/yuan2/README.md | 23 +++++++++--- .../CPU/PyTorch-Models/Model/ziya/README.md | 18 ++++++++-- .../PyTorch-Models/More-Data-Types/README.md | 15 +++++++- .../CPU/PyTorch-Models/Save-Load/README.md | 15 +++++++- .../Model/cohere/README.md | 4 +-- .../Model/mpt/README.md | 2 +- .../GPU/PyTorch-Models/Model/cohere/README.md | 4 +-- 109 files changed, 1621 insertions(+), 225 deletions(-) diff --git a/README.md b/README.md index ab44c7a3..7cc5a356 100644 --- a/README.md +++ b/README.md @@ -110,7 +110,7 @@ See the demo of running [*Text-Generation-WebUI*](https://ipex-llm.readthedocs.i - LLM finetuning on Intel [GPU](python/llm/example/GPU/LLM-Finetuning), including [LoRA](python/llm/example/GPU/LLM-Finetuning/LoRA), [QLoRA](python/llm/example/GPU/LLM-Finetuning/QLoRA), [DPO](python/llm/example/GPU/LLM-Finetuning/DPO), [QA-LoRA](python/llm/example/GPU/LLM-Finetuning/QA-LoRA) and [ReLoRA](python/llm/example/GPU/LLM-Finetuning/ReLora) - QLoRA finetuning on Intel [CPU](python/llm/example/CPU/QLoRA-FineTuning) - Integration with community libraries - - [HuggingFace tansformers](python/llm/example/GPU/HF-Transformers-AutoModels) + - [HuggingFace transformers](python/llm/example/GPU/HF-Transformers-AutoModels) - [Standard PyTorch model](python/llm/example/GPU/PyTorch-Models) - [DeepSpeed-AutoTP](python/llm/example/GPU/Deepspeed-AutoTP) - [HuggingFace 
PEFT](python/llm/example/GPU/LLM-Finetuning/HF-PEFT) diff --git a/docs/readthedocs/source/doc/LLM/Overview/install_cpu.md b/docs/readthedocs/source/doc/LLM/Overview/install_cpu.md index cbe3ce83..c19ddd4c 100644 --- a/docs/readthedocs/source/doc/LLM/Overview/install_cpu.md +++ b/docs/readthedocs/source/doc/LLM/Overview/install_cpu.md @@ -97,4 +97,4 @@ Then for running a LLM model with IPEX-LLM optimizations (taking an `example.py` # e.g. for a server with 48 cores per socket export OMP_NUM_THREADS=48 numactl -C 0-47 -m 0 python example.py -``` +``` \ No newline at end of file diff --git a/docs/readthedocs/source/index.rst b/docs/readthedocs/source/index.rst index 53b33be9..c0394297 100644 --- a/docs/readthedocs/source/index.rst +++ b/docs/readthedocs/source/index.rst @@ -162,7 +162,7 @@ Code Examples * Integration with community libraries - * `HuggingFace tansformers `_ + * `HuggingFace transformers `_ * `Standard PyTorch model `_ * `DeepSpeed-AutoTP `_ * `HuggingFace PEFT `_ diff --git a/python/llm/example/CPU/Applications/hf-agent/README.md b/python/llm/example/CPU/Applications/hf-agent/README.md index 455f10ed..d9103297 100644 --- a/python/llm/example/CPU/Applications/hf-agent/README.md +++ b/python/llm/example/CPU/Applications/hf-agent/README.md @@ -9,14 +9,26 @@ To run this example with IPEX-LLM, we have some recommended requirements for you ### 1. Install We suggest using conda to manage environment: + +On Linux: ```bash conda create -n llm python=3.11 conda activate llm -pip install ipex-llm[all] # install ipex-llm with 'all' option + # install ipex-llm with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install pillow # additional package required for opening images ``` +On Windows: +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install pillow +``` + ### 2. Run ``` python ./run_agent.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --image-path IMAGE_PATH @@ -32,7 +44,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./run_agent.py --image-path IMAGE_PATH ``` diff --git a/python/llm/example/CPU/Applications/streaming-llm/README.md b/python/llm/example/CPU/Applications/streaming-llm/README.md index 571f51a3..f5055f2b 100644 --- a/python/llm/example/CPU/Applications/streaming-llm/README.md +++ b/python/llm/example/CPU/Applications/streaming-llm/README.md @@ -9,10 +9,20 @@ model = AutoModelForCausalLM.from_pretrained(model_name_or_path, load_in_4bit=Tr ## Prepare Environment We suggest using conda to manage environment: + +On Linux ```bash conda create -n llm python=3.11 conda activate llm +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: +```cmd +conda create -n llm python=3.11 +conda activate llm + pip install --pre --upgrade ipex-llm[all] ``` diff --git a/python/llm/example/CPU/Deepspeed-AutoTP/install.sh b/python/llm/example/CPU/Deepspeed-AutoTP/install.sh index 463a94ff..d0f38c68 100644 --- a/python/llm/example/CPU/Deepspeed-AutoTP/install.sh +++ b/python/llm/example/CPU/Deepspeed-AutoTP/install.sh @@ -20,4 +20,4 @@ pip install deepspeed==0.11.1 # 4. exclude intel deepspeed extension, which is only for XPU pip uninstall intel-extension-for-deepspeed # 5. 
install ipex-llm -pip install --pre --upgrade ipex-llm[all] +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Advanced-Quantizations/AWQ/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Advanced-Quantizations/AWQ/README.md index b3078cbd..c409758b 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Advanced-Quantizations/AWQ/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Advanced-Quantizations/AWQ/README.md @@ -33,16 +33,31 @@ In the example [generate.py](./generate.py), we show a basic use case for a AWQ We suggest using conda to manage environment: +On Linux + ```bash conda create -n llm python=3.11 conda activate llm pip install autoawq==0.1.8 --no-deps -pip install --pre --upgrade ipex-llm[all] # install ipex-llm with 'all' option +# install ipex-llm with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install transformers==4.35.0 pip install accelerate==0.25.0 pip install einops ``` +On Windows: +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install autoawq==0.1.8 --no-deps +pip install --pre --upgrade ipex-llm[all] +pip install transformers==4.35.0 +pip install accelerate==0.25.0 +pip install einops +``` + **Note: For Mixtral model, please use transformers 4.36.0:** ```bash pip install transformers==4.36.0 @@ -68,7 +83,7 @@ Arguments info: On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Advanced-Quantizations/GGUF/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Advanced-Quantizations/GGUF/README.md index 4741e604..9d82496e 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Advanced-Quantizations/GGUF/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Advanced-Quantizations/GGUF/README.md @@ -24,19 +24,32 @@ In the example [generate.py](./generate.py), we show a basic use case to load a We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install transformers==4.36.0 # upgrade transformers ``` + +On Windows: +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install transformers==4.36.0 +``` + ### 2. Run After setting up the Python environment, you could run the example by following steps. #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --model --prompt 'What is AI?' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. 
diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Advanced-Quantizations/GPTQ/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Advanced-Quantizations/GPTQ/README.md index 139fa014..6d58ab89 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Advanced-Quantizations/GPTQ/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Advanced-Quantizations/GPTQ/README.md @@ -8,16 +8,31 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a Llama2 model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux ```bash conda create -n llm python=3.11 conda activate llm -pip install ipex-llm[all] # install ipex-llm with 'all' option +# install ipex-llm with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install transformers==4.34.0 BUILD_CUDA_EXT=0 pip install git+https://github.com/PanQiWei/AutoGPTQ.git@1de9ab6 pip install optimum==0.14.0 ``` +On Windows: +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install transformers==4.34.0 +set BUILD_CUDA_EXT=0 +pip install git+https://github.com/PanQiWei/AutoGPTQ.git@1de9ab6 +pip install optimum==0.14.0 +``` + ### 2. Run ``` python ./generate.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --prompt PROMPT --n-predict N_PREDICT @@ -34,7 +49,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/README.md index 618f1902..d47e95aa 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/README.md @@ -9,6 +9,6 @@ For OS, IPEX-LLM supports Ubuntu 20.04 or later (glibc>=2.17), CentOS 7 or later ## Best Known Configuration on Linux For better performance, it is recommended to set environment variables on Linux with the help of IPEX-LLM: ```bash -pip install ipex-llm +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu source ipex-llm-init ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/aquila/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/aquila/README.md index 8b3cfbf3..93f07a06 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/aquila/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/aquila/README.md @@ -15,11 +15,24 @@ In the example [generate.py](./generate.py), we show a basic use case for a Aqui We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). 
After installing conda, create a Python environment for IPEX-LLM: + +On Linux + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] ``` ### 2. Run @@ -31,7 +44,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'AI是什么?' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/aquila2/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/aquila2/README.md index fd06613c..730e7d47 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/aquila2/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/aquila2/README.md @@ -15,11 +15,24 @@ In the example [generate.py](./generate.py), we show a basic use case for a Aqui We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] ``` ### 2. Run @@ -31,7 +44,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'AI是什么?' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/baichuan/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/baichuan/README.md index c6aee4d5..6c91201c 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/baichuan/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/baichuan/README.md @@ -9,12 +9,14 @@ In the example [generate.py](./generate.py), we show a basic use case for a Baic ### 1. 
Install We suggest using conda to manage environment: + On Linux: ```bash conda create -n llm python=3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu # install ipex-llm with 'all' option +# install ipex-llm with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install transformers_stream_generator # additional package required for Baichuan-13B-Chat to conduct generation ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/baichuan2/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/baichuan2/README.md index e9e28200..d0ca32ee 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/baichuan2/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/baichuan2/README.md @@ -8,14 +8,28 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a Baichuan model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash conda create -n llm python=3.11 conda activate llm -pip install ipex-llm[all] # install ipex-llm with 'all' option +# install ipex-llm with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu + pip install transformers_stream_generator # additional package required for Baichuan-13B-Chat to conduct generation ``` +On Windows: +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install transformers_stream_generator +``` + ### 2. Run ``` python ./generate.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --prompt PROMPT --n-predict N_PREDICT @@ -32,7 +46,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/bluelm/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/bluelm/README.md index 328a86b7..dc0c3a7a 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/bluelm/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/bluelm/README.md @@ -8,11 +8,24 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a BlueLM model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash +conda create -n llm python=3.11 # recommend to use Python 3.11 +conda activate llm + +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd conda create -n llm python=3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] ``` ### 2.
Run @@ -31,7 +44,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/chatglm/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/chatglm/README.md index 9f79516b..1d7006b3 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/chatglm/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/chatglm/README.md @@ -15,14 +15,28 @@ In the example [generate.py](./generate.py), we show a basic use case for a Chat We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install "transformers<4.34.1" # chatglm cannot work with transformers 4.34.1+ ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install "transformers<4.34.1" +``` + ### 2. Run After setting up the Python environment, you could run the example by following steps. @@ -32,7 +46,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'AI是什么?' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/chatglm2/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/chatglm2/README.md index 8a99eebe..bcb96b93 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/chatglm2/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/chatglm2/README.md @@ -9,11 +9,24 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a ChatGLM2 model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash +conda create -n llm python=3.11 # recommend to use Python 3.11 +conda activate llm + +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd conda create -n llm python=3.11 conda activate llm -pip install ipex-llm[all] # install ipex-llm with 'all' option +pip install --pre --upgrade ipex-llm[all] ``` ### 2. 
Run @@ -32,7 +45,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` @@ -79,11 +92,24 @@ Inference time: xxxx s In the example [streamchat.py](./streamchat.py), we show a basic use case for a ChatGLM2 model to stream chat, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash +conda create -n llm python=3.11 # recommend to use Python 3.11 +conda activate llm + +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd conda create -n llm python=3.11 conda activate llm -pip install ipex-llm[all] # install ipex-llm with 'all' option +pip install --pre --upgrade ipex-llm[all] ``` ### 2. Run @@ -108,7 +134,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd $env:PYTHONUNBUFFERED=1 # ensure stdout and stderr streams are sent straight to terminal without being first buffered python ./streamchat.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/chatglm3/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/chatglm3/README.md index 4b5f2174..059fad28 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/chatglm3/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/chatglm3/README.md @@ -9,11 +9,24 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a ChatGLM3 model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash +conda create -n llm python=3.11 # recommend to use Python 3.11 +conda activate llm + +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd conda create -n llm python=3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install ipex-llm with 'all' option +pip install --pre --upgrade ipex-llm[all] ``` ### 2. Run @@ -32,7 +45,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` @@ -80,11 +93,24 @@ AI stands for Artificial Intelligence. It refers to the development of computer In the example [streamchat.py](./streamchat.py), we show a basic use case for a ChatGLM3 model to stream chat, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash +conda create -n llm python=3.11 # recommend to use Python 3.11 +conda activate llm + +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd conda create -n llm python=3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install ipex-llm with 'all' option +pip install --pre --upgrade ipex-llm[all] ``` ### 2. 
Run @@ -109,7 +135,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd $env:PYTHONUNBUFFERED=1 # ensure stdout and stderr streams are sent straight to terminal without being first buffered python ./streamchat.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/codegemma/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/codegemma/README.md index a959d278..76b96e9a 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/codegemma/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/codegemma/README.md @@ -10,17 +10,31 @@ In the example [generate.py](./generate.py), we show a basic use case for a Code We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm # install ipex-llm with 'all' option -pip install ipex-llm[all] +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu # According to CodeGemma's requirement, please make sure you are using a stable version of Transformers, 4.38.1 or newer. pip install transformers==4.38.1 ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] + +pip install transformers==4.38.1 +``` + ### 2. Run ``` python ./generate.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --prompt PROMPT --n-predict N_PREDICT @@ -37,7 +51,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/codellama/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/codellama/README.md index 10035051..4dc252e7 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/codellama/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/codellama/README.md @@ -8,14 +8,28 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a CodeLlama model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash conda create -n llm python=3.11 conda activate llm -pip install ipex-llm[all] # install ipex-llm with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install transformers==4.34.1 # CodeLlamaTokenizer is supported in higher version of transformers ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install transformers==4.34.1 +``` + ### 2. 
Run ``` python ./generate.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --prompt PROMPT --n-predict N_PREDICT @@ -32,7 +46,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/codeshell/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/codeshell/README.md index a3399ab8..ea5fd312 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/codeshell/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/codeshell/README.md @@ -15,11 +15,24 @@ In the example [generate.py](./generate.py), we show a basic use case for a Code We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] ``` ### 2. Run @@ -31,7 +44,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'def print_hello_world():' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/cohere/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/cohere/README.md index d104c84d..f75ac825 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/cohere/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/cohere/README.md @@ -8,12 +8,26 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a cohere model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash conda create -n llm python=3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install ipex-llm with 'all' option -pip install tansformers==4.40.0 +# install ipex-llm with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +pip install transformers==4.40.0 +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install transformers==4.40.0 ``` ### 2. 
Run @@ -32,7 +46,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/deciLM-7b/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/deciLM-7b/README.md index ac818695..02cb8cfc 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/deciLM-7b/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/deciLM-7b/README.md @@ -8,14 +8,28 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a DeciLM-7B model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash conda create -n llm python=3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install transformers==4.35.2 # required by DeciLM-7B ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install transformers==4.35.2 +``` + ### 2. Run ``` python ./generate.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --prompt PROMPT --n-predict N_PREDICT @@ -32,7 +46,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/deepseek-moe/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/deepseek-moe/README.md index 3fd87ae7..ff3fc050 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/deepseek-moe/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/deepseek-moe/README.md @@ -15,14 +15,28 @@ In the example [generate.py](./generate.py), we show a basic use case for a Deep We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install einops # additional package required for DeepSeek-MoE to conduct generation ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install einops +``` + ### 2. Run After setting up the Python environment, you could run the example by following steps. @@ -35,7 +49,7 @@ You need to disable flash attention to run this model. To do this, simply replac #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'What is AI?' 
``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/deepseek/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/deepseek/README.md index e38600b7..1123c42c 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/deepseek/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/deepseek/README.md @@ -8,11 +8,24 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a Deepseek model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash +conda create -n llm python=3.11 # recommend to use Python 3.11 +conda activate llm + +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd conda create -n llm python=3.11 conda activate llm -pip install ipex-llm[all] # install ipex-llm with 'all' option +pip install --pre --upgrade ipex-llm[all] ``` ### 2. Run @@ -31,7 +44,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/distil-whisper/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/distil-whisper/README.md index 92d863b1..4b57416c 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/distil-whisper/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/distil-whisper/README.md @@ -11,14 +11,28 @@ In the example [recognize.py](./recognize.py), we show a basic use case for a Di We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install datasets soundfile librosa # required by audio processing ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install datasets soundfile librosa +``` + ### 2. 
Run ``` python ./recognize.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --repo-id-or-data-path REPO_ID_OR_DATA_PATH --language LANGUAGE --chunk-length CHUNK_LENGTH --batch-size BATCH_SIZE @@ -38,7 +52,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./recognize.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/dolly_v1/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/dolly_v1/README.md index 1e599b4e..55fa326a 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/dolly_v1/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/dolly_v1/README.md @@ -8,11 +8,24 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a Dolly v1 model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash +conda create -n llm python=3.11 # recommend to use Python 3.11 +conda activate llm + +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd conda create -n llm python=3.11 conda activate llm -pip install ipex-llm[all] # install ipex-llm with 'all' option +pip install --pre --upgrade ipex-llm[all] ``` ### 2. Run @@ -31,7 +44,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/dolly_v2/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/dolly_v2/README.md index b06f61cc..7409c57c 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/dolly_v2/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/dolly_v2/README.md @@ -8,13 +8,25 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a Dolly v2 model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash +conda create -n llm python=3.11 # recommend to use Python 3.11 +conda activate llm + +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd conda create -n llm python=3.11 conda activate llm -pip install ipex-llm[all] # install ipex-llm with 'all' option +pip install --pre --upgrade ipex-llm[all] ``` - ### 2. 
Run ``` python ./generate.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --prompt PROMPT --n-predict N_PREDICT @@ -31,7 +43,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/falcon/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/falcon/README.md index ca7b5f45..b50171ef 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/falcon/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/falcon/README.md @@ -9,14 +9,28 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a Falcon model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash conda create -n llm python=3.11 conda activate llm -pip install ipex-llm[all] # install ipex-llm with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install einops # additional package required for falcon-7b-instruct and falcon-40b-instruct to conduct generation ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install einops +``` + ### 2. (Optional) Download Model and Replace File If you select the Falcon models ([tiiuae/falcon-7b-instruct](https://huggingface.co/tiiuae/falcon-7b-instruct) or [tiiuae/falcon-40b-instruct](https://huggingface.co/tiiuae/falcon-40b-instruct)), please note that their code (`modelling_RW.py`) does not support KV cache at the moment. To address issue, we have provided two updated files ([falcon-7b-instruct/modelling_RW.py](./falcon-7b-instruct/modelling_RW.py) and [falcon-40b-instruct/modelling_RW.py](./falcon-40b-instruct/modelling_RW.py)), which can be used to achieve the best performance using IPEX-LLM INT4 optimizations with KV cache support. After transformers 4.36, only transformer models are supported since remote code diverges from transformer model code, make sure set `trust_remote_code=False`. @@ -66,7 +80,7 @@ Arguments info: #### 3.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/flan-t5/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/flan-t5/README.md index 2daa684f..de95d858 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/flan-t5/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/flan-t5/README.md @@ -11,11 +11,24 @@ In the example [generate.py](./generate.py), we show a basic use case for a Flan We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). 
After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] ``` ### 2. Run @@ -27,7 +40,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'Translate to German: My name is Arthur' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/fuyu/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/fuyu/README.md index 8bf15bd1..49942b01 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/fuyu/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/fuyu/README.md @@ -10,15 +10,29 @@ In the example [generate.py](./generate.py), we show a basic use case for an Fuy We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install transformers==4.35 pillow # additional package required for Fuyu to conduct generation ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] + +pip install transformers==4.35 pillow +``` + ### 2. Run After setting up the Python environment, you could run the example by following steps. @@ -28,7 +42,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --image-path demo.jpg ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. 
diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/gemma/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/gemma/README.md index f00acd8a..bb09f15e 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/gemma/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/gemma/README.md @@ -11,16 +11,31 @@ In the example [generate.py](./generate.py), we show a basic use case for a Gemm We suggest using conda to manage the Python environment: After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu # According to Gemma's requirement, please make sure you are using a stable version of Transformers, 4.38.1 or newer. pip install transformers==4.38.1 ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] + +pip install transformers==4.38.1 +``` + ### 2. Run ``` python ./generate.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --prompt PROMPT --n-predict N_PREDICT @@ -37,7 +52,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/internlm-xcomposer/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/internlm-xcomposer/README.md index 97235dd6..3deb7bb2 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/internlm-xcomposer/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/internlm-xcomposer/README.md @@ -10,16 +10,32 @@ In the example [chat.py](./chat.py), we show a basic use case for an InternLM_XC We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install accelerate timm==0.4.12 sentencepiece==0.1.99 gradio==3.44.4 markdown2==2.4.10 xlsxwriter==3.1.2 einops # additional package required for InternLM_XComposer to conduct generation ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] + +pip install accelerate timm==0.4.12 sentencepiece==0.1.99 gradio==3.44.4 markdown2==2.4.10 xlsxwriter==3.1.2 einops + +``` + ### 2. Download Model and Replace File If you select the InternLM_XComposer model ([internlm/internlm-xcomposer-vl-7b](https://huggingface.co/internlm/internlm-xcomposer-vl-7b)), please note that their code (`modeling_InternLM_XComposer.py`) does not support inference on CPU. 
To address this issue, we have provided the updated file ([internlm-xcomposer-vl-7b/modeling_InternLM_XComposer.py](./internlm-xcomposer-vl-7b/modeling_InternLM_XComposer.py), which can be used to conduct inference on CPU. @@ -49,7 +65,7 @@ After setting up the Python environment, you could run the example by following #### 3.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./chat.py --image-path demo.jpg ``` More information about arguments can be found in [Arguments Info](#33-arguments-info) section. The expected output can be found in [Sample Output](#34-sample-output) section. diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/internlm/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/internlm/README.md index 29cb4f6b..c175a943 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/internlm/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/internlm/README.md @@ -9,11 +9,24 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a InternLM model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash +conda create -n llm python=3.11 # recommend to use Python 3.11 +conda activate llm + +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd conda create -n llm python=3.11 conda activate llm -pip install ipex-llm[all] # install ipex-llm with 'all' option +pip install --pre --upgrade ipex-llm[all] ``` ### 2. Run @@ -32,7 +45,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/internlm2/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/internlm2/README.md index 01f399b9..04759aa0 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/internlm2/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/internlm2/README.md @@ -9,11 +9,24 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a InternLM2 model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash +conda create -n llm python=3.11 # recommend to use Python 3.11 +conda activate llm + +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd conda create -n llm python=3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install ipex-llm with 'all' option +pip install --pre --upgrade ipex-llm[all] ``` ### 2. 
Run @@ -32,7 +45,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/llama2/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/llama2/README.md index 68415979..098a32a2 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/llama2/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/llama2/README.md @@ -8,13 +8,25 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a Llama2 model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash +conda create -n llm python=3.11 # recommend to use Python 3.11 +conda activate llm + +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd conda create -n llm python=3.11 conda activate llm -pip install ipex-llm[all] # install ipex-llm with 'all' option +pip install --pre --upgrade ipex-llm[all] ``` - ### 2. Run ``` python ./generate.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --prompt PROMPT --n-predict N_PREDICT @@ -31,7 +43,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/llama3/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/llama3/README.md index 266aa1b6..48029ad5 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/llama3/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/llama3/README.md @@ -8,13 +8,27 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a Llama3 model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. 
Install We suggest using conda to manage environment: + +On Linux: + ```bash conda create -n llm python=3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install ipex-llm with 'all' option +# install ipex-llm with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu # transformers>=4.33.0 is required for Llama3 with IPEX-LLM optimizations +pip install transformers==4.37.0 +``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] + pip install transformers==4.37.0 ``` @@ -34,7 +48,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/mistral/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/mistral/README.md index d27fc1e7..78abbe27 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/mistral/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/mistral/README.md @@ -12,16 +12,31 @@ In the example [generate.py](./generate.py), we show a basic use case for a Mist We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu # Refer to https://huggingface.co/mistralai/Mistral-7B-v0.1#troubleshooting, please make sure you are using a stable version of Transformers, 4.34.0 or newer. pip install transformers==4.34.0 ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] + +pip install transformers==4.34.0 +``` + ### 2. Run After setting up the Python environment, you could run the example by following steps. @@ -31,7 +46,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'What is AI?' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/mixtral/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/mixtral/README.md index 0f9ce865..6514817e 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/mixtral/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/mixtral/README.md @@ -12,18 +12,30 @@ In the example [generate.py](./generate.py), we show a basic use case for a Mixt We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). 
After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -# below command will install PyTorch CPU as default -pip install torch==2.0.1 --index-url https://download.pytorch.org/whl/cpu -pip install --pre --upgrade ipex-llm[all] +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu # Please make sure you are using a stable version of Transformers, 4.36.0 or newer. pip install transformers==4.36.0 ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] + +pip install transformers==4.36.0 +``` + ### 2. Run ```bash diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/moss/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/moss/README.md index 0355daa9..2fc9e205 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/moss/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/moss/README.md @@ -9,11 +9,24 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a MOSS model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash conda create -n llm python=3.11 conda activate llm -pip install ipex-llm[all] # install ipex-llm with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +pip install "transformers<4.34" +``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] pip install "transformers<4.34" ``` @@ -33,7 +46,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/mpt/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/mpt/README.md index 5efb7172..e546eb9d 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/mpt/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/mpt/README.md @@ -8,14 +8,27 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for an MPT model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: ```bash conda create -n llm python=3.11 conda activate llm -pip install ipex-llm[all] # install ipex-llm with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install einops # additional package required for mpt-7b-chat and mpt-30b-chat to conduct generation ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install einops +``` + ### 2. 
Run ``` python ./generate.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --prompt PROMPT --n-predict N_PREDICT @@ -32,7 +45,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/phi-1_5/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/phi-1_5/README.md index e92d306b..e3c32c74 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/phi-1_5/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/phi-1_5/README.md @@ -15,14 +15,28 @@ In the example [generate.py](./generate.py), we show a basic use case for a phi- We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install einops # additional package required for phi-1_5 to conduct generation ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install einops +``` + ### 2. Run After setting up the Python environment, you could run the example by following steps. @@ -32,7 +46,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'What is AI?' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/phi-2/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/phi-2/README.md index 10cebf03..b211fd95 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/phi-2/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/phi-2/README.md @@ -15,13 +15,26 @@ In the example [generate.py](./generate.py), we show a basic use case for a phi- We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install einops # additional package required for phi-2 to conduct generation ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install einops +``` ### 2. 
Run After setting up the Python environment, you could run the example by following steps. @@ -32,7 +45,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'What is AI?' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/phi-3/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/phi-3/README.md index 8794d02c..ff9f870b 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/phi-3/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/phi-3/README.md @@ -15,11 +15,25 @@ In the example [generate.py](./generate.py), we show a basic use case for a phi- We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu + +pip install transformers==4.37.0 +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] pip install transformers==4.37.0 ``` @@ -33,7 +47,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'What is AI?' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/phixtral/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/phixtral/README.md index 2696aeb3..76563a99 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/phixtral/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/phixtral/README.md @@ -15,14 +15,28 @@ In the example [generate.py](./generate.py), we show a basic use case for a phix We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install einops # additional package required for phi to conduct generation ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install einops +``` + ### 2. 
Run After setting up the Python environment, you could run the example by following steps. @@ -32,7 +46,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'What is AI?' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/phoenix/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/phoenix/README.md index 9b162d2f..23651bc6 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/phoenix/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/phoenix/README.md @@ -9,11 +9,24 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a Phoenix model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash +conda create -n llm python=3.11 # recommend to use Python 3.11 +conda activate llm + +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd conda create -n llm python=3.11 conda activate llm -pip install ipex-llm[all] # install ipex-llm with 'all' option +pip install --pre --upgrade ipex-llm[all] ``` ### 2. Run @@ -32,7 +45,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/qwen-vl/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/qwen-vl/README.md index 16f5243c..777c60df 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/qwen-vl/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/qwen-vl/README.md @@ -10,22 +10,38 @@ In the example [chat.py](./chat.py), we show a basic use case for a Qwen-VL mode We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install accelerate tiktoken einops transformers_stream_generator==0.0.4 scipy torchvision pillow tensorboard matplotlib # additional package required for Qwen-VL-Chat to conduct generation ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] + +pip install accelerate tiktoken einops transformers_stream_generator==0.0.4 scipy torchvision pillow tensorboard matplotlib + +``` + ### 2. Run After setting up the Python environment, you could run the example by following steps. 
#### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./chat.py ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/qwen/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/qwen/README.md index c94d76a3..cee06098 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/qwen/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/qwen/README.md @@ -14,14 +14,27 @@ In the example [generate.py](./generate.py), we show a basic use case for a Qwen We suggest using conda to manage environment: +On Linux: + ```bash conda create -n llm python=3.11 conda activate llm -pip install ipex-llm[all] # install ipex-llm with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install tiktoken einops transformers_stream_generator # additional package required for Qwen-7B-Chat to conduct generation ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install tiktoken einops transformers_stream_generator +``` + ### 2. Run The minimum Qwen model version currently supported by IPEX-LLM is the version on November 30, 2023. @@ -44,7 +57,7 @@ Arguments info: On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/qwen1.5/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/qwen1.5/README.md index 66040d40..bd7c5842 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/qwen1.5/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/qwen1.5/README.md @@ -9,11 +9,15 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a Qwen model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash conda create -n llm python=3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install ipex-llm with 'all' option +# install ipex-llm with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install transformers==4.37.0 # install the transformers which support Qwen2 # only for Qwen1.5-MoE-A2.7B @@ -21,6 +25,20 @@ pip install transformers==4.40.0 pip install trl==0.8.1 ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install transformers==4.37.0 + +REM For Qwen1.5-MoE-A2.7B +pip install transformers==4.40.0 +pip install trl==0.8.1 +``` + ### 2. 
Run ``` python ./generate.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --prompt PROMPT --n-predict N_PREDICT @@ -37,7 +55,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/redpajama/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/redpajama/README.md index 0e9e0c38..9dbebc4e 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/redpajama/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/redpajama/README.md @@ -9,11 +9,24 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a RedPajama model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash +conda create -n llm python=3.11 # recommend to use Python 3.11 +conda activate llm + +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd conda create -n llm python=3.11 conda activate llm -pip install ipex-llm[all] # install ipex-llm with 'all' option +pip install --pre --upgrade ipex-llm[all] ``` ### 2. Run @@ -32,7 +45,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/replit/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/replit/README.md index 285b8040..7a973a18 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/replit/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/replit/README.md @@ -10,11 +10,25 @@ In the example [generate.py](./generate.py), we show a basic use case for an Rep We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +pip install "transformers<4.35" +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] pip install "transformers<4.35" ``` @@ -23,7 +37,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'def print_hello_world():' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. 
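The `generate.py` scripts that these HF-Transformers-AutoModels READMEs point to all share the same basic flow: load the checkpoint through IPEX-LLM's Hugging Face-style `AutoModelForCausalLM` with INT4 optimizations enabled, then call the standard `generate()` API. The following is only a minimal sketch of that flow, not the contents of any specific example; the model path and prompt are placeholders.

```python
import torch
from transformers import AutoTokenizer
from ipex_llm.transformers import AutoModelForCausalLM  # IPEX-LLM drop-in replacement

model_path = "meta-llama/Llama-2-7b-chat-hf"  # placeholder; use the README's REPO_ID_OR_MODEL_PATH

# load_in_4bit=True applies the INT4 optimizations described in these READMEs
model = AutoModelForCausalLM.from_pretrained(model_path,
                                             load_in_4bit=True,
                                             trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

with torch.inference_mode():
    input_ids = tokenizer.encode("What is AI?", return_tensors="pt")
    output = model.generate(input_ids, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```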
diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/skywork/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/skywork/README.md index 75f81fd8..993f13e1 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/skywork/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/skywork/README.md @@ -8,13 +8,25 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a Skywork model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash +conda create -n llm python=3.11 # recommend to use Python 3.11 +conda activate llm + +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd conda create -n llm python=3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] ``` - ### 2. Run ``` python ./generate.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --prompt PROMPT --n-predict N_PREDICT @@ -31,7 +43,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/solar/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/solar/README.md index 51c1a6b6..5de0568e 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/solar/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/solar/README.md @@ -8,14 +8,28 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a SOLAR model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash conda create -n llm python=3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install transformers==4.35.2 # required by SOLAR ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install transformers==4.35.2 +``` + ### 2. 
Run ``` python ./generate.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --prompt PROMPT --n-predict N_PREDICT @@ -32,7 +46,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/stablelm/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/stablelm/README.md index d3e9854a..64436a4a 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/stablelm/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/stablelm/README.md @@ -10,16 +10,31 @@ In the example [generate.py](./generate.py), we show a basic use case for a Stab We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu # Refer to https://huggingface.co/stabilityai/stablelm-zephyr-3b/blob/8b471c751c0e78cb46cf9f47738dd0eb45392071/config.json#L21, please make sure you are using a stable version of Transformers, 4.38.0 or newer. pip install transformers==4.38.0 ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] + +pip install transformers==4.38.0 +``` + ### 2. Run After setting up the Python environment, you could run the example by following steps. @@ -29,7 +44,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'What is AI?' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/starcoder/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/starcoder/README.md index 20cc936f..ed78c34e 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/starcoder/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/starcoder/README.md @@ -8,11 +8,24 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for an StarCoder model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash +conda create -n llm python=3.11 # recommend to use Python 3.11 +conda activate llm + +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd conda create -n llm python=3.11 conda activate llm -pip install ipex-llm[all] # install ipex-llm with 'all' option +pip install --pre --upgrade ipex-llm[all] ``` ### 2. 
Run @@ -31,7 +44,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/vicuna/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/vicuna/README.md index 9ed7ac15..9b00b305 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/vicuna/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/vicuna/README.md @@ -8,11 +8,24 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a Vicuna model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash +conda create -n llm python=3.11 # recommend to use Python 3.11 +conda activate llm + +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd conda create -n llm python=3.11 conda activate llm -pip install ipex-llm[all] # install ipex-llm with 'all' option +pip install --pre --upgrade ipex-llm[all] ``` ### 2. Run @@ -31,7 +44,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/whisper/readme.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/whisper/readme.md index 29f72a29..503fce61 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/whisper/readme.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/whisper/readme.md @@ -9,14 +9,28 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [recognize.py](./recognize.py), we show a basic use case for a Whisper model to conduct transcription using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash conda create -n llm python=3.11 conda activate llm -pip install ipex-llm[all] # install ipex-llm with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install datasets soundfile librosa # required by audio processing ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install datasets soundfile librosa +``` + ### 2. Run ``` python ./recognize.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --repo-id-or-data-path REPO_ID_OR_DATA_PATH --language LANGUAGE @@ -34,7 +48,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./recognize.py ``` @@ -65,11 +79,25 @@ Inference time: xxxx s In the example [long-segment-recognize.py](./long-segment-recognize.py), we show a basic use case for a Whisper model to conduct transcription using `pipeline()` API for long audio input, with IPEX-LLM INT4 optimizations. ### 1. 
Install We suggest using conda to manage environment: + +On Linux: + ```bash conda create -n llm python=3.11 conda activate llm -pip install ipex-llm[all] # install ipex-llm with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +pip install datasets soundfile librosa # required by audio processing +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] pip install datasets soundfile librosa # required by audio processing ``` @@ -92,7 +120,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd # Long Segment Recognize python ./long-segment-recognize.py --audio-file /PATH/TO/AUDIO_FILE ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/wizardcoder-python/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/wizardcoder-python/README.md index 1801214a..6864775b 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/wizardcoder-python/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/wizardcoder-python/README.md @@ -8,11 +8,24 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a WizardCoder-Python model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash +conda create -n llm python=3.11 # recommend to use Python 3.11 +conda activate llm + +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd conda create -n llm python=3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] ``` ### 2. Run @@ -31,7 +44,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/yi/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/yi/README.md index 829af83f..b3ea29d4 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/yi/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/yi/README.md @@ -10,14 +10,28 @@ In the example [generate.py](./generate.py), we show a basic use case for an Yi We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). 
After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install einops # additional package required for Yi-6B to conduct generation ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install einops +``` + ### 2. Run ``` python ./generate.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --prompt PROMPT --n-predict N_PREDICT @@ -34,7 +48,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/yuan2/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/yuan2/README.md index 96c08614..d39cdf5b 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/yuan2/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/yuan2/README.md @@ -12,15 +12,30 @@ In the example [generate.py](./generate.py), we show a basic use case for an Yua We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install einops # additional package required for Yuan2 to conduct generation pip install pandas # additional package required for Yuan2 to conduct generation ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install einops +pip install pandas +``` + ### 2. Run ``` python ./generate.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --prompt PROMPT --n-predict N_PREDICT @@ -37,7 +52,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/ziya/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/ziya/README.md index 9d1fa08c..ff56fba8 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/ziya/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/ziya/README.md @@ -15,14 +15,28 @@ In the example [generate.py](./generate.py), we show a basic use case for a Ziya We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). 
After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install einops # additional package required for Ziya to conduct generation ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install einops +``` + ### 2. Run After setting up the Python environment, you could run the example by following steps. @@ -32,7 +46,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'def quick_sort(arr):\n' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/More-Data-Types/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/More-Data-Types/README.md index 93284b2e..d97d51fb 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/More-Data-Types/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/More-Data-Types/README.md @@ -8,7 +8,7 @@ We suggest using conda to manage environment: conda create -n llm python=3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu ``` ## Run Example diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Save-Load/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Save-Load/README.md index 93284b2e..d97d51fb 100644 --- a/python/llm/example/CPU/HF-Transformers-AutoModels/Save-Load/README.md +++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Save-Load/README.md @@ -8,7 +8,7 @@ We suggest using conda to manage environment: conda create -n llm python=3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu ``` ## Run Example diff --git a/python/llm/example/CPU/ModelScope-Models/README.md b/python/llm/example/CPU/ModelScope-Models/README.md index d416a8ea..11df8052 100644 --- a/python/llm/example/CPU/ModelScope-Models/README.md +++ b/python/llm/example/CPU/ModelScope-Models/README.md @@ -9,15 +9,29 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a ChatGLM3 model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. 
Install We suggest using conda to manage environment: + +On Linux: + ```bash conda create -n llm python=3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install ipex-llm with 'all' option +# install ipex-llm with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu # Refer to https://github.com/modelscope/modelscope/issues/765, please make sure you are using 1.11.0 version pip install modelscope==1.11.0 ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install modelscope==1.11.0 +``` + ### 2. Run ``` python ./generate.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --prompt PROMPT --n-predict N_PREDICT @@ -34,7 +48,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/Native-Models/README.md b/python/llm/example/CPU/Native-Models/README.md index 1a2d80a8..c235ba1a 100644 --- a/python/llm/example/CPU/Native-Models/README.md +++ b/python/llm/example/CPU/Native-Models/README.md @@ -6,7 +6,20 @@ In this example, we show a pipeline to convert a large language model to IPEX-LL ## Prepare Environment We suggest using conda to manage environment: + +On Linux: + ```bash +conda create -n llm python=3.11 # recommend to use Python 3.11 +conda activate llm + +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd conda create -n llm python=3.11 conda activate llm diff --git a/python/llm/example/CPU/PyTorch-Models/Model/aquila2/README.md b/python/llm/example/CPU/PyTorch-Models/Model/aquila2/README.md index 2c9cd008..50526cb7 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/aquila2/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/aquila2/README.md @@ -10,11 +10,24 @@ In the example [generate.py](./generate.py), we show a basic use case for a Aqui We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] ``` ### 2. Run @@ -22,7 +35,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'What is AI?' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. 
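The PyTorch-Models examples that follow (aquila2, bark, bert and so on) rely on IPEX-LLM's generic PyTorch API rather than the `AutoModel` wrappers: the model is first loaded with the usual Hugging Face/PyTorch code and then passed through `optimize_model()`. The snippet below is a rough sketch of that pattern; the checkpoint name is a placeholder and the exact loading code differs per model.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from ipex_llm import optimize_model  # PyTorch-level optimization entry point

model_path = "BAAI/AquilaChat2-7B"  # placeholder checkpoint

# load with stock Hugging Face code first...
model = AutoModelForCausalLM.from_pretrained(model_path,
                                             torch_dtype=torch.float32,
                                             trust_remote_code=True)
# ...then let IPEX-LLM apply its low-bit (INT4 by default) optimizations
model = optimize_model(model)

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
with torch.inference_mode():
    input_ids = tokenizer.encode("What is AI?", return_tensors="pt")
    output = model.generate(input_ids, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```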
diff --git a/python/llm/example/CPU/PyTorch-Models/Model/bark/README.md b/python/llm/example/CPU/PyTorch-Models/Model/bark/README.md index ba4f4282..e014a3e1 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/bark/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/bark/README.md @@ -10,11 +10,26 @@ In the example [synthesize_speech.py](./synthesize_speech.py), we show a basic u We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +pip install TTS scipy +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] pip install TTS scipy ``` @@ -34,7 +49,7 @@ After setting up the Python environment and downloading Bark model, you could ru #### 3.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd # make sure `--model-path` corresponds to the local folder of downloaded model python ./synthesize_speech.py --model-path 'bark/' --text "This is an example text for synthesize speech." ``` diff --git a/python/llm/example/CPU/PyTorch-Models/Model/bert/README.md b/python/llm/example/CPU/PyTorch-Models/Model/bert/README.md index 5bfe4e00..bf9eee36 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/bert/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/bert/README.md @@ -10,19 +10,31 @@ In the example [extract_feature.py](./extract_feature.py), we show a basic use c We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +``` ### 2. Run After setting up the Python environment, you could run the example by following steps. #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./extract_feature.py --text 'This is an example text for feature extraction.' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. 
diff --git a/python/llm/example/CPU/PyTorch-Models/Model/bluelm/README.md b/python/llm/example/CPU/PyTorch-Models/Model/bluelm/README.md index a68f2cb8..e6f33415 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/bluelm/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/bluelm/README.md @@ -10,11 +10,24 @@ In the example [generate.py](./generate.py), we show a basic use case for a Blue We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] ``` ### 2. Run @@ -22,7 +35,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'AI是什么?' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/PyTorch-Models/Model/chatglm/README.md b/python/llm/example/CPU/PyTorch-Models/Model/chatglm/README.md index bd5a3167..a387980d 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/chatglm/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/chatglm/README.md @@ -10,12 +10,27 @@ In the example [generate.py](./generate.py), we show a basic use case for a Chat We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option -pip install "transformers<4.34.1" # chatglm cannot work with transformers 4.34.1+ +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +pip install "transformers<4.34.1" +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install "transformers<4.34.1" ``` ### 2. Run @@ -23,7 +38,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'AI是什么?' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. 
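Several of these READMEs pin a specific `transformers` release (for example `transformers<4.34.1` for ChatGLM above, or `transformers==4.38.1` for Gemma and CodeGemma). If generation fails after setup, it is worth confirming that the active conda environment actually picked up the pinned version; one quick check is sketched below, where the version named in the comment is only an illustration of what to expect.

```python
import transformers

# should report the version pinned by the README you followed,
# e.g. a release below 4.34.1 when following the ChatGLM instructions
print(transformers.__version__)
```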
diff --git a/python/llm/example/CPU/PyTorch-Models/Model/chatglm3/README.md b/python/llm/example/CPU/PyTorch-Models/Model/chatglm3/README.md index 3ee550a4..736e3ce7 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/chatglm3/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/chatglm3/README.md @@ -10,11 +10,24 @@ In the example [generate.py](./generate.py), we show a basic use case for a Chat We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] ``` ### 2. Run @@ -22,7 +35,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'AI是什么?' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/PyTorch-Models/Model/codegemma/README.md b/python/llm/example/CPU/PyTorch-Models/Model/codegemma/README.md index 22bdabc5..d0edbf94 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/codegemma/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/codegemma/README.md @@ -10,23 +10,36 @@ In the example [generate.py](./generate.py), we show a basic use case for a Code We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -# install ipex-llm with 'all' option -pip install --pre --upgrade ipex-llm[all] - +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu # According to CodeGemma's requirement, please make sure you are using a stable version of Transformers, 4.38.1 or newer. pip install transformers==4.38.1 ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install transformers==4.38.1 +``` + ### 2. Run After setting up the Python environment, you could run the example by following steps. #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --prompt PROMPT --n-predict N_PREDICT ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. 
diff --git a/python/llm/example/CPU/PyTorch-Models/Model/codellama/README.md b/python/llm/example/CPU/PyTorch-Models/Model/codellama/README.md index 9915ffd9..8504713b 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/codellama/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/codellama/README.md @@ -10,20 +10,35 @@ In the example [generate.py](./generate.py), we show a basic use case for a Code We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install transformers==4.34.1 # CodeLlamaTokenizer is supported in higher version of transformers ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install transformers==4.34.1 +``` + ### 2. Run After setting up the Python environment, you could run the example by following steps. #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'def print_hello_world():' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/PyTorch-Models/Model/codeshell/README.md b/python/llm/example/CPU/PyTorch-Models/Model/codeshell/README.md index dff6f8e8..2b9b9c1d 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/codeshell/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/codeshell/README.md @@ -10,11 +10,24 @@ In the example [generate.py](./generate.py), we show a basic use case for a Code We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] ``` ### 2. Run @@ -22,7 +35,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'def print_hello_world():' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. 
diff --git a/python/llm/example/CPU/PyTorch-Models/Model/cohere/README.md b/python/llm/example/CPU/PyTorch-Models/Model/cohere/README.md index d104c84d..2abcc99e 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/cohere/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/cohere/README.md @@ -8,12 +8,27 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [generate.py](./generate.py), we show a basic use case for a cohere model to predict the next N tokens using `generate()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + + +On Linux: + ```bash +conda create -n llm python=3.11 # recommend to use Python 3.11 +conda activate llm + +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +pip install transformers==4.40.0 +``` + +On Windows: + +```cmd conda create -n llm python=3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install ipex-llm with 'all' option -pip install tansformers==4.40.0 +pip install --pre --upgrade ipex-llm[all] +pip install transformers==4.40.0 ``` ### 2. Run @@ -32,7 +47,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/PyTorch-Models/Model/deciLM-7b/README.md b/python/llm/example/CPU/PyTorch-Models/Model/deciLM-7b/README.md index bf92a5b6..62c89a57 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/deciLM-7b/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/deciLM-7b/README.md @@ -10,20 +10,34 @@ In the example [generate.py](./generate.py), we show a basic use case for a Deci We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install transformers==4.35.2 # required by DeciLM-7B ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install transformers==4.35.2 +``` + ### 2. Run After setting up the Python environment, you could run the example by following steps. #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --prompt PROMPT --n-predict N_PREDICT ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. 
diff --git a/python/llm/example/CPU/PyTorch-Models/Model/deepseek-moe/README.md b/python/llm/example/CPU/PyTorch-Models/Model/deepseek-moe/README.md index fa9b9945..e0ed0059 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/deepseek-moe/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/deepseek-moe/README.md @@ -10,11 +10,25 @@ In the example [generate.py](./generate.py), we show a basic use case for a deep We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +pip install einops +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] pip install einops ``` @@ -23,7 +37,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'What is AI?' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/PyTorch-Models/Model/deepseek/README.md b/python/llm/example/CPU/PyTorch-Models/Model/deepseek/README.md index 9e86fa27..88315963 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/deepseek/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/deepseek/README.md @@ -10,11 +10,24 @@ In the example [generate.py](./generate.py), we show a basic use case for a Deep We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] ``` ### 2. Run @@ -22,7 +35,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'What is AI?' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. 
diff --git a/python/llm/example/CPU/PyTorch-Models/Model/distil-whisper/README.md b/python/llm/example/CPU/PyTorch-Models/Model/distil-whisper/README.md index ff777d77..2ab17e17 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/distil-whisper/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/distil-whisper/README.md @@ -11,12 +11,26 @@ In the example [recognize.py](./recognize.py), we show a basic use case for a Di We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash +conda create -n llm python=3.11 # recommend to use Python 3.11 +conda activate llm + +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +pip install datasets soundfile librosa # required by audio processing +``` + +On Windows: + +```cmd conda create -n llm python=3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option -pip install datasets soundfile librosa # required by audio processing +pip install --pre --upgrade ipex-llm[all] +pip install datasets soundfile librosa ``` ### 2. Run @@ -28,7 +42,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./recognize.py ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/PyTorch-Models/Model/flan-t5/README.md b/python/llm/example/CPU/PyTorch-Models/Model/flan-t5/README.md index 2daa684f..de95d858 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/flan-t5/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/flan-t5/README.md @@ -11,11 +11,24 @@ In the example [generate.py](./generate.py), we show a basic use case for a Flan We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] ``` ### 2. Run @@ -27,7 +40,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'Translate to German: My name is Arthur' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. 
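Editor's note: for the audio examples above (Distil-Whisper's recognize.py and the similar Whisper recipes), the extra `datasets`, `soundfile`, and `librosa` packages are only needed to read and resample audio. Below is a minimal sketch of what that preprocessing usually looks like; the file name and the 16 kHz sample rate are assumptions (16 kHz is what Whisper-family models expect), not values taken from the example scripts.

```python
# Minimal, illustrative audio preprocessing for the recognize.py-style examples
import librosa
import soundfile as sf

# Load and resample a local clip to mono 16 kHz float32 (file name is a placeholder)
audio, sampling_rate = librosa.load("audio.wav", sr=16000)
print(audio.shape, sampling_rate)

# Optionally write the resampled clip back out for reuse
sf.write("audio_16k.wav", audio, sampling_rate)
```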
diff --git a/python/llm/example/CPU/PyTorch-Models/Model/fuyu/README.md b/python/llm/example/CPU/PyTorch-Models/Model/fuyu/README.md index 8bf15bd1..84de7835 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/fuyu/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/fuyu/README.md @@ -10,15 +10,30 @@ In the example [generate.py](./generate.py), we show a basic use case for an Fuy We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install transformers==4.35 pillow # additional package required for Fuyu to conduct generation ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] + +pip install transformers==4.35 pillow +``` + ### 2. Run After setting up the Python environment, you could run the example by following steps. @@ -28,7 +43,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --image-path demo.jpg ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/PyTorch-Models/Model/internlm-xcomposer/README.md b/python/llm/example/CPU/PyTorch-Models/Model/internlm-xcomposer/README.md index eda342d8..bc27022f 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/internlm-xcomposer/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/internlm-xcomposer/README.md @@ -10,14 +10,28 @@ In the example [chat.py](./chat.py), we show a basic use case for an InternLM_XC We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install accelerate timm==0.4.12 sentencepiece==0.1.99 gradio==3.44.4 markdown2==2.4.10 xlsxwriter==3.1.2 einops # additional package required for InternLM_XComposer to conduct generation +``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] + +pip install accelerate timm==0.4.12 sentencepiece==0.1.99 gradio==3.44.4 markdown2==2.4.10 xlsxwriter==3.1.2 einops ``` ### 2. 
Download Model and Replace File @@ -49,7 +63,7 @@ After setting up the Python environment, you could run the example by following #### 3.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./chat.py --image-path demo.jpg ``` More information about arguments can be found in [Arguments Info](#33-arguments-info) section. The expected output can be found in [Sample Output](#34-sample-output) section. diff --git a/python/llm/example/CPU/PyTorch-Models/Model/internlm2/README.md b/python/llm/example/CPU/PyTorch-Models/Model/internlm2/README.md index f8c1ff8c..02413270 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/internlm2/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/internlm2/README.md @@ -10,11 +10,24 @@ In the example [generate.py](./generate.py), we show a basic use case for a Inte We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] ``` ### 2. Run @@ -22,7 +35,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'What is AI?' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/PyTorch-Models/Model/llama2/README.md b/python/llm/example/CPU/PyTorch-Models/Model/llama2/README.md index 2227e0dc..2d56b03f 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/llama2/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/llama2/README.md @@ -10,11 +10,24 @@ In the example [generate.py](./generate.py), we show a basic use case for a Llam We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] ``` ### 2. 
Run @@ -22,7 +35,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'What is AI?' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/PyTorch-Models/Model/llama3/README.md b/python/llm/example/CPU/PyTorch-Models/Model/llama3/README.md index 0f9de282..f50a7ebf 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/llama3/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/llama3/README.md @@ -10,22 +10,37 @@ In the example [generate.py](./generate.py), we show a basic use case for a Llam We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu # transformers>=4.33.0 is required for Llama3 with IPEX-LLM optimizations pip install transformers==4.37.0 ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] + +pip install transformers==4.37.0 +``` + ### 2. Run After setting up the Python environment, you could run the example by following steps. #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'What is AI?' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/PyTorch-Models/Model/llava/README.md b/python/llm/example/CPU/PyTorch-Models/Model/llava/README.md index 0dde00f1..08ea2c0e 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/llava/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/llava/README.md @@ -11,11 +11,15 @@ In the example [generate.py](./generate.py), we show a basic use case for a LLaV We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). 
After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install einops # install dependencies required by llava pip install transformers==4.36.2 @@ -25,6 +29,22 @@ cd LLaVA # change the working directory to the LLaVA folder git checkout tags/v1.2.0 -b 1.2.0 # Get the branch which is compatible with transformers 4.36 ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install einops +pip install transformers==4.36.2 + +git clone https://github.com/haotian-liu/LLaVA.git +copy generate.py .\LLaVA\ +cd LLaVA +git checkout tags/v1.2.0 -b 1.2.0 +``` + ### 2. Run After setting up the Python environment, you could run the example by following steps. @@ -34,7 +54,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --image-path-or-url 'https://llava-vl.github.io/static/images/monalisa.jpg' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/PyTorch-Models/Model/mamba/README.md b/python/llm/example/CPU/PyTorch-Models/Model/mamba/README.md index 5950791f..bd47c7b2 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/mamba/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/mamba/README.md @@ -10,20 +10,34 @@ In the example [generate.py](./generate.py), we show a basic use case for a Mamb We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install einops # package required by Mamba ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install einops +``` + ### 2. Run After setting up the Python environment, you could run the example by following steps. 
#### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/PyTorch-Models/Model/meta-llama/README.md b/python/llm/example/CPU/PyTorch-Models/Model/meta-llama/README.md index 4c0ccb20..6075408b 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/meta-llama/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/meta-llama/README.md @@ -9,6 +9,9 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [example_chat_completion.py](./example_chat_completion.py), we show a basic use case for a Llama model to engage in a conversation with an AI assistant using `chat_completion` API, with IPEX-LLM INT4 optimizations. The process for [example_text_completion.py](./example_text_completion.py) is similar. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash conda create -n llm python=3.11 conda activate llm @@ -19,8 +22,22 @@ cd llama/ git apply < ../cpu.patch # apply cpu version patch pip install -e . -cd - -pip install ipex-llm[all] # install ipex-llm with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +git clone https://github.com/facebookresearch/llama.git +cd llama/ +git apply < ../cpu.patch +pip install -e . + +pip install --pre --upgrade ipex-llm[all] ``` ### 2. Run @@ -46,7 +63,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd torchrun --nproc-per-node 1 example_chat_completion.py --ckpt_dir llama-2-7b-chat/ --tokenizer_path tokenizer.model --max_seq_len 64 --max_batch_size 1 --backend cpu ``` diff --git a/python/llm/example/CPU/PyTorch-Models/Model/mistral/README.md b/python/llm/example/CPU/PyTorch-Models/Model/mistral/README.md index 8a4adbcd..e058a716 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/mistral/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/mistral/README.md @@ -12,22 +12,35 @@ In the example [generate.py](./generate.py), we show a basic use case for a Mist We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option - +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu # Refer to https://huggingface.co/mistralai/Mistral-7B-v0.1#troubleshooting, please make sure you are using a stable version of Transformers, 4.34.0 or newer. pip install transformers==4.34.0 ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install transformers==4.34.0 +``` + ### 2. Run After setting up the Python environment, you could run the example by following steps. 
#### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'What is AI?' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/PyTorch-Models/Model/mixtral/README.md b/python/llm/example/CPU/PyTorch-Models/Model/mixtral/README.md index bc8ee08e..6bbcc008 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/mixtral/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/mixtral/README.md @@ -12,18 +12,31 @@ In the example [generate.py](./generate.py), we show a basic use case for a Mixt We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -# below command will install PyTorch CPU as default -pip install torch==2.0.1 --index-url https://download.pytorch.org/whl/cpu -pip install --pre --upgrade ipex-llm[all] +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu + # Please make sure you are using a stable version of Transformers, 4.36.0 or newer. pip install transformers==4.36.0 ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] + +pip install transformers==4.36.0 +``` + ### 2. Run ```bash diff --git a/python/llm/example/CPU/PyTorch-Models/Model/openai-whisper/readme.md b/python/llm/example/CPU/PyTorch-Models/Model/openai-whisper/readme.md index a1def711..b548107f 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/openai-whisper/readme.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/openai-whisper/readme.md @@ -9,15 +9,30 @@ To run these examples with IPEX-LLM, we have some recommended requirements for y In the example [recognize.py](./recognize.py), we show a basic use case for a Whisper model to conduct transcription using `transcribe()` API, with IPEX-LLM INT4 optimizations. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash conda create -n llm python=3.11 conda activate llm -pip install ipex-llm[all] # install ipex-llm with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install -U openai-whisper pip install librosa # required by audio processing ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install -U openai-whisper +pip install librosa +``` + ### 2. 
Run ``` python ./recognize.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --repo-id-or-data-path REPO_ID_OR_DATA_PATH --language LANGUAGE @@ -35,7 +50,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./recognize.py --audio-file /PATH/TO/AUDIO_FILE ``` diff --git a/python/llm/example/CPU/PyTorch-Models/Model/phi-1_5/README.md b/python/llm/example/CPU/PyTorch-Models/Model/phi-1_5/README.md index 3b4dfac1..65be1eca 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/phi-1_5/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/phi-1_5/README.md @@ -10,11 +10,25 @@ In the example [generate.py](./generate.py), we show a basic use case for a phi- We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +pip install einops +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] pip install einops ``` @@ -23,7 +37,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'What is AI?' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/PyTorch-Models/Model/phi-2/README.md b/python/llm/example/CPU/PyTorch-Models/Model/phi-2/README.md index 81355b62..2320490d 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/phi-2/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/phi-2/README.md @@ -10,11 +10,25 @@ In the example [generate.py](./generate.py), we show a basic use case for a phi- We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +pip install einops +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] pip install einops ``` @@ -23,7 +37,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'What is AI?' 
``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/PyTorch-Models/Model/phi-3/README.md b/python/llm/example/CPU/PyTorch-Models/Model/phi-3/README.md index f9bb937f..66b9eac9 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/phi-3/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/phi-3/README.md @@ -15,11 +15,26 @@ In the example [generate.py](./generate.py), we show a basic use case for a phi- We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu + +pip install transformers==4.37.0 +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] pip install transformers==4.37.0 ``` @@ -29,7 +44,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'What is AI?' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/PyTorch-Models/Model/phixtral/README.md b/python/llm/example/CPU/PyTorch-Models/Model/phixtral/README.md index 9f824fad..3daadbad 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/phixtral/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/phixtral/README.md @@ -10,11 +10,25 @@ In the example [generate.py](./generate.py), we show a basic use case for a phix We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +pip install einops +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] pip install einops ``` @@ -23,7 +37,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'What is AI?' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. 
diff --git a/python/llm/example/CPU/PyTorch-Models/Model/qwen-vl/README.md b/python/llm/example/CPU/PyTorch-Models/Model/qwen-vl/README.md index 0e2c21cf..b28d49e6 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/qwen-vl/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/qwen-vl/README.md @@ -10,14 +10,26 @@ In the example [chat.py](./chat.py), we show a basic use case for a Qwen-VL mode We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option - +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install accelerate tiktoken einops transformers_stream_generator==0.0.4 scipy torchvision pillow tensorboard matplotlib # additional package required for Qwen-VL-Chat to conduct generation +``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install accelerate tiktoken einops transformers_stream_generator==0.0.4 scipy torchvision pillow tensorboard matplotlib ``` ### 2. Run @@ -25,7 +37,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./chat.py ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/PyTorch-Models/Model/qwen1.5/README.md b/python/llm/example/CPU/PyTorch-Models/Model/qwen1.5/README.md index 9a2b41b9..7841702b 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/qwen1.5/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/qwen1.5/README.md @@ -10,11 +10,15 @@ In the example [generate.py](./generate.py), we show a basic use case for a Qwen We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install transformers==4.37.0 # install transformers which supports Qwen2 # only for Qwen1.5-MoE-A2.7B @@ -22,12 +26,26 @@ pip install transformers==4.40.0 pip install trl==0.8.1 ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install transformers==4.37.0 + +REM for Qwen1.5-MoE-A2.7B +pip install transformers==4.40.0 +pip install trl==0.8.1 +``` + ### 2. Run After setting up the Python environment, you could run the example by following steps. 
#### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'What is AI?' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/PyTorch-Models/Model/skywork/README.md b/python/llm/example/CPU/PyTorch-Models/Model/skywork/README.md index b1b21407..71277e69 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/skywork/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/skywork/README.md @@ -10,11 +10,24 @@ In the example [generate.py](./generate.py), we show a basic use case for a Skyw We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] ``` ### 2. Run @@ -22,7 +35,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/PyTorch-Models/Model/solar/README.md b/python/llm/example/CPU/PyTorch-Models/Model/solar/README.md index 44c2ae4b..89bea91e 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/solar/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/solar/README.md @@ -10,20 +10,34 @@ In the example [generate.py](./generate.py), we show a basic use case for a SOLA We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install transformers==4.35.2 # required by SOLAR ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install transformers==4.35.2 +``` + ### 2. Run After setting up the Python environment, you could run the example by following steps. 
#### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --prompt PROMPT --n-predict N_PREDICT ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/PyTorch-Models/Model/stablelm/README.md b/python/llm/example/CPU/PyTorch-Models/Model/stablelm/README.md index 8934e3f8..d2a44a25 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/stablelm/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/stablelm/README.md @@ -10,16 +10,31 @@ In the example [generate.py](./generate.py), we show a basic use case for a Stab We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu # Refer to https://huggingface.co/stabilityai/stablelm-zephyr-3b/blob/8b471c751c0e78cb46cf9f47738dd0eb45392071/config.json#L21, please make sure you are using a stable version of Transformers, 4.38.0 or newer. pip install transformers==4.38.0 ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] + +pip install transformers==4.38.0 +``` + ### 2. Run After setting up the Python environment, you could run the example by following steps. @@ -29,7 +44,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'What is AI?' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/PyTorch-Models/Model/wizardcoder-python/README.md b/python/llm/example/CPU/PyTorch-Models/Model/wizardcoder-python/README.md index e4f99c47..49f903c5 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/wizardcoder-python/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/wizardcoder-python/README.md @@ -10,11 +10,24 @@ In the example [generate.py](./generate.py), we show a basic use case for a Wiza We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). 
After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] ``` ### 2. Run @@ -22,7 +35,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/PyTorch-Models/Model/yi/README.md b/python/llm/example/CPU/PyTorch-Models/Model/yi/README.md index 89adf93a..c7eb8f27 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/yi/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/yi/README.md @@ -11,14 +11,28 @@ In the example [generate.py](./generate.py), we show a basic use case for a Yi m We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install einops # additional package required for Yi-6B to conduct generation ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install einops +``` + ### 2. Run After setting up the Python environment, you could run the example by following steps. @@ -28,7 +42,7 @@ After setting up the Python environment, you could run the example by following #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/PyTorch-Models/Model/yuan2/README.md b/python/llm/example/CPU/PyTorch-Models/Model/yuan2/README.md index 3627e815..403abc05 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/yuan2/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/yuan2/README.md @@ -12,13 +12,28 @@ In the example [generate.py](./generate.py), we show a basic use case for an Yua We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). 
After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash +conda create -n llm python=3.11 # recommend to use Python 3.11 +conda activate llm + +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +pip install einops # additional package required for Yuan2 to conduct generation +pip install pandas # additional package required for Yuan2 to conduct generation +``` + +On Windows: + +```cmd conda create -n llm python=3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option -pip install einops # additional package required for Yuan2 to conduct generation -pip install pandas # additional package required for Yuan2 to conduct generation +pip install --pre --upgrade ipex-llm[all] +pip install einops +pip install pandas ``` ### 2. Run @@ -33,7 +48,7 @@ Arguments info: #### 2.1 Client On client Windows machine, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py ``` diff --git a/python/llm/example/CPU/PyTorch-Models/Model/ziya/README.md b/python/llm/example/CPU/PyTorch-Models/Model/ziya/README.md index 79ac293d..ea43f9d3 100644 --- a/python/llm/example/CPU/PyTorch-Models/Model/ziya/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Model/ziya/README.md @@ -10,19 +10,33 @@ In the example [generate.py](./generate.py), we show a basic use case for a Ziya We suggest using conda to manage the Python environment. For more information about conda installation, please refer to [here](https://docs.conda.io/en/latest/miniconda.html#). After installing conda, create a Python environment for IPEX-LLM: + +On Linux: + ```bash conda create -n llm python=3.11 # recommend to use Python 3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu pip install einops # additional package required for Ziya to conduct generation ``` +On Windows: + +```cmd +conda create -n llm python=3.11 +conda activate llm + +pip install --pre --upgrade ipex-llm[all] +pip install einops +``` + ### 2. Run After setting up the Python environment, you could run the example by following steps. #### 2.1 Client On client Windows machines, it is recommended to run directly with full utilization of all cores: -```powershell +```cmd python ./generate.py --prompt 'def quick_sort(arr):\n' ``` More information about arguments can be found in [Arguments Info](#23-arguments-info) section. The expected output can be found in [Sample Output](#24-sample-output) section. diff --git a/python/llm/example/CPU/PyTorch-Models/More-Data-Types/README.md b/python/llm/example/CPU/PyTorch-Models/More-Data-Types/README.md index 4bbfb55e..5f2cc33e 100644 --- a/python/llm/example/CPU/PyTorch-Models/More-Data-Types/README.md +++ b/python/llm/example/CPU/PyTorch-Models/More-Data-Types/README.md @@ -9,11 +9,24 @@ To run this example with IPEX-LLM, we have some recommended requirements for you In the example [generate.py](./generate.py), we show a basic use case of low-bit optimizations (including INT8/INT5/INT4) on a Llama2 model to predict the next N tokens using `generate()` API. By specifying `--low-bit` argument, you could apply other low-bit optimization (e.g. 
INT8/INT5) on model. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash +conda create -n llm python=3.11 # recommend to use Python 3.11 +conda activate llm + +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd conda create -n llm python=3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install ipex-llm with 'all' option +pip install --pre --upgrade ipex-llm[all] ``` ### 2. Run diff --git a/python/llm/example/CPU/PyTorch-Models/Save-Load/README.md b/python/llm/example/CPU/PyTorch-Models/Save-Load/README.md index f3bbb5cf..9570a394 100644 --- a/python/llm/example/CPU/PyTorch-Models/Save-Load/README.md +++ b/python/llm/example/CPU/PyTorch-Models/Save-Load/README.md @@ -9,11 +9,24 @@ To run this example with IPEX-LLM, we have some recommended requirements for you In the example [generate.py](./generate.py), we show a basic use case of saving/loading model in low-bit optimizations to predict the next N tokens using `generate()` API. Also, saving and loading operations are platform-independent, so you could run it on different platforms. ### 1. Install We suggest using conda to manage environment: + +On Linux: + ```bash +conda create -n llm python=3.11 # recommend to use Python 3.11 +conda activate llm + +# install the latest ipex-llm nightly build with 'all' option +pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu +``` + +On Windows: + +```cmd conda create -n llm python=3.11 conda activate llm -pip install --pre --upgrade ipex-llm[all] # install ipex-llm with 'all' option +pip install --pre --upgrade ipex-llm[all] ``` ### 2. Run diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/cohere/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/cohere/README.md index ab6947d4..4214da3f 100644 --- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/cohere/README.md +++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/cohere/README.md @@ -17,7 +17,7 @@ conda create -n llm python=3.11 conda activate llm # below command will install intel_extension_for_pytorch==2.1.10+xpu as default pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -pip install tansformers==4.40.0 +pip install transformers==4.40.0 conda install -c conda-forge -y gperftools=2.10 # to enable tcmalloc ``` @@ -29,7 +29,7 @@ conda activate llm # below command will install intel_extension_for_pytorch==2.1.10+xpu as default pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -pip install tansformers==4.40.0 +pip install transformers==4.40.0 ``` ### 2. 
Configures OneAPI environment variables for Linux diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/mpt/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/mpt/README.md index 2bafc0e3..192d31b8 100644 --- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/mpt/README.md +++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/mpt/README.md @@ -27,7 +27,7 @@ conda activate llm # below command will install intel_extension_for_pytorch==2.1.10+xpu as default pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -pip install einops # additional package required for mpt-7b-chat and mpt-30b-chat to conduct generation +pip install einops # additional package required for mpt-7b-chat and mpt-30b-chat to conduct generation ``` ### 2. Configures OneAPI environment variables for Linux diff --git a/python/llm/example/GPU/PyTorch-Models/Model/cohere/README.md b/python/llm/example/GPU/PyTorch-Models/Model/cohere/README.md index ab6947d4..4214da3f 100644 --- a/python/llm/example/GPU/PyTorch-Models/Model/cohere/README.md +++ b/python/llm/example/GPU/PyTorch-Models/Model/cohere/README.md @@ -17,7 +17,7 @@ conda create -n llm python=3.11 conda activate llm # below command will install intel_extension_for_pytorch==2.1.10+xpu as default pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -pip install tansformers==4.40.0 +pip install transformers==4.40.0 conda install -c conda-forge -y gperftools=2.10 # to enable tcmalloc ``` @@ -29,7 +29,7 @@ conda activate llm # below command will install intel_extension_for_pytorch==2.1.10+xpu as default pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -pip install tansformers==4.40.0 +pip install transformers==4.40.0 ``` ### 2. Configures OneAPI environment variables for Linux