From acd77d9e87792166679e90f6a55292be76adc768 Mon Sep 17 00:00:00 2001
From: "Chu,Youcheng" <1340390339@qq.com>
Date: Wed, 27 Nov 2024 11:16:36 +0800
Subject: [PATCH] Remove env variable `BIGDL_LLM_XMX_DISABLED` in documentation
(#12445)
* fix: remove BIGDL_LLM_XMX_DISABLED in mddocs
* fix: remove set SYCL_CACHE_PERSISTENT=1 in example
* fix: remove BIGDL_LLM_XMX_DISABLED in workflows
* fix: merge igpu and A-series Graphics
* fix: remove set BIGDL_LLM_XMX_DISABLED=1 in example
* fix: remove BIGDL_LLM_XMX_DISABLED in workflows
* fix: merge igpu and A-series Graphics
* fix: textual adjustment
* fix: textual adjustment
* fix: textual adjustment
---
.github/workflows/llm_performance_tests.yml | 34 -------------------
.../docker_pytorch_inference_gpu.md | 4 ---
docs/mddocs/Overview/install_gpu.md | 9 +----
.../mddocs/Quickstart/benchmark_quickstart.md | 19 ++---------
docs/mddocs/Quickstart/install_linux_gpu.md | 1 -
.../Quickstart/install_linux_gpu.zh-CN.md | 1 -
docs/mddocs/Quickstart/install_windows_gpu.md | 11 ++----
.../Quickstart/install_windows_gpu.zh-CN.md | 9 +----
docs/mddocs/Quickstart/webui_quickstart.md | 5 ---
.../GPU/HuggingFace/LLM/aquila/README.md | 15 ++------
.../GPU/HuggingFace/LLM/aquila2/README.md | 15 ++------
.../GPU/HuggingFace/LLM/baichuan/README.md | 15 ++------
.../GPU/HuggingFace/LLM/baichuan2/README.md | 15 ++------
.../GPU/HuggingFace/LLM/bluelm/README.md | 15 ++------
.../GPU/HuggingFace/LLM/chatglm2/README.md | 15 ++------
.../GPU/HuggingFace/LLM/chatglm3/README.md | 15 ++------
.../HuggingFace/LLM/chinese-llama2/README.md | 15 ++------
.../GPU/HuggingFace/LLM/codegeex2/README.md | 15 ++------
.../GPU/HuggingFace/LLM/codegemma/README.md | 15 ++------
.../GPU/HuggingFace/LLM/codellama/readme.md | 15 ++------
.../GPU/HuggingFace/LLM/deciLM-7b/README.md | 15 ++------
.../GPU/HuggingFace/LLM/deepseek/README.md | 15 ++------
.../GPU/HuggingFace/LLM/dolly-v1/README.md | 15 ++------
.../GPU/HuggingFace/LLM/dolly-v2/README.md | 15 ++------
.../GPU/HuggingFace/LLM/falcon/README.md | 15 ++------
.../GPU/HuggingFace/LLM/flan-t5/README.md | 15 ++------
.../GPU/HuggingFace/LLM/gemma/README.md | 15 ++------
.../GPU/HuggingFace/LLM/gemma2/README.md | 15 ++------
.../GPU/HuggingFace/LLM/glm4/README.md | 15 ++------
.../GPU/HuggingFace/LLM/gpt-j/readme.md | 15 ++------
.../GPU/HuggingFace/LLM/internlm/README.md | 15 ++------
.../GPU/HuggingFace/LLM/internlm2/README.md | 15 ++------
.../GPU/HuggingFace/LLM/llama2/README.md | 15 ++------
.../GPU/HuggingFace/LLM/llama3.1/README.md | 15 ++------
.../GPU/HuggingFace/LLM/llama3.2/README.md | 15 ++------
.../GPU/HuggingFace/LLM/llama3/README.md | 15 ++------
.../GPU/HuggingFace/LLM/minicpm/README.md | 15 ++------
.../GPU/HuggingFace/LLM/minicpm3/README.md | 15 ++------
.../GPU/HuggingFace/LLM/mistral/README.md | 15 ++------
.../GPU/HuggingFace/LLM/mixtral/README.md | 15 ++------
.../example/GPU/HuggingFace/LLM/mpt/README.md | 15 ++------
.../GPU/HuggingFace/LLM/phi-1_5/README.md | 15 ++------
.../GPU/HuggingFace/LLM/phi-2/README.md | 15 ++------
.../GPU/HuggingFace/LLM/phi-3/README.md | 15 ++------
.../GPU/HuggingFace/LLM/phixtral/README.md | 15 ++------
.../GPU/HuggingFace/LLM/qwen/README.md | 15 ++------
.../GPU/HuggingFace/LLM/qwen1.5/README.md | 15 ++------
.../GPU/HuggingFace/LLM/qwen2.5/README.md | 15 ++------
.../GPU/HuggingFace/LLM/qwen2/README.md | 15 ++------
.../GPU/HuggingFace/LLM/redpajama/README.md | 15 ++------
.../GPU/HuggingFace/LLM/replit/README.md | 15 ++------
.../GPU/HuggingFace/LLM/rwkv4/README.md | 15 ++------
.../GPU/HuggingFace/LLM/rwkv5/README.md | 15 ++------
.../GPU/HuggingFace/LLM/solar/README.md | 15 ++------
.../GPU/HuggingFace/LLM/stablelm/README.md | 15 ++------
.../GPU/HuggingFace/LLM/starcoder/readme.md | 15 ++------
.../GPU/HuggingFace/LLM/vicuna/README.md | 15 ++------
.../example/GPU/HuggingFace/LLM/yi/README.md | 15 ++------
.../GPU/HuggingFace/LLM/yuan2/README.md | 15 ++------
.../Multimodal/MiniCPM-Llama3-V-2_5/README.md | 15 ++------
.../Multimodal/MiniCPM-V-2/README.md | 15 ++------
.../Multimodal/MiniCPM-V-2_6/README.md | 15 ++------
.../Multimodal/MiniCPM-V/README.md | 15 ++------
.../Multimodal/StableDiffusion/README.md | 15 ++------
.../Multimodal/distil-whisper/README.md | 15 ++------
.../HuggingFace/Multimodal/glm-4v/README.md | 15 ++------
.../Multimodal/internvl2/readme.md | 15 ++------
.../Multimodal/phi-3-vision/README.md | 15 ++------
.../HuggingFace/Multimodal/qwen-vl/README.md | 15 ++------
.../Multimodal/qwen2-audio/README.md | 15 ++------
.../Multimodal/voiceassistant/README.md | 15 ++------
.../HuggingFace/Multimodal/whisper/readme.md | 15 ++------
.../GPU/HuggingFace/Save-Load/README.md | 15 ++------
python/llm/example/GPU/LangChain/README.md | 15 ++------
.../example/GPU/Lightweight-Serving/README.md | 15 ++------
python/llm/example/GPU/LlamaIndex/README.md | 15 ++------
.../GPU/Long-Context/Chatglm3-32K/README.md | 15 ++------
.../GPU/Long-Context/LLaMA2-32K/README.md | 15 ++------
.../example/GPU/ModelScope-Models/README.md | 15 ++------
.../GPU/ModelScope-Models/Save-Load/README.md | 15 ++------
.../PyTorch-Models/Model/aquila2/README.md | 15 ++------
.../PyTorch-Models/Model/baichuan/README.md | 15 ++------
.../PyTorch-Models/Model/baichuan2/README.md | 15 ++------
.../GPU/PyTorch-Models/Model/bark/README.md | 15 ++------
.../GPU/PyTorch-Models/Model/bluelm/README.md | 15 ++------
.../PyTorch-Models/Model/chatglm2/README.md | 15 ++------
.../PyTorch-Models/Model/chatglm3/README.md | 15 ++------
.../PyTorch-Models/Model/codegeex2/README.md | 15 ++------
.../PyTorch-Models/Model/codegemma/README.md | 15 ++------
.../PyTorch-Models/Model/codellama/README.md | 15 ++------
.../PyTorch-Models/Model/deciLM-7b/README.md | 15 ++------
.../PyTorch-Models/Model/deepseek/README.md | 15 ++------
.../Model/distil-whisper/README.md | 15 ++------
.../PyTorch-Models/Model/dolly-v1/README.md | 15 ++------
.../PyTorch-Models/Model/dolly-v2/README.md | 15 ++------
.../PyTorch-Models/Model/flan-t5/README.md | 15 ++------
.../GPU/PyTorch-Models/Model/glm4/README.md | 15 ++------
.../PyTorch-Models/Model/internlm2/README.md | 15 ++------
.../GPU/PyTorch-Models/Model/llama2/README.md | 15 ++------
.../Model/llama3.2-vision/README.md | 15 ++------
.../GPU/PyTorch-Models/Model/llama3/README.md | 15 ++------
.../GPU/PyTorch-Models/Model/llava/README.md | 15 ++------
.../GPU/PyTorch-Models/Model/mamba/README.md | 15 ++------
.../PyTorch-Models/Model/minicpm/README.md | 15 ++------
.../PyTorch-Models/Model/mistral/README.md | 15 ++------
.../PyTorch-Models/Model/mixtral/README.md | 15 ++------
.../Model/openai-whisper/README.md | 15 ++------
.../PyTorch-Models/Model/phi-1_5/README.md | 15 ++------
.../GPU/PyTorch-Models/Model/phi-2/README.md | 15 ++------
.../GPU/PyTorch-Models/Model/phi-3/README.md | 15 ++------
.../PyTorch-Models/Model/phixtral/README.md | 15 ++------
.../PyTorch-Models/Model/qwen-vl/README.md | 15 ++------
.../PyTorch-Models/Model/qwen1.5/README.md | 15 ++------
.../PyTorch-Models/Model/qwen2-vl/README.md | 15 ++------
.../GPU/PyTorch-Models/Model/qwen2/README.md | 15 ++------
.../GPU/PyTorch-Models/Model/replit/README.md | 15 ++------
.../GPU/PyTorch-Models/Model/solar/README.md | 15 ++------
.../PyTorch-Models/Model/speech-t5/README.md | 15 ++------
.../PyTorch-Models/Model/stablelm/README.md | 15 ++------
.../PyTorch-Models/Model/starcoder/README.md | 15 ++------
.../GPU/PyTorch-Models/Model/yi/README.md | 15 ++------
.../GPU/PyTorch-Models/Model/yuan2/README.md | 15 ++------
122 files changed, 232 insertions(+), 1556 deletions(-)
diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml
index 07ba6ac4..7c88e36e 100644
--- a/.github/workflows/llm_performance_tests.yml
+++ b/.github/workflows/llm_performance_tests.yml
@@ -805,7 +805,6 @@ jobs:
call conda activate igpu-perf
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -841,7 +840,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -875,7 +873,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -910,7 +907,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -947,7 +943,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1003,7 +998,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1039,7 +1033,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1073,7 +1066,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1108,7 +1100,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1145,7 +1136,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1200,7 +1190,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1236,7 +1225,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1270,7 +1258,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1305,7 +1292,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1342,7 +1328,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1397,7 +1382,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1435,7 +1419,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1474,7 +1457,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1522,7 +1504,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1572,7 +1553,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1642,7 +1622,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1680,7 +1659,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1717,7 +1695,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1756,7 +1733,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1814,7 +1790,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1852,7 +1827,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1888,7 +1862,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1925,7 +1898,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1964,7 +1936,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -2021,7 +1992,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -2059,7 +2029,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -2095,7 +2064,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -2132,7 +2100,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -2171,7 +2138,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
diff --git a/docs/mddocs/DockerGuides/docker_pytorch_inference_gpu.md b/docs/mddocs/DockerGuides/docker_pytorch_inference_gpu.md
index ec73bb84..7d36f35a 100644
--- a/docs/mddocs/DockerGuides/docker_pytorch_inference_gpu.md
+++ b/docs/mddocs/DockerGuides/docker_pytorch_inference_gpu.md
@@ -97,10 +97,6 @@ root@arda-arc12:/# sycl-ls
> # Reduce memory accesses by fusing SDP ops.
> # Recommended for use on Intel Data Center GPU Max Series.
> export ENABLE_SDP_FUSION=1
->
-> # Disable XMX computation.
-> # Recommended for use on integrated GPUs.
-> export BIGDL_LLM_XMX_DISABLED=1
> ```
diff --git a/docs/mddocs/Overview/install_gpu.md b/docs/mddocs/Overview/install_gpu.md
index 2de066e4..38a5e16c 100644
--- a/docs/mddocs/Overview/install_gpu.md
+++ b/docs/mddocs/Overview/install_gpu.md
@@ -150,13 +150,7 @@ call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
Please also set the following environment variable if you would like to run LLMs on: -->
-- For **Intel iGPU**:
- ```cmd
- set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
- ```
-
-- For **Intel Arc™ A-Series Graphics**:
+- For **Intel iGPU** and **Intel Arc™ A-Series Graphics**:
```cmd
set SYCL_CACHE_PERSISTENT=1
```
@@ -596,7 +590,6 @@ To use GPU acceleration on Linux, several environment variables are required or
source /opt/intel/oneapi/setvars.sh
export SYCL_CACHE_PERSISTENT=1
- export BIGDL_LLM_XMX_DISABLED=1
```
> [!NOTE]
diff --git a/docs/mddocs/Quickstart/benchmark_quickstart.md b/docs/mddocs/Quickstart/benchmark_quickstart.md
index fc5ce949..63cf1203 100644
--- a/docs/mddocs/Quickstart/benchmark_quickstart.md
+++ b/docs/mddocs/Quickstart/benchmark_quickstart.md
@@ -80,34 +80,19 @@ Some parameters in the yaml file that you can configure:
Please refer to [here](../Overview/install_gpu.md#runtime-configuration) to configure oneAPI environment variables. Choose corresponding commands base on your device.
-- For **Intel iGPU**:
+- For **Intel iGPU** and **Intel Arc™ A-Series Graphics**:
```bash
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
python run.py
```
-- For **Intel Arc™ A300-Series or Pro A60**:
-
- ```bash
- set SYCL_CACHE_PERSISTENT=1
- python run.py
- ```
-
-- For **Other Intel dGPU Series**:
-
- ```bash
- # e.g. Arc™ A770
- python run.py
- ```
-
## Run on Linux
Please choose corresponding commands base on your device.
-- For **Intel Arc™ A-Series and Intel Data Center GPU Flex**:
+- For **Intel Arc™ A-Series** and **Intel Data Center GPU Flex**:
For Intel Arc™ A-Series Graphics and Intel Data Center GPU Flex Series, we recommend:
diff --git a/docs/mddocs/Quickstart/install_linux_gpu.md b/docs/mddocs/Quickstart/install_linux_gpu.md
index b32dd346..e3aea7f4 100644
--- a/docs/mddocs/Quickstart/install_linux_gpu.md
+++ b/docs/mddocs/Quickstart/install_linux_gpu.md
@@ -422,7 +422,6 @@ To use GPU acceleration on Linux, several environment variables are required or
source /opt/intel/oneapi/setvars.sh
export SYCL_CACHE_PERSISTENT=1
- export BIGDL_LLM_XMX_DISABLED=1
```
> [!NOTE]
diff --git a/docs/mddocs/Quickstart/install_linux_gpu.zh-CN.md b/docs/mddocs/Quickstart/install_linux_gpu.zh-CN.md
index 302f3cb4..15cc9577 100644
--- a/docs/mddocs/Quickstart/install_linux_gpu.zh-CN.md
+++ b/docs/mddocs/Quickstart/install_linux_gpu.zh-CN.md
@@ -390,7 +390,6 @@ conda activate llm
source /opt/intel/oneapi/setvars.sh
export SYCL_CACHE_PERSISTENT=1
- export BIGDL_LLM_XMX_DISABLED=1
```
> [!NOTE]
diff --git a/docs/mddocs/Quickstart/install_windows_gpu.md b/docs/mddocs/Quickstart/install_windows_gpu.md
index 99895dc1..32a13d0b 100644
--- a/docs/mddocs/Quickstart/install_windows_gpu.md
+++ b/docs/mddocs/Quickstart/install_windows_gpu.md
@@ -109,19 +109,12 @@ You can verify if `ipex-llm` is successfully installed following below steps.
- Set the following environment variables according to your device:
- - For **Intel iGPU**:
-
- ```cmd
- set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
- ```
-
- - For **Intel Arc™ A770**:
+ - For **Intel iGPU** and **Intel Arc™ A770**:
```cmd
set SYCL_CACHE_PERSISTENT=1
```
-
+
> [!TIP]
> For other Intel dGPU Series, please refer to [this guide](../Overview/install_gpu.md#runtime-configuration) for more details regarding runtime configuration.
diff --git a/docs/mddocs/Quickstart/install_windows_gpu.zh-CN.md b/docs/mddocs/Quickstart/install_windows_gpu.zh-CN.md
index 5ee45481..81c2b7b7 100644
--- a/docs/mddocs/Quickstart/install_windows_gpu.zh-CN.md
+++ b/docs/mddocs/Quickstart/install_windows_gpu.zh-CN.md
@@ -108,14 +108,7 @@ conda activate llm
- 根据你的设备,设置以下环境参数:
- - **Intel iGPU**:
-
- ```cmd
- set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
- ```
-
- - **Intel Arc™ A770**:
+ - **Intel iGPU** 和 **Intel Arc™ A770**:
```cmd
set SYCL_CACHE_PERSISTENT=1
diff --git a/docs/mddocs/Quickstart/webui_quickstart.md b/docs/mddocs/Quickstart/webui_quickstart.md
index 6600c6c0..7f06fb5b 100644
--- a/docs/mddocs/Quickstart/webui_quickstart.md
+++ b/docs/mddocs/Quickstart/webui_quickstart.md
@@ -70,11 +70,6 @@ Configure oneAPI variables by running the following command in **Miniforge Promp
set SYCL_CACHE_PERSISTENT=1
```
-If you're running on iGPU, set additional environment variables by running the following commands:
-```cmd
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
#### Launch the Server
In **Miniforge Prompt** with the conda environment `llm` activated, navigate to the `text-generation-webui` folder and execute the following commands (You can optionally lanch the server with or without the API service):
diff --git a/python/llm/example/GPU/HuggingFace/LLM/aquila/README.md b/python/llm/example/GPU/HuggingFace/LLM/aquila/README.md
index c8f4e701..1a08475a 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/aquila/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/aquila/README.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,18 +84,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -104,6 +92,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/aquila2/README.md b/python/llm/example/GPU/HuggingFace/LLM/aquila2/README.md
index 84bd3a20..141109df 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/aquila2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/aquila2/README.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,18 +84,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -104,6 +92,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/baichuan/README.md b/python/llm/example/GPU/HuggingFace/LLM/baichuan/README.md
index 9d3b4d78..16b4346d 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/baichuan/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/baichuan/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/baichuan2/README.md b/python/llm/example/GPU/HuggingFace/LLM/baichuan2/README.md
index 2cd38f57..09e55443 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/baichuan2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/baichuan2/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/bluelm/README.md b/python/llm/example/GPU/HuggingFace/LLM/bluelm/README.md
index 3cf8120d..3c11ab08 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/bluelm/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/bluelm/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/chatglm2/README.md b/python/llm/example/GPU/HuggingFace/LLM/chatglm2/README.md
index 407534ac..a9fdc69b 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/chatglm2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/chatglm2/README.md
@@ -70,7 +70,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -78,18 +77,7 @@ export BIGDL_LLM_XMX_DISABLED=1
### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -97,6 +85,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/chatglm3/README.md b/python/llm/example/GPU/HuggingFace/LLM/chatglm3/README.md
index 70b66b02..111c628c 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/chatglm3/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/chatglm3/README.md
@@ -70,7 +70,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -78,18 +77,7 @@ export BIGDL_LLM_XMX_DISABLED=1
### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -97,6 +85,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
## 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/chinese-llama2/README.md b/python/llm/example/GPU/HuggingFace/LLM/chinese-llama2/README.md
index 974bca27..90ebd4f1 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/chinese-llama2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/chinese-llama2/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/codegeex2/README.md b/python/llm/example/GPU/HuggingFace/LLM/codegeex2/README.md
index 1bcdef0f..7e44b89b 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/codegeex2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/codegeex2/README.md
@@ -84,7 +84,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -92,18 +91,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -111,6 +99,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/codegemma/README.md b/python/llm/example/GPU/HuggingFace/LLM/codegemma/README.md
index d0f9f5e0..dd4a78c1 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/codegemma/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/codegemma/README.md
@@ -81,7 +81,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -89,18 +88,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -108,6 +96,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/codellama/readme.md b/python/llm/example/GPU/HuggingFace/LLM/codellama/readme.md
index 40cf921c..1880608b 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/codellama/readme.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/codellama/readme.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/deciLM-7b/README.md b/python/llm/example/GPU/HuggingFace/LLM/deciLM-7b/README.md
index 728d534b..cc1f768e 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/deciLM-7b/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/deciLM-7b/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/deepseek/README.md b/python/llm/example/GPU/HuggingFace/LLM/deepseek/README.md
index 04ef2859..47843525 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/deepseek/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/deepseek/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/dolly-v1/README.md b/python/llm/example/GPU/HuggingFace/LLM/dolly-v1/README.md
index cc45b258..3e07019a 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/dolly-v1/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/dolly-v1/README.md
@@ -73,7 +73,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -81,18 +80,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -100,6 +88,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/dolly-v2/README.md b/python/llm/example/GPU/HuggingFace/LLM/dolly-v2/README.md
index ace4489b..f0e05b0b 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/dolly-v2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/dolly-v2/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/falcon/README.md b/python/llm/example/GPU/HuggingFace/LLM/falcon/README.md
index 0749d294..5c2e79ea 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/falcon/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/falcon/README.md
@@ -101,7 +101,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -109,18 +108,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 4.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -128,6 +116,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 5. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/flan-t5/README.md b/python/llm/example/GPU/HuggingFace/LLM/flan-t5/README.md
index a309c8e5..ef2426c6 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/flan-t5/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/flan-t5/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/gemma/README.md b/python/llm/example/GPU/HuggingFace/LLM/gemma/README.md
index eff2f97a..6a3a19a7 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/gemma/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/gemma/README.md
@@ -79,7 +79,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -87,18 +86,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -106,6 +94,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/gemma2/README.md b/python/llm/example/GPU/HuggingFace/LLM/gemma2/README.md
index b3167e8c..e0795930 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/gemma2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/gemma2/README.md
@@ -81,7 +81,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -89,18 +88,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -108,6 +96,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/glm4/README.md b/python/llm/example/GPU/HuggingFace/LLM/glm4/README.md
index 541ae806..9cf550c2 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/glm4/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/glm4/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
## 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/gpt-j/readme.md b/python/llm/example/GPU/HuggingFace/LLM/gpt-j/readme.md
index 25b3fa86..a08a3cf1 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/gpt-j/readme.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/gpt-j/readme.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/internlm/README.md b/python/llm/example/GPU/HuggingFace/LLM/internlm/README.md
index 72dfccb6..b9781339 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/internlm/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/internlm/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/internlm2/README.md b/python/llm/example/GPU/HuggingFace/LLM/internlm2/README.md
index 6d16158f..0745014e 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/internlm2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/internlm2/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/llama2/README.md b/python/llm/example/GPU/HuggingFace/LLM/llama2/README.md
index cdd06484..06bae6ac 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/llama2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/llama2/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/llama3.1/README.md b/python/llm/example/GPU/HuggingFace/LLM/llama3.1/README.md
index 1e006c08..e55c4713 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/llama3.1/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/llama3.1/README.md
@@ -79,7 +79,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -87,18 +86,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -106,6 +94,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/llama3.2/README.md b/python/llm/example/GPU/HuggingFace/LLM/llama3.2/README.md
index 156c6622..cff5fc9d 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/llama3.2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/llama3.2/README.md
@@ -79,7 +79,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -87,18 +86,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -106,6 +94,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/llama3/README.md b/python/llm/example/GPU/HuggingFace/LLM/llama3/README.md
index 3280ead6..2e5a47ec 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/llama3/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/llama3/README.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,18 +84,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -104,6 +92,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/minicpm/README.md b/python/llm/example/GPU/HuggingFace/LLM/minicpm/README.md
index eeac47da..e77fd4b0 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/minicpm/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/minicpm/README.md
@@ -73,7 +73,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -81,18 +80,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -100,6 +88,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/minicpm3/README.md b/python/llm/example/GPU/HuggingFace/LLM/minicpm3/README.md
index 0f008dd0..56c58d61 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/minicpm3/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/minicpm3/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/mistral/README.md b/python/llm/example/GPU/HuggingFace/LLM/mistral/README.md
index 63542bcf..ab0006b8 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/mistral/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/mistral/README.md
@@ -72,7 +72,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -80,18 +79,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -99,6 +87,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/mixtral/README.md b/python/llm/example/GPU/HuggingFace/LLM/mixtral/README.md
index 5e944968..1d116a3a 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/mixtral/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/mixtral/README.md
@@ -79,7 +79,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -87,18 +86,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -106,6 +94,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/mpt/README.md b/python/llm/example/GPU/HuggingFace/LLM/mpt/README.md
index 192d31b8..2cd964b4 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/mpt/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/mpt/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/phi-1_5/README.md b/python/llm/example/GPU/HuggingFace/LLM/phi-1_5/README.md
index f34889aa..5d50e053 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/phi-1_5/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/phi-1_5/README.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,18 +84,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -104,6 +92,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/phi-2/README.md b/python/llm/example/GPU/HuggingFace/LLM/phi-2/README.md
index 7d96882d..45f75fa1 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/phi-2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/phi-2/README.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,18 +84,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -104,6 +92,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/phi-3/README.md b/python/llm/example/GPU/HuggingFace/LLM/phi-3/README.md
index c034c9fd..d03ac331 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/phi-3/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/phi-3/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/phixtral/README.md b/python/llm/example/GPU/HuggingFace/LLM/phixtral/README.md
index ab690af3..d40d9052 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/phixtral/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/phixtral/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/qwen/README.md b/python/llm/example/GPU/HuggingFace/LLM/qwen/README.md
index 8311f7f1..a0192e71 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/qwen/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/qwen/README.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,18 +84,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -104,6 +92,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/qwen1.5/README.md b/python/llm/example/GPU/HuggingFace/LLM/qwen1.5/README.md
index 681f035a..d146e69f 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/qwen1.5/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/qwen1.5/README.md
@@ -79,7 +79,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -87,18 +86,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -106,6 +94,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/qwen2.5/README.md b/python/llm/example/GPU/HuggingFace/LLM/qwen2.5/README.md
index 12052b33..ed477855 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/qwen2.5/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/qwen2.5/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/qwen2/README.md b/python/llm/example/GPU/HuggingFace/LLM/qwen2/README.md
index 8ade27f6..829139d9 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/qwen2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/qwen2/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/redpajama/README.md b/python/llm/example/GPU/HuggingFace/LLM/redpajama/README.md
index 1376e42d..3e713025 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/redpajama/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/redpajama/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/replit/README.md b/python/llm/example/GPU/HuggingFace/LLM/replit/README.md
index 644de85a..e43b6a6d 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/replit/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/replit/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/rwkv4/README.md b/python/llm/example/GPU/HuggingFace/LLM/rwkv4/README.md
index 55d94f6d..15871c89 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/rwkv4/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/rwkv4/README.md
@@ -73,7 +73,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -81,18 +80,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -100,6 +88,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/rwkv5/README.md b/python/llm/example/GPU/HuggingFace/LLM/rwkv5/README.md
index 63cff014..18e3c6ce 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/rwkv5/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/rwkv5/README.md
@@ -73,7 +73,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -81,18 +80,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -100,6 +88,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/solar/README.md b/python/llm/example/GPU/HuggingFace/LLM/solar/README.md
index b86044bb..e847c67e 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/solar/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/solar/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/stablelm/README.md b/python/llm/example/GPU/HuggingFace/LLM/stablelm/README.md
index 5b5f18dd..9746b44a 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/stablelm/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/stablelm/README.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,18 +84,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -104,6 +92,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/starcoder/readme.md b/python/llm/example/GPU/HuggingFace/LLM/starcoder/readme.md
index 2787a13d..f11b0df3 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/starcoder/readme.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/starcoder/readme.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/vicuna/README.md b/python/llm/example/GPU/HuggingFace/LLM/vicuna/README.md
index 7f4b9806..bd847eb3 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/vicuna/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/vicuna/README.md
@@ -73,7 +73,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -81,18 +80,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -100,6 +88,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/yi/README.md b/python/llm/example/GPU/HuggingFace/LLM/yi/README.md
index 080e2676..79326333 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/yi/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/yi/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/LLM/yuan2/README.md b/python/llm/example/GPU/HuggingFace/LLM/yuan2/README.md
index dd42cec1..72b40fa5 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/yuan2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/yuan2/README.md
@@ -78,7 +78,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -86,18 +85,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -105,6 +93,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-Llama3-V-2_5/README.md b/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-Llama3-V-2_5/README.md
index a11e1061..9d2e43f4 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-Llama3-V-2_5/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-Llama3-V-2_5/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-V-2/README.md b/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-V-2/README.md
index aed936fb..dee4bc78 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-V-2/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-V-2/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-V-2_6/README.md b/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-V-2_6/README.md
index 7e0ea2ea..0af1ba5c 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-V-2_6/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-V-2_6/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-V/README.md b/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-V/README.md
index fdae240d..91d7b0aa 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-V/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-V/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/StableDiffusion/README.md b/python/llm/example/GPU/HuggingFace/Multimodal/StableDiffusion/README.md
index 3d39402d..6c722e91 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/StableDiffusion/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/StableDiffusion/README.md
@@ -57,7 +57,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -65,18 +64,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -84,6 +72,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/distil-whisper/README.md b/python/llm/example/GPU/HuggingFace/Multimodal/distil-whisper/README.md
index 954e7460..89c2035a 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/distil-whisper/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/distil-whisper/README.md
@@ -76,7 +76,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -84,18 +83,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -103,6 +91,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/glm-4v/README.md b/python/llm/example/GPU/HuggingFace/Multimodal/glm-4v/README.md
index c37a99f8..7464c7e7 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/glm-4v/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/glm-4v/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/internvl2/readme.md b/python/llm/example/GPU/HuggingFace/Multimodal/internvl2/readme.md
index 4183d309..ad5bf922 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/internvl2/readme.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/internvl2/readme.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,18 +84,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -104,6 +92,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/phi-3-vision/README.md b/python/llm/example/GPU/HuggingFace/Multimodal/phi-3-vision/README.md
index 543e2f48..b3b56963 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/phi-3-vision/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/phi-3-vision/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/qwen-vl/README.md b/python/llm/example/GPU/HuggingFace/Multimodal/qwen-vl/README.md
index 73723266..90a2e3a0 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/qwen-vl/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/qwen-vl/README.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,18 +84,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -104,6 +92,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/qwen2-audio/README.md b/python/llm/example/GPU/HuggingFace/Multimodal/qwen2-audio/README.md
index b201467a..243ab046 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/qwen2-audio/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/qwen2-audio/README.md
@@ -82,7 +82,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -90,18 +89,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -109,6 +97,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/voiceassistant/README.md b/python/llm/example/GPU/HuggingFace/Multimodal/voiceassistant/README.md
index 7dea109b..46f7bccd 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/voiceassistant/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/voiceassistant/README.md
@@ -86,7 +86,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -94,18 +93,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -113,6 +101,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/whisper/readme.md b/python/llm/example/GPU/HuggingFace/Multimodal/whisper/readme.md
index ac664fb0..56c7828e 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/whisper/readme.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/whisper/readme.md
@@ -78,7 +78,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -86,18 +85,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -105,6 +93,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/HuggingFace/Save-Load/README.md b/python/llm/example/GPU/HuggingFace/Save-Load/README.md
index 9aae6c8c..acc12370 100644
--- a/python/llm/example/GPU/HuggingFace/Save-Load/README.md
+++ b/python/llm/example/GPU/HuggingFace/Save-Load/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/LangChain/README.md b/python/llm/example/GPU/LangChain/README.md
index 6df73976..5e84fbfb 100644
--- a/python/llm/example/GPU/LangChain/README.md
+++ b/python/llm/example/GPU/LangChain/README.md
@@ -73,7 +73,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -81,18 +80,7 @@ export BIGDL_LLM_XMX_DISABLED=1
### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -100,6 +88,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/Lightweight-Serving/README.md b/python/llm/example/GPU/Lightweight-Serving/README.md
index 3e67b1e5..2db7cd97 100644
--- a/python/llm/example/GPU/Lightweight-Serving/README.md
+++ b/python/llm/example/GPU/Lightweight-Serving/README.md
@@ -97,7 +97,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -105,18 +104,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -124,6 +112,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/LlamaIndex/README.md b/python/llm/example/GPU/LlamaIndex/README.md
index a56ed793..abdc6728 100644
--- a/python/llm/example/GPU/LlamaIndex/README.md
+++ b/python/llm/example/GPU/LlamaIndex/README.md
@@ -130,7 +130,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -138,18 +137,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 4.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -157,6 +145,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/Long-Context/Chatglm3-32K/README.md b/python/llm/example/GPU/Long-Context/Chatglm3-32K/README.md
index b030eb9e..7929cbfa 100644
--- a/python/llm/example/GPU/Long-Context/Chatglm3-32K/README.md
+++ b/python/llm/example/GPU/Long-Context/Chatglm3-32K/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/Long-Context/LLaMA2-32K/README.md b/python/llm/example/GPU/Long-Context/LLaMA2-32K/README.md
index 20529a0d..09a3ed08 100644
--- a/python/llm/example/GPU/Long-Context/LLaMA2-32K/README.md
+++ b/python/llm/example/GPU/Long-Context/LLaMA2-32K/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/ModelScope-Models/README.md b/python/llm/example/GPU/ModelScope-Models/README.md
index 2b5ecacb..d8cdf9e6 100644
--- a/python/llm/example/GPU/ModelScope-Models/README.md
+++ b/python/llm/example/GPU/ModelScope-Models/README.md
@@ -74,7 +74,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -82,18 +81,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -101,6 +89,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/ModelScope-Models/Save-Load/README.md b/python/llm/example/GPU/ModelScope-Models/Save-Load/README.md
index ccd3a115..8a9a3d7a 100644
--- a/python/llm/example/GPU/ModelScope-Models/Save-Load/README.md
+++ b/python/llm/example/GPU/ModelScope-Models/Save-Load/README.md
@@ -73,7 +73,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -81,18 +80,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -100,6 +88,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/aquila2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/aquila2/README.md
index da481b37..136bef0f 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/aquila2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/aquila2/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/baichuan/README.md b/python/llm/example/GPU/PyTorch-Models/Model/baichuan/README.md
index d786bc7f..ac4d0b0e 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/baichuan/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/baichuan/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/baichuan2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/baichuan2/README.md
index d4f120f5..741e916d 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/baichuan2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/baichuan2/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/bark/README.md b/python/llm/example/GPU/PyTorch-Models/Model/bark/README.md
index 5455050e..06500cfb 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/bark/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/bark/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/bluelm/README.md b/python/llm/example/GPU/PyTorch-Models/Model/bluelm/README.md
index db198645..ddb7b7e0 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/bluelm/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/bluelm/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/chatglm2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/chatglm2/README.md
index f3a10f37..78f47d2b 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/chatglm2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/chatglm2/README.md
@@ -69,7 +69,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -77,18 +76,7 @@ export BIGDL_LLM_XMX_DISABLED=1
### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -96,6 +84,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/chatglm3/README.md b/python/llm/example/GPU/PyTorch-Models/Model/chatglm3/README.md
index 6c198bfd..5c8ffa9a 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/chatglm3/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/chatglm3/README.md
@@ -69,7 +69,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -77,18 +76,7 @@ export BIGDL_LLM_XMX_DISABLED=1
### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -96,6 +84,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
## 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/codegeex2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/codegeex2/README.md
index 6dd5e079..35d4f316 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/codegeex2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/codegeex2/README.md
@@ -84,7 +84,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -92,18 +91,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -111,6 +99,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/codegemma/README.md b/python/llm/example/GPU/PyTorch-Models/Model/codegemma/README.md
index 33da3966..25b092bf 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/codegemma/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/codegemma/README.md
@@ -81,7 +81,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -89,18 +88,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -108,6 +96,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/codellama/README.md b/python/llm/example/GPU/PyTorch-Models/Model/codellama/README.md
index ff68817e..af26fbcf 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/codellama/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/codellama/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/deciLM-7b/README.md b/python/llm/example/GPU/PyTorch-Models/Model/deciLM-7b/README.md
index a9e66f54..a3a8704e 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/deciLM-7b/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/deciLM-7b/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/deepseek/README.md b/python/llm/example/GPU/PyTorch-Models/Model/deepseek/README.md
index 4c1a9898..8dbe600a 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/deepseek/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/deepseek/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/distil-whisper/README.md b/python/llm/example/GPU/PyTorch-Models/Model/distil-whisper/README.md
index 8268a0c0..cce12c3f 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/distil-whisper/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/distil-whisper/README.md
@@ -76,7 +76,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -84,18 +83,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -103,6 +91,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/dolly-v1/README.md b/python/llm/example/GPU/PyTorch-Models/Model/dolly-v1/README.md
index c35fbd4f..d70f6b67 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/dolly-v1/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/dolly-v1/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/dolly-v2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/dolly-v2/README.md
index 2c88b488..1c0d7ce6 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/dolly-v2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/dolly-v2/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/flan-t5/README.md b/python/llm/example/GPU/PyTorch-Models/Model/flan-t5/README.md
index f7a3c9ad..9c530fa6 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/flan-t5/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/flan-t5/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/glm4/README.md b/python/llm/example/GPU/PyTorch-Models/Model/glm4/README.md
index 961b71c0..b5ca3c01 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/glm4/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/glm4/README.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,18 +84,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -104,6 +92,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/internlm2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/internlm2/README.md
index f8906fb2..1c499291 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/internlm2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/internlm2/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/llama2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/llama2/README.md
index bde37043..8f634a2f 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/llama2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/llama2/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/llama3.2-vision/README.md b/python/llm/example/GPU/PyTorch-Models/Model/llama3.2-vision/README.md
index 74b31534..543fbf9f 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/llama3.2-vision/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/llama3.2-vision/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/llama3/README.md b/python/llm/example/GPU/PyTorch-Models/Model/llama3/README.md
index 545a6288..b633381d 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/llama3/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/llama3/README.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,18 +84,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -104,6 +92,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/llava/README.md b/python/llm/example/GPU/PyTorch-Models/Model/llava/README.md
index fa75e826..813c8d40 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/llava/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/llava/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/mamba/README.md b/python/llm/example/GPU/PyTorch-Models/Model/mamba/README.md
index 2efc010d..5a77dd6b 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/mamba/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/mamba/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/minicpm/README.md b/python/llm/example/GPU/PyTorch-Models/Model/minicpm/README.md
index e441212e..b2478cf8 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/minicpm/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/minicpm/README.md
@@ -73,7 +73,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -81,18 +80,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -100,6 +88,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/mistral/README.md b/python/llm/example/GPU/PyTorch-Models/Model/mistral/README.md
index 4f3e58b0..dd8342da 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/mistral/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/mistral/README.md
@@ -72,7 +72,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -80,18 +79,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -99,6 +87,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/mixtral/README.md b/python/llm/example/GPU/PyTorch-Models/Model/mixtral/README.md
index 3179f431..6adf2f55 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/mixtral/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/mixtral/README.md
@@ -79,7 +79,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -87,18 +86,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -106,6 +94,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/openai-whisper/README.md b/python/llm/example/GPU/PyTorch-Models/Model/openai-whisper/README.md
index 5fdd8969..b40a9b85 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/openai-whisper/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/openai-whisper/README.md
@@ -76,7 +76,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -84,18 +83,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -103,6 +91,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/phi-1_5/README.md b/python/llm/example/GPU/PyTorch-Models/Model/phi-1_5/README.md
index 3a4c88dc..41fcc597 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/phi-1_5/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/phi-1_5/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/phi-2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/phi-2/README.md
index bbd276b9..3824652b 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/phi-2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/phi-2/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/phi-3/README.md b/python/llm/example/GPU/PyTorch-Models/Model/phi-3/README.md
index cc4542c9..873ca7f3 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/phi-3/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/phi-3/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/phixtral/README.md b/python/llm/example/GPU/PyTorch-Models/Model/phixtral/README.md
index 1458f2ab..688d4ef7 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/phixtral/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/phixtral/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/qwen-vl/README.md b/python/llm/example/GPU/PyTorch-Models/Model/qwen-vl/README.md
index c480c545..1cec10e1 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/qwen-vl/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/qwen-vl/README.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,18 +84,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -104,6 +92,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/qwen1.5/README.md b/python/llm/example/GPU/PyTorch-Models/Model/qwen1.5/README.md
index 801fdd59..1b6b0388 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/qwen1.5/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/qwen1.5/README.md
@@ -79,7 +79,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -87,18 +86,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -106,6 +94,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/qwen2-vl/README.md b/python/llm/example/GPU/PyTorch-Models/Model/qwen2-vl/README.md
index 67cacad8..c9e78c68 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/qwen2-vl/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/qwen2-vl/README.md
@@ -79,7 +79,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -87,18 +86,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -106,6 +94,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/qwen2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/qwen2/README.md
index 9a3e3e03..809d03eb 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/qwen2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/qwen2/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/replit/README.md b/python/llm/example/GPU/PyTorch-Models/Model/replit/README.md
index 3bfbf245..06ad2ac1 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/replit/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/replit/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/solar/README.md b/python/llm/example/GPU/PyTorch-Models/Model/solar/README.md
index 4d157d19..e97c4121 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/solar/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/solar/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/speech-t5/README.md b/python/llm/example/GPU/PyTorch-Models/Model/speech-t5/README.md
index fd487a38..af002746 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/speech-t5/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/speech-t5/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/stablelm/README.md b/python/llm/example/GPU/PyTorch-Models/Model/stablelm/README.md
index 3c195295..24befdd5 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/stablelm/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/stablelm/README.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,18 +84,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -104,6 +92,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/starcoder/README.md b/python/llm/example/GPU/PyTorch-Models/Model/starcoder/README.md
index e1ffd7d6..09719a8c 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/starcoder/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/starcoder/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,18 +78,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -98,6 +86,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/yi/README.md b/python/llm/example/GPU/PyTorch-Models/Model/yi/README.md
index 2b500175..0970a46a 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/yi/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/yi/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,18 +82,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -102,6 +90,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/yuan2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/yuan2/README.md
index 87c1ccd9..6055fe3a 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/yuan2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/yuan2/README.md
@@ -78,7 +78,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -86,18 +85,7 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
-
-
-
-
-For Intel Arc™ A-Series Graphics
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
@@ -105,6 +93,7 @@ set SYCL_CACHE_PERSISTENT=1
+
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
### 4. Running examples