feat: add inference examples

parent f7bd58745e
commit 3b9fe4de06

6 changed files with 86 additions and 13 deletions

README.md (16 lines changed)

@@ -6,21 +6,13 @@ After installing `ipex-llm` which is required to use Intel GPUs (see documentation)
 ## Setup
 
-1. Activate the conda environment
+1. Run `env.sh` to activate the conda environment and set the necessary environment variables
 
 ```bash
-conda activate llm-pt26
+$ . env.sh
 ```
 
-2. Set the necessary environmental variables:
-
-```bash
-unset OCL_ICD_VENDORS
-export SYCL_CACHE_PERSISTENT=1
-export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
-```
-
-3. (Optional) Confirm if XPU is detected
+2. (Optional) Confirm if XPU is detected
 
 ```bash
 $ python # go into the Python shell
 

@@ -31,7 +23,7 @@ $ torch.xpu.get_device_name()
 ```
 
-Links
+## Links
 
 - [Install IPEX-LLM on Intel GPU with PyTorch 2.6](https://git.ayo.run/ayo/ipex-llm/src/branch/main/docs/mddocs/Quickstart/install_pytorch26_gpu.md)
 - [Get started with PyTorch locally](https://pytorch.org/get-started/locally/)
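
The optional XPU check that the README walks through comes down to two calls in the Python shell. A minimal sketch, assuming the `llm-pt26` environment is active (the README's own snippet is only partially visible in this diff):

```python
import torch

# Both calls should succeed if the XPU backend is set up correctly:
# is_available() prints True and get_device_name() prints the Intel GPU model.
print(torch.xpu.is_available())
print(torch.xpu.get_device_name())
```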

amp-inference.py (new file, 18 lines)

@@ -0,0 +1,18 @@
import torch
import torchvision.models as models

model = models.resnet50(weights="ResNet50_Weights.DEFAULT")
model.eval()
data = torch.rand(1, 3, 224, 224)

model = model.to("xpu")
data = data.to("xpu")

with torch.no_grad():
    d = torch.rand(1, 3, 224, 224)
    d = d.to("xpu")
    # set dtype=torch.bfloat16 for BF16
    with torch.autocast(device_type="xpu", dtype=torch.float16, enabled=True):
        model(data)

print("Execution finished")
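
The inline comment in `amp-inference.py` notes that BF16 can be selected by changing the autocast dtype. A minimal sketch of that variant, assuming the same ResNet-50 model and input already moved to `"xpu"` as in the file above:

```python
# BF16 variant of the autocast block in amp-inference.py (sketch, not part of the commit).
with torch.no_grad():
    with torch.autocast(device_type="xpu", dtype=torch.bfloat16, enabled=True):
        output = model(data)  # forward pass with bfloat16 autocast
```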

env.sh (new file, 5 lines)

@@ -0,0 +1,5 @@
conda activate llm-pt26

unset OCL_ICD_VENDORS
export SYCL_CACHE_PERSISTENT=1
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1

fp32-inference.py (new file, 15 lines)

@@ -0,0 +1,15 @@
import torch
import torchvision.models as models

model = models.resnet50(weights="ResNet50_Weights.DEFAULT")
model.eval()
data = torch.rand(1, 3, 224, 224)

model = model.to("xpu")
data = data.to("xpu")

with torch.no_grad():
    something = model(data)
    print(something)

print("Execution finished")
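
`fp32-inference.py` prints the raw output tensor, which for ResNet-50 is a `(1, 1000)` batch of class logits. If a more readable result is wanted, a small follow-up like the one below could be appended; the softmax/argmax step is illustrative and not part of the committed script:

```python
# Hypothetical addition after `something = model(data)`:
# reduce the (1, 1000) logits to the most likely ImageNet class index.
probs = torch.softmax(something, dim=1)
top_prob, top_class = probs.max(dim=1)
print(f"class {top_class.item()} with probability {top_prob.item():.3f}")
```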

main.py (15 lines changed)

@@ -1,3 +1,16 @@
import torch
x = torch.rand(5, 3)

# tensor_1 = torch.randn(1, 1, 40, 128).to('xpu')
# tensor_2 = torch.randn(1, 1, 128, 40).to('xpu')
# print(tensor_1)
# print(tensor_2)
# print(torch.matmul(tensor_1, tensor_2).size())

print(torch.xpu.is_available())
print(torch.xpu.get_device_name(0))

x = torch.rand(5, 99999).to('xpu')
print(x)

print(torch.xpu.memory_allocated())
print(torch.xpu.memory_reserved())
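
For context on the two memory figures `main.py` prints: `memory_allocated()` counts bytes held by live tensors, while `memory_reserved()` also includes blocks the caching allocator keeps around, so the reserved number is at least as large (this assumes the XPU allocator mirrors PyTorch's CUDA caching allocator, which these APIs are modelled on). A back-of-the-envelope check for the one XPU tensor the script creates:

```python
# Expected lower bound for torch.xpu.memory_allocated() after
# x = torch.rand(5, 99999).to('xpu'); float32 elements are 4 bytes each.
expected = 5 * 99999 * 4   # 1,999,980 bytes, roughly 1.9 MiB
print(expected)
```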

torch-compile-inference.py (new file, 30 lines)

@@ -0,0 +1,30 @@
import torch
import torchvision.models as models
import time

model = models.resnet50(weights="ResNet50_Weights.DEFAULT")
model.eval()
data = torch.rand(1, 3, 224, 224)
ITERS = 10

model = model.to("xpu")
data = data.to("xpu")

for i in range(ITERS):
    start = time.time()
    with torch.no_grad():
        model(data)
    torch.xpu.synchronize()
    end = time.time()
    print(f"Inference time before torch.compile for iteration {i}: {(end-start)*1000} ms")

model = torch.compile(model)
for i in range(ITERS):
    start = time.time()
    with torch.no_grad():
        model(data)
    torch.xpu.synchronize()
    end = time.time()
    print(f"Inference time after torch.compile for iteration {i}: {(end-start)*1000} ms")

print("Execution finished")
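
When reading the timings from `torch-compile-inference.py`, note that `torch.compile` compiles lazily: the first post-compile iteration includes graph capture and code generation, so it will be far slower than the steady state. A sketch of a warm-up pass that keeps that one-time cost out of the measured loop (an addition, not part of the committed file):

```python
# Hypothetical warm-up before the timed loop over the compiled model.
compiled = torch.compile(model)
with torch.no_grad():
    for _ in range(3):      # a few calls trigger compilation and fill caches
        compiled(data)
torch.xpu.synchronize()     # ensure warm-up work on the XPU has finished
```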