feat: add inference examples

2025-09-02 21:21:02 +02:00 · 2025-09-02 21:21:02 +02:00 · 3b9fe4de06
commit 3b9fe4de06
parent f7bd58745e
6 changed files with 86 additions and 13 deletions
--- a/README.md
+++ b/README.md
@ -6,21 +6,13 @@ After installing `ipex-llm` which is required to use Intel GPUs (see documentati
 ## Setup
-1. Activate the conda environment
+1. Run `env.sh` to activate the conda environment and set the necessary environment variables
 ```bash
-conda activate llm-pt26
+$ . env.sh
 ```
-2. Set the necessary environmental variables:
+2. (Optional) Confirm if XPU is detected
 ```bash
 unset OCL_ICD_VENDORS
 export SYCL_CACHE_PERSISTENT=1
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 ```
 3. (Optional) Confirm if XPU is detected
 ```bash
 $ python # go into the Python shell
@ -31,7 +23,7 @@ $ torch.xpu.get_device_name()
 ```
-Links
+## Links
 - [Install IPEX-LLM on Intel GPU with PyTorch 2.6](https://git.ayo.run/ayo/ipex-llm/src/branch/main/docs/mddocs/Quickstart/install_pytorch26_gpu.md)
 - [Get started with PyTorch locally](https://pytorch.org/get-started/locally/)
--- a/amp-inference.py
+++ b/amp-inference.py
@ -0,0 +1,18 @@
 # Runs a single ResNet-50 forward pass on the "xpu" device under torch.autocast.
 import torch
 import torchvision.models as models
 # Load ResNet-50 with its default weights and switch it to eval mode.
 model = models.resnet50(weights="ResNet50_Weights.DEFAULT")
 model.eval()
 # Random input of shape (1, 3, 224, 224), created on the host.
 data = torch.rand(1, 3, 224, 224)
 # Move both the model and the input to the XPU before running.
 model = model.to("xpu")
 data = data.to("xpu")
 # Gradients are not needed for inference.
 with torch.no_grad():
     # set dtype=torch.bfloat16 for BF16
     with torch.autocast(device_type="xpu", dtype=torch.float16, enabled=True):
         # The output is discarded; the script only checks that the pass runs.
         model(data)
 print("Execution finished")
--- a/env.sh
+++ b/env.sh
@ -0,0 +1,5 @@
 # Meant to be sourced (`. env.sh`) so the changes apply to the calling shell.
 conda activate llm-pt26
 # Drop any OCL_ICD_VENDORS value still present after activation.
 unset OCL_ICD_VENDORS
 # SYCL runtime settings exported for the XPU examples in this repository.
 export SYCL_CACHE_PERSISTENT=1 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
--- a/fp32-inference.py
+++ b/fp32-inference.py
@ -0,0 +1,15 @@
 # Runs a single ResNet-50 forward pass on the "xpu" device and prints the result.
 import torch
 import torchvision.models as models
 # Load ResNet-50 with its default weights and switch it to eval mode.
 model = models.resnet50(weights="ResNet50_Weights.DEFAULT")
 model.eval()
 # Random input of shape (1, 3, 224, 224), created on the host.
 data = torch.rand(1, 3, 224, 224)
 # Move the model first, then the input, onto the XPU.
 model, data = model.to("xpu"), data.to("xpu")
 # Gradients are not needed for a plain forward pass.
 with torch.no_grad():
     output = model(data)
     print(output)
 print("Execution finished")
--- a/main.py
+++ b/main.py
@ -1,3 +1,16 @@
 import torch
-x = torch.rand(5, 3)
+
 # tensor_1 = torch.randn(1, 1, 40, 128).to('xpu')
 # tensor_2 = torch.randn(1, 1, 128, 40).to('xpu')
 # print(tensor_1)
 # print(tensor_2)
 # print(torch.matmul(tensor_1, tensor_2).size())
 # Report XPU availability and the name of device 0.
 xpu = torch.xpu
 print(xpu.is_available())
 print(xpu.get_device_name(0))
 # Build a 5 x 99999 random tensor on the host, then copy it to the XPU.
 host_tensor = torch.rand(5, 99999)
 x = host_tensor.to('xpu')
 print(x)
 # Print the XPU memory counters after the tensor has been placed on the device.
 print(xpu.memory_allocated())
 print(xpu.memory_reserved())
--- a/torch-compile-inference.py
+++ b/torch-compile-inference.py
@ -0,0 +1,30 @@
 # Times ResNet-50 inference on the "xpu" device before and after torch.compile.
 import torch
 import torchvision.models as models
 import time
 # Number of timed forward passes in each phase.
 ITERS = 10
 def _time_inference(model, data, label):
     """Run ITERS forward passes of *model* on *data* and print each latency.

     *label* is inserted into the printed message ("before" or "after") so the
     two phases can be told apart in the output.
     """
     for i in range(ITERS):
         # perf_counter is monotonic, so it suits interval timing better than
         # the wall clock.
         start = time.perf_counter()
         with torch.no_grad():
             model(data)
             # Wait for the device to finish so the measurement covers the
             # actual execution, not just the kernel launch.
             torch.xpu.synchronize()
         elapsed_ms = (time.perf_counter() - start) * 1000
         print(f"Inference time {label} torch.compile for iteration {i}: {elapsed_ms} ms")
 model = models.resnet50(weights="ResNet50_Weights.DEFAULT")
 model.eval()
 data = torch.rand(1, 3, 224, 224)
 model = model.to("xpu")
 data = data.to("xpu")
 _time_inference(model, data, "before")
 # Compilation happens lazily, so the first timed iteration below includes it.
 model = torch.compile(model)
 _time_inference(model, data, "after")
 print("Execution finished")