feat: add inference examples

This commit is contained in:
Ayo Ayco 2025-09-02 21:21:02 +02:00
parent f7bd58745e
commit 3b9fe4de06
6 changed files with 86 additions and 13 deletions

View file

@ -6,21 +6,13 @@ After installing `ipex-llm` which is required to use Intel GPUs (see documentati
## Setup ## Setup
1. Activate the conda environment 1. Run `env.sh` to activate the conda environment and set the necessary environment variables
```bash ```bash
conda activate llm-pt26 $ . env.sh
``` ```
2. Set the necessary environmental variables: 2. (Optional) Confirm if XPU is detected
```bash
unset OCL_ICD_VENDORS
export SYCL_CACHE_PERSISTENT=1
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
```
3. (Optional) Confirm if XPU is detected
```bash ```bash
$ python # go into the python shell $ python # go into the python shell
@ -31,7 +23,7 @@ $ torch.xpu.get_device_name()
``` ```
Links ## Links
- [Install IPEX-LLM on Intel GPU with PyTorch 2.6](https://git.ayo.run/ayo/ipex-llm/src/branch/main/docs/mddocs/Quickstart/install_pytorch26_gpu.md) - [Install IPEX-LLM on Intel GPU with PyTorch 2.6](https://git.ayo.run/ayo/ipex-llm/src/branch/main/docs/mddocs/Quickstart/install_pytorch26_gpu.md)
- [Get started with PyTorch locally](https://pytorch.org/get-started/locally/) - [Get started with PyTorch locally](https://pytorch.org/get-started/locally/)

18
amp-inference.py Normal file
View file

@ -0,0 +1,18 @@
import torch
import torchvision.models as models
# Mixed-precision (AMP) inference example: run a single ResNet-50 forward
# pass on the "xpu" device under torch.autocast.
model = models.resnet50(weights="ResNet50_Weights.DEFAULT")
model.eval()
data = torch.rand(1, 3, 224, 224)

# Both the module and its input must live on the same device.
model = model.to("xpu")
data = data.to("xpu")

# set dtype=torch.bfloat16 for BF16
with torch.no_grad(), torch.autocast(
    device_type="xpu", dtype=torch.float16, enabled=True
):
    model(data)

print("Execution finished")

5
env.sh Normal file
View file

@ -0,0 +1,5 @@
# Meant to be sourced (". env.sh") so the activation and the variables
# below apply to the calling shell rather than a subshell.
conda activate llm-pt26

# Runtime settings applied after activation.
# NOTE(review): values mirror the IPEX-LLM Intel GPU setup notes; confirm
# against the upstream install guide if the toolchain is upgraded.
unset OCL_ICD_VENDORS
export SYCL_CACHE_PERSISTENT=1 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1

15
fp32-inference.py Normal file
View file

@ -0,0 +1,15 @@
import torch
import torchvision.models as models
# FP32 inference example: feed one random 1x3x224x224 input through
# ResNet-50 (DEFAULT weights) on the "xpu" device and print the result.
model = models.resnet50(weights="ResNet50_Weights.DEFAULT").eval().to("xpu")
data = torch.rand(1, 3, 224, 224).to("xpu")

# Gradients are not needed for inference.
with torch.no_grad():
    output = model(data)

print(output)
print("Execution finished")

15
main.py
View file

@ -1,3 +1,16 @@
import torch

# Disabled matmul example, kept for reference:
# tensor_1 = torch.randn(1, 1, 40, 128).to('xpu')
# tensor_2 = torch.randn(1, 1, 128, 40).to('xpu')
# print(tensor_1)
# print(tensor_2)
# print(torch.matmul(tensor_1, tensor_2).size())

# Report whether an XPU device is available and the name of device 0.
print(torch.xpu.is_available())
print(torch.xpu.get_device_name(0))

# Place a tensor on the XPU, then print the allocator statistics.
x = torch.rand(5, 99999).to('xpu')
print(x)

print(torch.xpu.memory_allocated())
print(torch.xpu.memory_reserved())

View file

@ -0,0 +1,30 @@
import torch
import torchvision.models as models
import time
def _time_inference(model, data, iters, label):
    """Time ``iters`` forward passes of ``model(data)`` on XPU and print each.

    Args:
        model: Callable module already placed on the "xpu" device.
        data: Input tensor already placed on the "xpu" device.
        iters: Number of timed forward passes to run.
        label: Word inserted into the report line ("before" or "after").
    """
    # Drain any pending device work (e.g. the host-to-device copies) so it
    # is not attributed to the first timed iteration.
    torch.xpu.synchronize()
    for i in range(iters):
        # perf_counter is monotonic and intended for measuring intervals.
        start = time.perf_counter()
        with torch.no_grad():
            model(data)
            # Wait for the device to finish before stopping the clock.
            torch.xpu.synchronize()
        end = time.perf_counter()
        print(f"Inference time {label} torch.compile for iteration {i}: {(end-start)*1000} ms")


# Benchmark ResNet-50 inference latency on XPU before and after torch.compile.
ITERS = 10

model = models.resnet50(weights="ResNet50_Weights.DEFAULT")
model.eval()
data = torch.rand(1, 3, 224, 224)

model = model.to("xpu")
data = data.to("xpu")

_time_inference(model, data, ITERS, "before")

# The first iteration(s) after compiling typically include compilation
# overhead, so compare the later iterations.
model = torch.compile(model)
_time_inference(model, data, ITERS, "after")

print("Execution finished")