From 3b9fe4de06f9e73b1157b040a52a628937dd7ca3 Mon Sep 17 00:00:00 2001
From: Ayo
Date: Tue, 2 Sep 2025 21:21:02 +0200
Subject: [PATCH] feat: add inference examples

---
 README.md                  | 16 ++++------------
 amp-inference.py           | 16 ++++++++++++++++
 env.sh                     |  5 +++++
 fp32-inference.py          | 15 +++++++++++++++
 main.py                    | 15 ++++++++++++++-
 torch-compile-inference.py | 30 ++++++++++++++++++++++++++++++
 6 files changed, 84 insertions(+), 13 deletions(-)
 create mode 100644 amp-inference.py
 create mode 100644 env.sh
 create mode 100644 fp32-inference.py
 create mode 100644 torch-compile-inference.py

diff --git a/README.md b/README.md
index dfd2adc..c66dbfc 100644
--- a/README.md
+++ b/README.md
@@ -6,21 +6,13 @@ After installing `ipex-llm` which is required to use Intel GPUs (see documentati
 ## Setup
-1. Activate the conda environment
+1. Source `env.sh` to activate the conda environment and set the necessary environment variables:
 ```bash
-conda activate llm-pt26
+$ . env.sh
 ```
-2. Set the necessary environmental variables:
-
-```bash
-unset OCL_ICD_VENDORS
-export SYCL_CACHE_PERSISTENT=1
-export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
-```
-
-3. (Optional) Confirm if XPU is detected
+2. (Optional) Confirm that the XPU is detected
 ```bash
 $ python # go into the python shell
 $ torch.xpu.get_device_name()
 ```
-Links
+## Links
 - [Install IPEX-LLM on Intel GPU with PyTorch 2.6](https://git.ayo.run/ayo/ipex-llm/src/branch/main/docs/mddocs/Quickstart/install_pytorch26_gpu.md)
 - [Get started with PyTorch locally](https://pytorch.org/get-started/locally/)
diff --git a/amp-inference.py b/amp-inference.py
new file mode 100644
index 0000000..b3bb2a6
--- /dev/null
+++ b/amp-inference.py
@@ -0,0 +1,16 @@
+import torch
+import torchvision.models as models
+
+model = models.resnet50(weights="ResNet50_Weights.DEFAULT")
+model.eval()
+data = torch.rand(1, 3, 224, 224)
+
+model = model.to("xpu")
+data = data.to("xpu")
+
+with torch.no_grad():
+    # set dtype=torch.bfloat16 for BF16
+    with torch.autocast(device_type="xpu", dtype=torch.float16, enabled=True):
+        model(data)
+
+print("Execution finished")
diff --git a/env.sh b/env.sh
new file mode 100644
index 0000000..67d5e9c
--- /dev/null
+++ b/env.sh
@@ -0,0 +1,5 @@
+conda activate llm-pt26
+
+unset OCL_ICD_VENDORS
+export SYCL_CACHE_PERSISTENT=1
+export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
diff --git a/fp32-inference.py b/fp32-inference.py
new file mode 100644
index 0000000..df87ad2
--- /dev/null
+++ b/fp32-inference.py
@@ -0,0 +1,15 @@
+import torch
+import torchvision.models as models
+
+model = models.resnet50(weights="ResNet50_Weights.DEFAULT")
+model.eval()
+data = torch.rand(1, 3, 224, 224)
+
+model = model.to("xpu")
+data = data.to("xpu")
+
+with torch.no_grad():
+    output = model(data)
+    print(output)
+
+print("Execution finished")
diff --git a/main.py b/main.py
index 9c6de66..7d7c372 100644
--- a/main.py
+++ b/main.py
@@ -1,3 +1,16 @@
 import torch
-x = torch.rand(5, 3)
+
+# tensor_1 = torch.randn(1, 1, 40, 128).to('xpu')
+# tensor_2 = torch.randn(1, 1, 128, 40).to('xpu')
+# print(tensor_1)
+# print(tensor_2)
+# print(torch.matmul(tensor_1, tensor_2).size())
+
+print(torch.xpu.is_available())
+print(torch.xpu.get_device_name(0))
+
+x = torch.rand(5, 99999).to('xpu')
 print(x)
+
+print(torch.xpu.memory_allocated())
+print(torch.xpu.memory_reserved())
diff --git a/torch-compile-inference.py b/torch-compile-inference.py
new file mode 100644
index 0000000..d9508c7
--- /dev/null
+++ b/torch-compile-inference.py
@@ -0,0 +1,30 @@
+import torch
+import torchvision.models as models
+import time
+
+model = models.resnet50(weights="ResNet50_Weights.DEFAULT")
+model.eval()
+data = torch.rand(1, 3, 224, 224)
+ITERS = 10
+
+model = model.to("xpu")
+data = data.to("xpu")
+
+for i in range(ITERS):
+    start = time.time()
+    with torch.no_grad():
+        model(data)
+    torch.xpu.synchronize()
+    end = time.time()
+    print(f"Inference time before torch.compile for iteration {i}: {(end-start)*1000} ms")
+
+model = torch.compile(model)
+for i in range(ITERS):
+    start = time.time()
+    with torch.no_grad():
+        model(data)
+    torch.xpu.synchronize()
+    end = time.time()
+    print(f"Inference time after torch.compile for iteration {i}: {(end-start)*1000} ms")
+
+print("Execution finished")
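
A note on the `torch.compile` timing loop above: the first few iterations after `torch.compile` include graph capture and kernel compilation, so the early "after" numbers measure compile overhead rather than steady-state latency. The sketch below separates warmup from measurement. It is illustrative only and assumes the same environment as the patch; the `WARMUP` constant and the averaged timing are additions that do not appear in the patch.

```python
import time

import torch
import torchvision.models as models

# Same model and device setup as torch-compile-inference.py.
model = models.resnet50(weights="ResNet50_Weights.DEFAULT")
model.eval()
model = model.to("xpu")
data = torch.rand(1, 3, 224, 224).to("xpu")

model = torch.compile(model)

WARMUP = 3   # assumed value: iterations discarded to absorb compile overhead
ITERS = 10

with torch.no_grad():
    # Warmup runs: trigger graph capture and kernel compilation.
    for _ in range(WARMUP):
        model(data)
    torch.xpu.synchronize()

    # Measured runs: steady-state inference only.
    start = time.time()
    for _ in range(ITERS):
        model(data)
    torch.xpu.synchronize()
    end = time.time()

print(f"Steady-state inference: {(end - start) / ITERS * 1000:.2f} ms/iter")
```

Synchronizing once per phase, rather than per iteration, keeps host-side timing overhead out of the average; the per-iteration prints in the patch remain useful for observing the compile-time spike itself.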