feat: add inference examples

parent f7bd58745e
commit 3b9fe4de06

6 changed files with 86 additions and 13 deletions

README.md (16 lines changed)

@@ -6,21 +6,13 @@ After installing `ipex-llm` which is required to use Intel GPUs (see documentation)
 ## Setup
 
-1. Activate the conda environment
+1. Run `env.sh` to activate the conda environment and set the necessary environment variables
 
 ```bash
-conda activate llm-pt26
+$ . env.sh
 ```
 
-2. Set the necessary environmental variables:
-
-```bash
-unset OCL_ICD_VENDORS
-export SYCL_CACHE_PERSISTENT=1
-export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
-```
-
-3. (Optional) Confirm if XPU is detected
+2. (Optional) Confirm if XPU is detected
 
 ```bash
 $ python # go into the Python shell
 

@@ -31,7 +23,7 @@ $ torch.xpu.get_device_name()
 ```
 
-Links
+## Links
 
 - [Install IPEX-LLM on Intel GPU with PyTorch 2.6](https://git.ayo.run/ayo/ipex-llm/src/branch/main/docs/mddocs/Quickstart/install_pytorch26_gpu.md)
 - [Get started with PyTorch locally](https://pytorch.org/get-started/locally/)
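
The optional XPU check that the README walks through comes down to two calls in the Python shell. A minimal sketch, assuming the `llm-pt26` environment is active (the README's own snippet is only partially visible in this diff):

```python
import torch

# Both calls should succeed if the XPU backend is set up correctly:
# is_available() prints True and get_device_name() prints the Intel GPU model.
print(torch.xpu.is_available())
print(torch.xpu.get_device_name())
```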

amp-inference.py (new file, 18 lines)

@@ -0,0 +1,18 @@
import torch
import torchvision.models as models

model = models.resnet50(weights="ResNet50_Weights.DEFAULT")
model.eval()
data = torch.rand(1, 3, 224, 224)

model = model.to("xpu")
data = data.to("xpu")

with torch.no_grad():
    d = torch.rand(1, 3, 224, 224)
    d = d.to("xpu")
    # set dtype=torch.bfloat16 for BF16
    with torch.autocast(device_type="xpu", dtype=torch.float16, enabled=True):
        model(data)

print("Execution finished")
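
The inline comment in `amp-inference.py` notes that BF16 can be selected by changing the autocast dtype. A minimal sketch of that variant, assuming the same ResNet-50 model and input already moved to `"xpu"` as in the file above:

```python
# BF16 variant of the autocast block in amp-inference.py (sketch, not part of the commit).
with torch.no_grad():
    with torch.autocast(device_type="xpu", dtype=torch.bfloat16, enabled=True):
        output = model(data)  # forward pass with bfloat16 autocast
```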

env.sh (new file, 5 lines)

@@ -0,0 +1,5 @@
conda activate llm-pt26

unset OCL_ICD_VENDORS
export SYCL_CACHE_PERSISTENT=1
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1

fp32-inference.py (new file, 15 lines)

@@ -0,0 +1,15 @@
import torch
import torchvision.models as models

model = models.resnet50(weights="ResNet50_Weights.DEFAULT")
model.eval()
data = torch.rand(1, 3, 224, 224)

model = model.to("xpu")
data = data.to("xpu")

with torch.no_grad():
    something = model(data)
    print(something)

print("Execution finished")
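
`fp32-inference.py` prints the raw output tensor, which for ResNet-50 is a `(1, 1000)` batch of class logits. If a more readable result is wanted, a small follow-up like the one below could be appended; the softmax/argmax step is illustrative and not part of the committed script:

```python
# Hypothetical addition after `something = model(data)`:
# reduce the (1, 1000) logits to the most likely ImageNet class index.
probs = torch.softmax(something, dim=1)
top_prob, top_class = probs.max(dim=1)
print(f"class {top_class.item()} with probability {top_prob.item():.3f}")
```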

main.py (15 lines changed)

@@ -1,3 +1,16 @@
import torch
x = torch.rand(5, 3)

# tensor_1 = torch.randn(1, 1, 40, 128).to('xpu')
# tensor_2 = torch.randn(1, 1, 128, 40).to('xpu')
# print(tensor_1)
# print(tensor_2)
# print(torch.matmul(tensor_1, tensor_2).size())

print(torch.xpu.is_available())
print(torch.xpu.get_device_name(0))

x = torch.rand(5, 99999).to('xpu')
print(x)

print(torch.xpu.memory_allocated())
print(torch.xpu.memory_reserved())
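
For context on the two memory figures `main.py` prints: `memory_allocated()` counts bytes held by live tensors, while `memory_reserved()` also includes blocks the caching allocator keeps around, so the reserved number is at least as large (this assumes the XPU allocator mirrors PyTorch's CUDA caching allocator, which these APIs are modelled on). A back-of-the-envelope check for the one XPU tensor the script creates:

```python
# Expected lower bound for torch.xpu.memory_allocated() after
# x = torch.rand(5, 99999).to('xpu'); float32 elements are 4 bytes each.
expected = 5 * 99999 * 4   # 1,999,980 bytes, roughly 1.9 MiB
print(expected)
```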

torch-compile-inference.py (new file, 30 lines)

@@ -0,0 +1,30 @@
import torch
import torchvision.models as models
import time

model = models.resnet50(weights="ResNet50_Weights.DEFAULT")
model.eval()
data = torch.rand(1, 3, 224, 224)
ITERS = 10

model = model.to("xpu")
data = data.to("xpu")

for i in range(ITERS):
    start = time.time()
    with torch.no_grad():
        model(data)
    torch.xpu.synchronize()
    end = time.time()
    print(f"Inference time before torch.compile for iteration {i}: {(end-start)*1000} ms")

model = torch.compile(model)
for i in range(ITERS):
    start = time.time()
    with torch.no_grad():
        model(data)
    torch.xpu.synchronize()
    end = time.time()
    print(f"Inference time after torch.compile for iteration {i}: {(end-start)*1000} ms")

print("Execution finished")
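
When reading the timings from `torch-compile-inference.py`, note that `torch.compile` compiles lazily: the first post-compile iteration includes graph capture and code generation, so it will be far slower than the steady state. A sketch of a warm-up pass that keeps that one-time cost out of the measured loop (an addition, not part of the committed file):

```python
# Hypothetical warm-up before the timed loop over the compiled model.
compiled = torch.compile(model)
with torch.no_grad():
    for _ in range(3):      # a few calls trigger compilation and fill caches
        compiled(data)
torch.xpu.synchronize()     # ensure warm-up work on the XPU has finished
```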