From 66bd7abae4df354512c71198ba32a267e15bc1dd Mon Sep 17 00:00:00 2001
From: Jinhe
Date: Tue, 26 Nov 2024 11:38:09 +0800
Subject: [PATCH] add sdxl and lora-lcm optimization (#12444)

* add sdxl and lora-lcm optimization

* fix openjourney speed drop
---
 .../HuggingFace/Multimodal/StableDiffusion/lora-lcm.py | 8 +++++---
 .../GPU/HuggingFace/Multimodal/StableDiffusion/sdxl.py | 8 +++++---
 python/llm/src/ipex_llm/transformers/models/sd.py | 2 +-
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/StableDiffusion/lora-lcm.py b/python/llm/example/GPU/HuggingFace/Multimodal/StableDiffusion/lora-lcm.py
index 587eb17a..5629ef67 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/StableDiffusion/lora-lcm.py
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/StableDiffusion/lora-lcm.py
@@ -17,7 +17,7 @@
 
 import torch
 from diffusers import DiffusionPipeline, LCMScheduler
-import ipex_llm
+from ipex_llm import optimize_model
 import argparse
 import time
 
@@ -25,8 +25,10 @@ import time
 def main(args):
     pipe = DiffusionPipeline.from_pretrained(
         args.repo_id_or_model_path,
-        torch_dtype=torch.bfloat16,
-    ).to("xpu")
+        torch_dtype=torch.float16,
+    )
+    pipe = optimize_model(pipe, low_bit=None)
+    pipe.to("xpu")
 
     # set scheduler
     pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/StableDiffusion/sdxl.py b/python/llm/example/GPU/HuggingFace/Multimodal/StableDiffusion/sdxl.py
index 5e7b20fb..e15224ad 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/StableDiffusion/sdxl.py
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/StableDiffusion/sdxl.py
@@ -17,7 +17,7 @@
 
 from diffusers import AutoPipelineForText2Image
 import torch
-import ipex_llm
+from ipex_llm import optimize_model
 import numpy as np
 from PIL import Image
 import argparse
@@ -27,9 +27,11 @@ import time
 def main(args):
     pipeline_text2image = AutoPipelineForText2Image.from_pretrained(
         args.repo_id_or_model_path,
-        torch_dtype=torch.bfloat16,
+        torch_dtype=torch.float16,
         use_safetensors=True
-    ).to("xpu")
+    )
+    pipeline_text2image = optimize_model(pipeline_text2image, low_bit=None)
+    pipeline_text2image.to("xpu")
 
     with torch.inference_mode():
         # warmup
diff --git a/python/llm/src/ipex_llm/transformers/models/sd.py b/python/llm/src/ipex_llm/transformers/models/sd.py
index 50003903..4ba360b1 100644
--- a/python/llm/src/ipex_llm/transformers/models/sd.py
+++ b/python/llm/src/ipex_llm/transformers/models/sd.py
@@ -111,7 +111,7 @@ class AttnProcessor2_0:
         # padding head_dim 40 to 64
         query, key, value = padding_qkv_hd(query, key, value, 40, 64)
 
-        if use_sdp_non_causal(head_dim, query.device, query.dtype):
+        if use_sdp_non_causal(query.size(-1), query.device, query.dtype):
             import xe_addons
             hidden_states = xe_addons.sdp_non_causal(query, key.contiguous(),
                                                      value.contiguous(), attention_mask)