Update IPEX_LLM_PERFORMANCE_MODE (#11823)

This commit is contained in:
Yuwen Hu 2024-08-16 09:48:36 +08:00 committed by GitHub
parent 5a80fd2633
commit 9e9086cc2a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -487,14 +487,18 @@ def update_past_key_value(past_key_value, key_states, value_states,
def should_use_compresskv(x: torch.Tensor, prompt_len: int):
    """Decide whether the compressed KV cache should be used.

    Two environment variables are consulted, in this order of precedence:

    * ``IPEX_LLM_PERFORMANCE_MODE`` -- when set to ``"1"`` the compressed KV
      cache is disabled unconditionally.
    * ``IPEX_LLM_COMPRESS_KV_CACHE`` -- when set, it acts as an explicit
      switch: compression is used only if the value is ``"1"`` and ``x`` is
      on an ``xpu`` device. When unset, a default heuristic applies:
      compression is used only if ``get_xpu_device_type(x)`` reports
      ``"mtl"`` and ``prompt_len`` lies in the inclusive range 1800..4500.

    Args:
        x: Tensor whose device is inspected.
        prompt_len: Length of the prompt; only used by the default heuristic.

    Returns:
        ``True`` if the compressed KV cache should be used, else ``False``.
    """
    # Performance mode takes precedence over every other setting, so check it
    # before looking at the explicit switch or the device heuristic.
    if os.environ.get("IPEX_LLM_PERFORMANCE_MODE", None) == "1":
        return False

    use_compress_kv = os.environ.get("IPEX_LLM_COMPRESS_KV_CACHE", None)
    if use_compress_kv is None:
        # No explicit user choice: fall back to the device/prompt-length
        # heuristic.
        return (
            get_xpu_device_type(x) == "mtl"
            and 1800 <= prompt_len <= 4500
        )

    # Explicit user choice: honoured only for tensors on an xpu device.
    return x.device.type == 'xpu' and use_compress_kv == "1"
def get_compresskv_attn_mask(key_states: torch.Tensor,