LLM: support bigdl quantize kv cache env and add warning. (#10623)
* LLM: support bigdl quantize kv cache env and add warning. * fix style. * fix comments.
This commit is contained in:
parent
20a5e72da0
commit
58b57177e3
1 changed files with 7 additions and 0 deletions
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
import os
|
||||
import torch
|
||||
import warnings
|
||||
from ipex_llm.utils.common import invalidInputError
|
||||
from ipex_llm.ggml.quantize import ggml_tensor_qtype
|
||||
from ipex_llm.transformers.utils import get_ipex_version, get_xpu_device_type
|
||||
|
|
@ -74,6 +75,12 @@ def append_kv_cache(cache_k, cache_v, key_states, value_states):
|
|||
def use_quantize_kv_cache(linear: torch.nn.Module, x: torch.Tensor) -> bool:
|
||||
if os.environ.get("IPEX_LLM_LOW_MEM", None) is not None:
|
||||
return os.environ["IPEX_LLM_LOW_MEM"] == "1"
|
||||
elif os.environ.get("BIGDL_QUANTIZE_KV_CACHE", None) is not None:
|
||||
warnings.warn(
|
||||
"`BIGDL_QUANTIZE_KV_CACHE` is deprecated and will be removed in future releases. "
|
||||
"Please use `IPEX_LLM_QUANTIZE_KV_CACHE` instead."
|
||||
)
|
||||
return os.environ["BIGDL_QUANTIZE_KV_CACHE"] == "1"
|
||||
elif os.environ.get("IPEX_LLM_QUANTIZE_KV_CACHE", None) is not None:
|
||||
return os.environ["IPEX_LLM_QUANTIZE_KV_CACHE"] == "1"
|
||||
else:
|
||||
|
|
|
|||
Loading…
Reference in a new issue