From 58b57177e32a8c64728303ecffb815210bb15edc Mon Sep 17 00:00:00 2001
From: Cengguang Zhang
Date: Tue, 2 Apr 2024 15:41:08 +0800
Subject: [PATCH] LLM: support bigdl quantize kv cache env and add warning.
 (#10623)

* LLM: support bigdl quantize kv cache env and add warning.

* fix style.

* fix comments.
---
 python/llm/src/ipex_llm/transformers/models/utils.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/python/llm/src/ipex_llm/transformers/models/utils.py b/python/llm/src/ipex_llm/transformers/models/utils.py
index da4081ec..a3c66a0d 100644
--- a/python/llm/src/ipex_llm/transformers/models/utils.py
+++ b/python/llm/src/ipex_llm/transformers/models/utils.py
@@ -16,6 +16,7 @@
 
 import os
 import torch
+import warnings
 from ipex_llm.utils.common import invalidInputError
 from ipex_llm.ggml.quantize import ggml_tensor_qtype
 from ipex_llm.transformers.utils import get_ipex_version, get_xpu_device_type
@@ -74,6 +75,12 @@ def append_kv_cache(cache_k, cache_v, key_states, value_states):
 def use_quantize_kv_cache(linear: torch.nn.Module, x: torch.Tensor) -> bool:
     if os.environ.get("IPEX_LLM_LOW_MEM", None) is not None:
         return os.environ["IPEX_LLM_LOW_MEM"] == "1"
+    elif os.environ.get("BIGDL_QUANTIZE_KV_CACHE", None) is not None:
+        warnings.warn(
+            "`BIGDL_QUANTIZE_KV_CACHE` is deprecated and will be removed in future releases. "
+            "Please use `IPEX_LLM_QUANTIZE_KV_CACHE` instead."
+        )
+        return os.environ["BIGDL_QUANTIZE_KV_CACHE"] == "1"
     elif os.environ.get("IPEX_LLM_QUANTIZE_KV_CACHE", None) is not None:
         return os.environ["IPEX_LLM_QUANTIZE_KV_CACHE"] == "1"
     else:
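
Usage sketch (not part of the patch itself): the snippet below exercises the
environment-variable precedence that the patched use_quantize_kv_cache
implements, assuming an environment with ipex_llm installed and this patch
applied; the torch.nn.Linear module and the input tensor are hypothetical
stand-ins chosen only to satisfy the function's signature.

    import os
    import warnings

    import torch
    from ipex_llm.transformers.models.utils import use_quantize_kv_cache

    linear = torch.nn.Linear(64, 64)  # stand-in for the real attention projection
    x = torch.randn(1, 8, 64)         # stand-in activation tensor

    # The deprecated variable still controls the result, but now emits a warning.
    os.environ["BIGDL_QUANTIZE_KV_CACHE"] = "1"
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        assert use_quantize_kv_cache(linear, x) is True
        assert any("deprecated" in str(w.message) for w in caught)

    # IPEX_LLM_LOW_MEM is checked first, so it overrides both
    # BIGDL_QUANTIZE_KV_CACHE and IPEX_LLM_QUANTIZE_KV_CACHE.
    os.environ["IPEX_LLM_LOW_MEM"] = "0"
    assert use_quantize_kv_cache(linear, x) is False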