fix npu llama2 (#11471)
parent 07362ffffc
commit 319a3b36b2
2 changed files with 3 additions and 1 deletion
@@ -21,6 +21,8 @@ pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-exte
 
 # below command will install intel_npu_acceleration_library
 pip install intel-npu-acceleration-library==1.3
+
+pip install transformers==4.40
 ```
 
 ### 2. Runtime Configurations
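The README hunk above pins two package versions. A quick sanity check after installing (a minimal sketch, not part of the diff; the distribution names are taken from the pip commands above):

```python
# Verify the pinned versions are what actually got installed.
import importlib.metadata as md

for pkg, want in [("transformers", "4.40"),
                  ("intel-npu-acceleration-library", "1.3")]:
    have = md.version(pkg)
    status = "OK" if have.startswith(want) else "MISMATCH"
    print(f"{pkg}: installed {have}, expected {want} -> {status}")
```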
@@ -106,7 +106,7 @@ def llama_model_forward(
     from ipex_llm.transformers.kv import DynamicNormalCache
     if use_cache and not isinstance(past_key_values, DynamicNormalCache):
         past_key_values = DynamicNormalCache.from_legacy_cache(past_key_values)
-    past_seen_tokens = past_key_values.set_seq_length()
+    past_seen_tokens = past_key_values.get_seq_length()
 
     if cache_position is None:
         cache_position = torch.arange(past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1],
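The substance of the fix is the one-line change above: `llama_model_forward` needs to read how many tokens the KV cache already holds, and `set_seq_length()` was being called where `get_seq_length()` was meant. That count then seeds `cache_position` for the new tokens. A minimal sketch of the pattern, using a toy cache (the class and tensor shapes below are illustrative assumptions, not DynamicNormalCache's actual internals, which just need to follow the Hugging Face `Cache` convention):

```python
import torch

# Toy stand-in for a KV cache that tracks how many tokens it has seen.
class ToyCache:
    def __init__(self):
        self.key_cache = []  # one tensor per layer: [batch, heads, seq, head_dim]

    def get_seq_length(self, layer_idx: int = 0) -> int:
        # Read (not set) the number of tokens already cached.
        if len(self.key_cache) <= layer_idx:
            return 0
        return self.key_cache[layer_idx].shape[-2]

cache = ToyCache()
cache.key_cache.append(torch.zeros(1, 8, 5, 64))  # pretend 5 tokens are cached

inputs_embeds = torch.zeros(1, 3, 4096)           # 3 new tokens this step
past_seen_tokens = cache.get_seq_length()

# Same pattern as the fixed code: the new tokens' positions start
# right after the cached prefix.
cache_position = torch.arange(past_seen_tokens,
                              past_seen_tokens + inputs_embeds.shape[1])
print(cache_position)  # tensor([5, 6, 7])
```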