Fix llava example to support transformers 4.36 (#10614)

* fix llava example

* update
Jiao Wang 2024-04-09 13:47:07 -07:00 committed by GitHub
parent 1e817926ba
commit 878a97077b
5 changed files with 27 additions and 15 deletions


@@ -16,11 +16,13 @@ conda create -n llm python=3.11 # recommended to use Python 3.11
conda activate llm
pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option
git clone -b v1.1.1 --depth=1 https://github.com/haotian-liu/LLaVA.git # clone the llava library
pip install einops # install dependencies required by llava
pip install transformers==4.36.2
git clone https://github.com/haotian-liu/LLaVA.git # clone the llava library
cp generate.py ./LLaVA/ # copy our example to the LLaVA folder
cd LLaVA # change the working directory to the LLaVA folder
git checkout tags/v1.2.0 -b 1.2.0 # Get the branch which is compatible with transformers 4.36
```
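
Before moving on, a quick sanity check can confirm that the environment matches what the example expects; this is a minimal sketch, assuming it is run from inside the `LLaVA` checkout with the `llm` environment active:

```python
# Minimal environment check (assumption: run from inside the LLaVA folder at tag v1.2.0,
# with the `llm` conda env active).
import transformers

print(transformers.__version__)  # expected: 4.36.2
assert transformers.__version__.startswith("4.36"), "the example targets transformers 4.36.x"

# The example imports LLaVA's model class directly, so this should also resolve:
from llava.model.language_model.llava_llama import LlavaLlamaForCausalLM
print(LlavaLlamaForCausalLM.__name__)
```
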
### 2. Run


@@ -39,6 +39,7 @@ import time
from transformers import AutoModelForCausalLM
from llava.model.language_model.llava_llama import LlavaLlamaForCausalLM
from transformers import AutoTokenizer
from transformers import TextStreamer
from llava.constants import (
DEFAULT_IMAGE_PATCH_TOKEN,
@@ -311,11 +312,14 @@ if __name__ == '__main__':
print("exit...")
break
print(f"{roles[1]}: ", end="")
prompt = get_prompt(model.config.mm_use_im_start_end, first_round, conv, user_input)
first_round = False
input_ids = tokenizer_image_token(
prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0)
stopping_criteria = get_stopping_criteria(conv, tokenizer, input_ids)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
# Generate predicted tokens
with torch.inference_mode():
@@ -325,13 +329,11 @@ if __name__ == '__main__':
images=image_tensor,
do_sample=True,
max_new_tokens=args.n_predict,
streamer=streamer,
use_cache=True,
stopping_criteria=[stopping_criteria])
end = time.time()
#print(f'Inference time: {end-st} s')
outputs = tokenizer.decode(
output_ids[0, input_ids.shape[1]:], skip_special_tokens=True).strip()
outputs = tokenizer.decode(output_ids[0, :], skip_special_tokens=True).strip()
conv.messages[-1][-1] = outputs
print(f"{roles[1]}: ", end="")
print(outputs)
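
To make the intent of the two generate.py changes above clearer — a `TextStreamer` is now passed to `generate()` so tokens print as they are produced, and the decode no longer slices off the prompt because LLaVA v1.2.x's `generate()` appears to return only the newly generated ids — here is a minimal, self-contained sketch of the same streaming pattern with a plain Hugging Face model (`gpt2` is just a stand-in, not part of the example):

```python
# Streaming-generation sketch; `gpt2` is a stand-in model, not LLaVA.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

input_ids = tokenizer("USER: describe the image\nASSISTANT:", return_tensors="pt").input_ids

# skip_prompt=True makes the streamer print only newly generated tokens as they arrive,
# mirroring how the example prints the assistant's reply incrementally.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

with torch.inference_mode():
    output_ids = model.generate(input_ids,
                                do_sample=True,
                                max_new_tokens=32,
                                streamer=streamer,
                                use_cache=True)

# A vanilla causal LM returns prompt + new tokens, so the prompt is sliced off here.
# LLaVA v1.2.x returns only the new tokens, which is why the example decodes output_ids[0, :].
print(tokenizer.decode(output_ids[0, input_ids.shape[1]:], skip_special_tokens=True).strip())
```
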


@@ -17,11 +17,13 @@ conda activate llm
# below command will install intel_extension_for_pytorch==2.1.10+xpu as default
pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
git clone -b v1.1.1 --depth=1 https://github.com/haotian-liu/LLaVA.git # clone the llava library
pip install einops # install dependencies required by llava
pip install transformers==4.36.2
git clone https://github.com/haotian-liu/LLaVA.git # clone the llava library
cp generate.py ./LLaVA/ # copy our example to the LLaVA folder
cd LLaVA # change the working directory to the LLaVA folder
git checkout tags/v1.2.0 -b 1.2.0 # Get the branch which is compatible with transformers 4.36
```
#### 1.2 Installation on Windows
@@ -31,11 +33,14 @@ conda create -n llm python=3.11 libuv
conda activate llm
# below command will install intel_extension_for_pytorch==2.1.10+xpu as default
pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
git clone -b v1.1.1 --depth=1 https://github.com/haotian-liu/LLaVA.git # clone the llava library
pip install einops # install dependencies required by llava
pip install transformers==4.36.2
git clone https://github.com/haotian-liu/LLaVA.git # clone the llava library
cp generate.py ./LLaVA/ # copy our example to the LLaVA folder
cd LLaVA # change the working directory to the LLaVA folder
git checkout tags/v1.2.0 -b 1.2.0 # Get the branch which is compatible with transformers 4.36
```
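
Once the OneAPI environment variables from the next section are set, a short check (a sketch, assuming `ipex-llm[xpu]` was installed as above) can confirm the pinned `transformers` version and that the Intel GPU is visible:

```python
# XPU availability check; assumes ipex-llm[xpu] and its intel_extension_for_pytorch
# dependency are installed, and the OneAPI environment has been sourced.
import torch
import intel_extension_for_pytorch as ipex  # noqa: F401  (registers the 'xpu' device)
import transformers

print(transformers.__version__)   # expected: 4.36.2
print(torch.xpu.is_available())   # True when an Intel GPU and its drivers are visible
print(torch.xpu.device_count())
```
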
### 2. Configure OneAPI environment variables


@@ -39,6 +39,7 @@ import time
from transformers import AutoModelForCausalLM
from llava.model.language_model.llava_llama import LlavaLlamaForCausalLM
from transformers import AutoTokenizer
from transformers import TextStreamer
from llava.constants import (
DEFAULT_IMAGE_PATCH_TOKEN,
@@ -312,11 +313,14 @@ if __name__ == '__main__':
print("exit...")
break
print(f"{roles[1]}: ", end="")
prompt = get_prompt(model.config.mm_use_im_start_end, first_round, conv, user_input)
first_round = False
input_ids = tokenizer_image_token(
prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).to('xpu')
stopping_criteria = get_stopping_criteria(conv, tokenizer, input_ids)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
# Generate predicted tokens
with torch.inference_mode():
@@ -326,13 +330,11 @@ if __name__ == '__main__':
images=image_tensor,
do_sample=True,
max_new_tokens=args.n_predict,
streamer=streamer,
use_cache=True,
stopping_criteria=[stopping_criteria])
end = time.time()
#print(f'Inference time: {end-st} s')
outputs = tokenizer.decode(
output_ids[0, input_ids.shape[1]:].cpu(), skip_special_tokens=True).strip()
outputs = tokenizer.decode(output_ids[0, :].cpu(), skip_special_tokens=True).strip()
conv.messages[-1][-1] = outputs
print(f"{roles[1]}: ", end="")
print(outputs)
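
The XPU variant of generate.py differs from the CPU one mainly in tensor placement: the tokenized prompt is moved to `'xpu'` before `generate()`, and the resulting ids are copied back with `.cpu()` before decoding. A minimal device-placement sketch (placeholder tensors, not the LLaVA model itself):

```python
# Device-placement sketch for the XPU path; assumes intel_extension_for_pytorch is installed.
import torch
import intel_extension_for_pytorch as ipex  # noqa: F401  (enables the 'xpu' device)

# Stand-in for the prompt ids produced by tokenizer_image_token(...).unsqueeze(0)
input_ids = torch.tensor([[1, 2, 3, 4]])

# Generation runs on the Intel GPU, so the ids are moved to 'xpu' first.
input_ids = input_ids.to('xpu')
print(input_ids.device)

# Stand-in for the ids returned by model.generate(...)
output_ids = input_ids

# Copy back to the host before tokenizer.decode(), matching the `.cpu()` call above.
print(output_ids[0, :].cpu())
```
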


@@ -114,7 +114,8 @@ def llama_model_forward_4_36(
) -> Union[Tuple, BaseModelOutputWithPast]:
from ipex_llm.transformers.kv import DynamicFp8Cache
use_cache = use_cache if use_cache is not None else self.config.use_cache
if use_cache and use_quantize_kv_cache(self.layers[0].mlp.up_proj, input_ids):
input = input_ids if input_ids is not None else inputs_embeds
if use_cache and use_quantize_kv_cache(self.layers[0].mlp.up_proj, input):
if not isinstance(past_key_values, DynamicFp8Cache):
past_key_values = DynamicFp8Cache.from_legacy_cache(past_key_values)
return llama_model_forward_4_36_internal(
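
The llama.py guard above exists because LLaVA calls the language model with the merged text-and-image embeddings as `inputs_embeds`, leaving `input_ids` as `None`, so the quantized-KV-cache check has to probe whichever tensor is actually present. A minimal sketch of the pattern (hypothetical names; the stand-in check is not the real `use_quantize_kv_cache`):

```python
# Sketch of the None-safe probe added above; `looks_like_single_prompt` is a stand-in
# for the real use_quantize_kv_cache(...) decision, not its actual implementation.
from typing import Optional
import torch


def looks_like_single_prompt(probe: torch.Tensor) -> bool:
    return probe.shape[0] == 1  # placeholder heuristic


def maybe_quantize_kv(input_ids: Optional[torch.Tensor],
                      inputs_embeds: Optional[torch.Tensor],
                      use_cache: bool = True) -> bool:
    # Multimodal callers (e.g. LLaVA) pass inputs_embeds and set input_ids to None,
    # so probe whichever of the two tensors is present.
    probe = input_ids if input_ids is not None else inputs_embeds
    return bool(use_cache and probe is not None and looks_like_single_prompt(probe))


# Text-only call: input_ids is present.
print(maybe_quantize_kv(torch.ones(1, 8, dtype=torch.long), None))
# LLaVA-style call: only the merged text+image embeddings are passed.
print(maybe_quantize_kv(None, torch.randn(1, 64, 4096)))
```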