Fix llava example to support transformers 4.36 (#10614)
* fix llava example
* update
parent 1e817926ba
commit 878a97077b

5 changed files with 27 additions and 15 deletions
CPU example README:

@@ -16,11 +16,13 @@ conda create -n llm python=3.11 # recommend to use Python 3.11
conda activate llm

pip install --pre --upgrade ipex-llm[all] # install the latest ipex-llm nightly build with 'all' option

git clone -b v1.1.1 --depth=1 https://github.com/haotian-liu/LLaVA.git # clone the llava library
pip install einops # install dependencies required by llava
pip install transformers==4.36.2

git clone https://github.com/haotian-liu/LLaVA.git # clone the llava library
cp generate.py ./LLaVA/ # copy our example to the LLaVA folder
cd LLaVA # change the working directory to the LLaVA folder
git checkout tags/v1.2.0 -b 1.2.0 # Get the branch which is compatible with transformers 4.36
```

### 2. Run
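The CPU example now pins transformers to 4.36.2 and replaces the shallow v1.1.1 clone with a full clone followed by `git checkout tags/v1.2.0`, the LLaVA tag that works with that transformers release. A minimal sanity-check sketch (not part of the example itself; it assumes it is run from the directory that contains the `LLaVA` clone):

```python
# Sanity-check sketch: verify the pinned transformers release and the LLaVA tag
# that the updated README expects. The version and tag come from the README
# lines above; everything else here is illustrative.
import subprocess
import transformers

assert transformers.__version__ == "4.36.2", transformers.__version__

tag = subprocess.run(
    ["git", "describe", "--tags"],
    cwd="./LLaVA", capture_output=True, text=True, check=True,
).stdout.strip()
print("LLaVA checkout:", tag)  # expected to start with v1.2.0
```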
CPU example generate.py:

@@ -39,6 +39,7 @@ import time
from transformers import AutoModelForCausalLM
from llava.model.language_model.llava_llama import LlavaLlamaForCausalLM
from transformers import AutoTokenizer
from transformers import TextStreamer

from llava.constants import (
    DEFAULT_IMAGE_PATCH_TOKEN,
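The added import of `LlavaLlamaForCausalLM` suggests the example now builds the LLaVA model through LLaVA's own model class on transformers 4.36 instead of relying on `AutoModelForCausalLM` alone. A loading sketch under that assumption (the model id is illustrative and this is not the example's exact loading code):

```python
# Loading sketch (illustrative): construct the LLaVA model via its own class and
# apply ipex-llm low-bit optimization on CPU.
from llava.model.language_model.llava_llama import LlavaLlamaForCausalLM
from transformers import AutoTokenizer
from ipex_llm import optimize_model  # ipex-llm PyTorch-models optimization entry point

model_path = "liuhaotian/llava-v1.5-7b"  # assumed checkpoint, adjust as needed
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
model = LlavaLlamaForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True)
model = optimize_model(model)  # INT4 optimization by default
```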
@@ -311,11 +312,14 @@ if __name__ == '__main__':
print("exit...")
break

print(f"{roles[1]}: ", end="")

prompt = get_prompt(model.config.mm_use_im_start_end, first_round, conv, user_input)
first_round = False
input_ids = tokenizer_image_token(
    prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0)
stopping_criteria = get_stopping_criteria(conv, tokenizer, input_ids)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# Generate predicted tokens
with torch.inference_mode():
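`get_stopping_criteria` is a helper defined in the example itself; the sketch below shows the usual LLaVA pattern it presumably follows, stopping generation once the conversation separator string appears. This is an illustration, not the helper's actual body:

```python
# Illustration of the stopping-criteria idea (assumed to be close to what the
# example's get_stopping_criteria helper does): stop generation once the
# conversation separator is produced, using llava's KeywordsStoppingCriteria.
from llava.mm_utils import KeywordsStoppingCriteria
from llava.conversation import SeparatorStyle

def build_stopping_criteria(conv, tokenizer, input_ids):
    # conv.sep2 applies to two-separator templates; conv.sep otherwise
    stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
    return KeywordsStoppingCriteria([stop_str], tokenizer, input_ids)
```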
@@ -325,13 +329,11 @@ if __name__ == '__main__':
    images=image_tensor,
    do_sample=True,
    max_new_tokens=args.n_predict,
    streamer=streamer,
    use_cache=True,
    stopping_criteria=[stopping_criteria])
end = time.time()
#print(f'Inference time: {end-st} s')

outputs = tokenizer.decode(
    output_ids[0, input_ids.shape[1]:], skip_special_tokens=True).strip()
outputs = tokenizer.decode(output_ids[0, :], skip_special_tokens=True).strip()
conv.messages[-1][-1] = outputs
print(f"{roles[1]}: ", end="")
print(outputs)
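The decode call changes from slicing off the prompt (`output_ids[0, input_ids.shape[1]:]`) to decoding the whole returned sequence (`output_ids[0, :]`). With LLaVA v1.2.0 the model generates from `inputs_embeds`, so `generate()` no longer echoes the prompt tokens and the old slice would cut away real output. A defensive sketch that handles either behavior (illustrative, not part of the example):

```python
# Illustrative helper: strip the prompt from generate() output only when it is
# actually echoed, so the same decode works with both the old and new behavior.
import torch

def decode_new_tokens(tokenizer, output_ids, input_ids):
    seq = output_ids[0]
    prompt_len = input_ids.shape[1]
    if seq.shape[0] > prompt_len and torch.equal(seq[:prompt_len].cpu(), input_ids[0].cpu()):
        seq = seq[prompt_len:]  # old behavior: output began with the prompt tokens
    return tokenizer.decode(seq, skip_special_tokens=True).strip()
```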
GPU example README:

@@ -17,11 +17,13 @@ conda activate llm

# below command will install intel_extension_for_pytorch==2.1.10+xpu as default
pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/

git clone -b v1.1.1 --depth=1 https://github.com/haotian-liu/LLaVA.git # clone the llava library
pip install einops # install dependencies required by llava
pip install transformers==4.36.2

git clone https://github.com/haotian-liu/LLaVA.git # clone the llava library
cp generate.py ./LLaVA/ # copy our example to the LLaVA folder
cd LLaVA # change the working directory to the LLaVA folder
git checkout tags/v1.2.0 -b 1.2.0 # Get the branch which is compatible with transformers 4.36
```

#### 1.2 Installation on Windows
@@ -31,11 +33,14 @@ conda create -n llm python=3.11 libuv
conda activate llm
# below command will install intel_extension_for_pytorch==2.1.10+xpu as default
pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/

git clone -b v1.1.1 --depth=1 https://github.com/haotian-liu/LLaVA.git # clone the llava library
pip install einops # install dependencies required by llava
pip install transformers==4.36.2

git clone https://github.com/haotian-liu/LLaVA.git # clone the llava library
cp generate.py ./LLaVA/ # copy our example to the LLaVA folder
cd LLaVA # change the working directory to the LLaVA folder
git checkout tags/v1.2.0 -b 1.2.0 # Get the branch which is compatible with transformers 4.36

```

### 2. Configures OneAPI environment variables
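Both GPU install paths (Linux and Windows) add the same transformers 4.36.2 pin and LLaVA v1.2.0 checkout as the CPU example, on top of the `ipex-llm[xpu]` wheel. A quick device-check sketch (illustrative; it assumes the OneAPI environment described in the following section has already been set up):

```python
# Check that the xpu build can see an Intel GPU before running generate.py.
import torch
import intel_extension_for_pytorch as ipex  # registers the 'xpu' device with torch

print("torch:", torch.__version__, "ipex:", ipex.__version__)
print("XPU available:", torch.xpu.is_available())
if torch.xpu.is_available():
    print("Device:", torch.xpu.get_device_name(0))
```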
GPU example generate.py:

@@ -39,6 +39,7 @@ import time
from transformers import AutoModelForCausalLM
from llava.model.language_model.llava_llama import LlavaLlamaForCausalLM
from transformers import AutoTokenizer
from transformers import TextStreamer

from llava.constants import (
    DEFAULT_IMAGE_PATCH_TOKEN,
@@ -312,11 +313,14 @@ if __name__ == '__main__':
print("exit...")
break

print(f"{roles[1]}: ", end="")

prompt = get_prompt(model.config.mm_use_im_start_end, first_round, conv, user_input)
first_round = False
input_ids = tokenizer_image_token(
    prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).to('xpu')
stopping_criteria = get_stopping_criteria(conv, tokenizer, input_ids)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# Generate predicted tokens
with torch.inference_mode():
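The GPU variant of the loop matches the CPU one except that the tokenized prompt is moved to the Intel GPU with `.to('xpu')`. For context, the sketch below shows roughly what `llava.mm_utils.tokenizer_image_token` produces: token ids with the special `IMAGE_TOKEN_INDEX` placeholder spliced in where `<image>` appears (simplified; the real helper also handles BOS tokens, and the tokenizer name here is only illustrative):

```python
import torch
from transformers import AutoTokenizer

IMAGE_TOKEN_INDEX = -200  # placeholder id defined in llava.constants

def splice_image_token(prompt: str, tokenizer) -> torch.Tensor:
    # rough sketch of the splicing idea used by tokenizer_image_token
    pieces = [tokenizer(p, add_special_tokens=False).input_ids
              for p in prompt.split("<image>")]
    ids = []
    for i, piece in enumerate(pieces):
        if i > 0:
            ids.append(IMAGE_TOKEN_INDEX)
        ids.extend(piece)
    return torch.tensor(ids, dtype=torch.long).unsqueeze(0)

tok = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")  # illustrative
ids = splice_image_token("USER: <image>\nWhat is in the picture? ASSISTANT:", tok)
# in the GPU example the resulting tensor is then moved with ids.to('xpu')
```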
@@ -326,13 +330,11 @@ if __name__ == '__main__':
    images=image_tensor,
    do_sample=True,
    max_new_tokens=args.n_predict,
    streamer=streamer,
    use_cache=True,
    stopping_criteria=[stopping_criteria])
end = time.time()
#print(f'Inference time: {end-st} s')

outputs = tokenizer.decode(
    output_ids[0, input_ids.shape[1]:].cpu(), skip_special_tokens=True).strip()
outputs = tokenizer.decode(output_ids[0, :].cpu(), skip_special_tokens=True).strip()
conv.messages[-1][-1] = outputs
print(f"{roles[1]}: ", end="")
print(outputs)
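On the GPU path the generated ids live on the `xpu` device, so they are moved back with `.cpu()` before `tokenizer.decode`; the slice change itself has the same rationale as in the CPU example above. A small sketch of that decode step, plus the synchronize call that would make the commented-out inference-time print meaningful on XPU (all illustrative; the tokenizer and token ids are stand-ins):

```python
import time
import torch
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")  # illustrative

# stand-in for output_ids returned by model.generate(...) on the 'xpu' device
output_ids = torch.tensor([tok("a cat sitting on a sofa", add_special_tokens=False).input_ids])

st = time.time()
# ... generation would run here ...
if hasattr(torch, "xpu") and torch.xpu.is_available():
    torch.xpu.synchronize()  # XPU kernels run asynchronously; wait before timing
end = time.time()

text = tok.decode(output_ids[0, :].cpu(), skip_special_tokens=True).strip()
print(text, f"({end - st:.2f} s)")
```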
ipex-llm llama model forward (transformers 4.36 path):

@@ -114,7 +114,8 @@ def llama_model_forward_4_36(
) -> Union[Tuple, BaseModelOutputWithPast]:
    from ipex_llm.transformers.kv import DynamicFp8Cache
    use_cache = use_cache if use_cache is not None else self.config.use_cache
    if use_cache and use_quantize_kv_cache(self.layers[0].mlp.up_proj, input_ids):
    input = input_ids if input_ids is not None else inputs_embeds
    if use_cache and use_quantize_kv_cache(self.layers[0].mlp.up_proj, input):
        if not isinstance(past_key_values, DynamicFp8Cache):
            past_key_values = DynamicFp8Cache.from_legacy_cache(past_key_values)
    return llama_model_forward_4_36_internal(
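This hunk carries the underlying fix: the LLaVA wrapper calls the llama model with `inputs_embeds` only (image features merged with text embeddings), so `input_ids` is `None` inside `llama_model_forward_4_36`, and the old check that passed `input_ids` straight to `use_quantize_kv_cache` no longer works in that case. The new code keys the check on whichever tensor is present. A simplified sketch of that guard pattern (not the actual ipex-llm function):

```python
# Sketch of the fallback pattern introduced here: multimodal wrappers such as
# LLaVA call the language model with inputs_embeds only, so checks keyed on
# input_ids must fall back to inputs_embeds when input_ids is None.
from typing import Optional
import torch

def pick_reference_tensor(input_ids: Optional[torch.Tensor],
                          inputs_embeds: Optional[torch.Tensor]) -> torch.Tensor:
    # mirrors: input = input_ids if input_ids is not None else inputs_embeds
    if input_ids is not None:
        return input_ids
    assert inputs_embeds is not None, "either input_ids or inputs_embeds must be given"
    return inputs_embeds

# the downstream heuristic can then read batch size / sequence length from
# whichever tensor was provided:
ref = pick_reference_tensor(None, torch.zeros(1, 16, 4096))
print(ref.shape[:2])  # (batch, seq_len)
```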