Update ipex-llm default transformers version to 4.37.0 (#11859)

* Update default transformers version to 4.37.0 * Add dependency requirements for qwen and qwen-vl * Temporarily pin the transformers version for models that are not yet verified * Skip qwen tests in UT for now, as qwen requires transformers<4.37.0
2024-08-20 17:37:58 +08:00 · 2024-08-20 17:37:58 +08:00 · 5e8286f72d
commit 5e8286f72d
parent d4ee0a89f3
15 changed files with 27 additions and 7 deletions
--- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/qwen-vl/README.md
+++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/qwen-vl/README.md
@ -20,6 +20,7 @@ conda activate llm
 # install the latest ipex-llm nightly build with 'all' option
 pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu

+pip install "transformers<4.37.0"
 pip install accelerate tiktoken einops transformers_stream_generator==0.0.4 scipy torchvision pillow tensorboard matplotlib # additional package required for Qwen-VL-Chat to conduct generation

 ```
@ -32,6 +33,7 @@ conda activate llm

 pip install --pre --upgrade ipex-llm[all]

+pip install "transformers<4.37.0"
 pip install accelerate tiktoken einops transformers_stream_generator==0.0.4 scipy torchvision pillow tensorboard matplotlib

 ```
--- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/qwen/README.md
+++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/qwen/README.md
@ -22,6 +22,8 @@ conda activate llm

 # install the latest ipex-llm nightly build with 'all' option
 pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu
+
+pip install "transformers<4.37.0"
 pip install tiktoken einops transformers_stream_generator  # additional package required for Qwen-7B-Chat to conduct generation
 ```

@ -32,6 +34,8 @@ conda create -n llm python=3.11
 conda activate llm

 pip install --pre --upgrade ipex-llm[all]
+
+pip install "transformers<4.37.0"
 pip install tiktoken einops transformers_stream_generator
 ```

--- a/python/llm/example/CPU/PyTorch-Models/Model/qwen-vl/README.md
+++ b/python/llm/example/CPU/PyTorch-Models/Model/qwen-vl/README.md
@ -19,6 +19,8 @@ conda activate llm

 # install the latest ipex-llm nightly build with 'all' option
 pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu
+
+pip install "transformers<4.37.0"
 pip install accelerate tiktoken einops transformers_stream_generator==0.0.4 scipy torchvision pillow tensorboard matplotlib # additional package required for Qwen-VL-Chat to conduct generation
 ```

@ -29,6 +31,8 @@ conda create -n llm python=3.11
 conda activate llm

 pip install --pre --upgrade ipex-llm[all]
+
+pip install "transformers<4.37.0"
 pip install accelerate tiktoken einops transformers_stream_generator==0.0.4 scipy torchvision pillow tensorboard matplotlib
 ```

--- a/python/llm/example/GPU/HuggingFace/LLM/qwen/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/qwen/README.md
@ -15,6 +15,7 @@ conda activate llm
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/

+pip install "transformers<4.37.0"
 pip install tiktoken einops transformers_stream_generator  # additional package required for Qwen-7B-Chat to conduct generation
 ```

@ -27,6 +28,7 @@ conda activate llm
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/

+pip install "transformers<4.37.0"
 pip install tiktoken einops transformers_stream_generator  # additional package required for Qwen-7B-Chat to conduct generation
 ```

--- a/python/llm/example/GPU/HuggingFace/Multimodal/qwen-vl/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/qwen-vl/README.md
@ -15,6 +15,7 @@ conda activate llm
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/

+pip install "transformers<4.37.0"
 pip install accelerate tiktoken einops transformers_stream_generator==0.0.4 scipy torchvision pillow tensorboard matplotlib # additional package required for Qwen-VL-Chat to conduct generation
 ```

@ -27,6 +28,7 @@ conda activate llm
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/

+pip install "transformers<4.37.0"
 pip install accelerate tiktoken einops transformers_stream_generator==0.0.4 scipy torchvision pillow tensorboard matplotlib # additional package required for Qwen-VL-Chat to conduct generation
 ```

--- a/python/llm/example/GPU/HuggingFace/Multimodal/voiceassistant/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/voiceassistant/README.md
@ -17,6 +17,7 @@ conda activate llm
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/

+pip install transformers==4.36.2
 pip install librosa soundfile datasets
 pip install accelerate
 pip install SpeechRecognition sentencepiece colorama
@ -33,6 +34,7 @@ conda activate llm
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/

+pip install transformers==4.36.2
 pip install librosa soundfile datasets
 pip install accelerate
 pip install SpeechRecognition sentencepiece colorama
--- a/python/llm/example/GPU/HuggingFace/Multimodal/whisper/readme.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/whisper/readme.md
@ -16,6 +16,7 @@ conda activate llm
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/

+pip install transformers==4.36.2
 pip install datasets soundfile librosa # required by audio processing
 ```

@ -28,6 +29,7 @@ conda activate llm
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/

+pip install transformers==4.36.2
 pip install datasets soundfile librosa # required by audio processing
 ```

--- a/python/llm/example/GPU/PyTorch-Models/Model/llava/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/llava/README.md
@ -16,7 +16,6 @@ conda activate llm
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/

 pip install einops # install dependencies required by llava
-pip install transformers==4.36.2

 git clone https://github.com/haotian-liu/LLaVA.git # clone the llava libary
 cp generate.py ./LLaVA/ # copy our example to the LLaVA folder
@ -34,7 +33,6 @@ conda activate llm
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/

 pip install einops # install dependencies required by llava
-pip install transformers==4.36.2

 git clone https://github.com/haotian-liu/LLaVA.git # clone the llava libary
 copy generate.py .\LLaVA\ # copy our example to the LLaVA folder
--- a/python/llm/example/GPU/PyTorch-Models/Model/qwen-vl/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/qwen-vl/README.md
@ -15,6 +15,7 @@ conda activate llm
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/

+pip install "transformers<4.37.0"
 pip install accelerate tiktoken einops transformers_stream_generator==0.0.4 scipy torchvision pillow tensorboard matplotlib # additional package required for Qwen-VL-Chat to conduct generation
 ```

@ -27,6 +28,7 @@ conda activate llm
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/

+pip install "transformers<4.37.0"
 pip install accelerate tiktoken einops transformers_stream_generator==0.0.4 scipy torchvision pillow tensorboard matplotlib # additional package required for Qwen-VL-Chat to conduct generation
 ```

--- a/python/llm/example/GPU/PyTorch-Models/Model/speech-t5/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/speech-t5/README.md
@ -15,6 +15,7 @@ conda activate llm
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/

+pip install transformers==4.36.2
 pip install "datasets<2.18" soundfile # additional package required for SpeechT5 to conduct generation
 ```

@ -27,6 +28,7 @@ conda activate llm
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/

+pip install transformers==4.36.2
 pip install "datasets<2.18" soundfile # additional package required for SpeechT5 to conduct generation
 ```

--- a/python/llm/setup.py
+++ b/python/llm/setup.py
@ -53,7 +53,7 @@ libs_dir = os.path.join(llm_home, "ipex_llm", "libs")

 cpu_torch_version = ["torch==2.1.2+cpu;platform_system=='Linux'", "torch==2.1.2;platform_system=='Windows'"]
 CONVERT_DEP = ['numpy == 1.26.4', # lastet 2.0.0b1 will cause error
-               'transformers == 4.36.2', 'sentencepiece', 'tokenizers == 0.15.2',
+               'transformers == 4.37.0', 'sentencepiece', 'tokenizers == 0.15.2',
               'accelerate == 0.23.0', 'tabulate'] + cpu_torch_version

 SERVING_DEP = ['fschat[model_worker, webui] == 0.2.36', 'protobuf']
--- a/python/llm/test/inference_gpu/test_transformers_api.py
+++ b/python/llm/test/inference_gpu/test_transformers_api.py
@ -36,7 +36,7 @@ print(f'Running on {device}')
    (AutoModelForCausalLM, AutoTokenizer, os.environ.get('MPT_7B_ORIGIN_PATH')),
    # (AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH')),
    # (AutoModelForCausalLM, AutoTokenizer, os.environ.get('BAICHUAN2_7B_ORIGIN_PATH')),
-    # (AutoModelForCausalLM, AutoTokenizer, os.environ.get('QWEN_7B_ORIGIN_PATH')),
+    # (AutoModelForCausalLM, AutoTokenizer, os.environ.get('QWEN_7B_ORIGIN_PATH')), # qwen requires transformers<4.37.0
    ])
 def test_completion(Model, Tokenizer, model_path, prompt, answer):
    with torch.inference_mode():
--- a/python/llm/test/inference_gpu/test_transformers_api_RMSNorm.py
+++ b/python/llm/test/inference_gpu/test_transformers_api_RMSNorm.py
@ -32,7 +32,7 @@ TEST_MODEL_LIST = [
    ("ChatGLM2-6B", AutoModel, AutoTokenizer, os.environ.get('CHATGLM2_6B_ORIGIN_PATH')),
    ("Mistral-7B-Instruct-v0.1", AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH')),
    ("Baichuan2-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('BAICHUAN2_7B_ORIGIN_PATH')),
-    ("Qwen-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('QWEN_7B_ORIGIN_PATH')),
+    # ("Qwen-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('QWEN_7B_ORIGIN_PATH')), # qwen requires transformers<4.37.0
 ]

 class Test_Optimize_Gpu_Model:
--- a/python/llm/test/inference_gpu/test_transformers_api_attention.py
+++ b/python/llm/test/inference_gpu/test_transformers_api_attention.py
@ -34,7 +34,7 @@ TEST_MODEL_LIST = [
    ("ChatGLM2-6B", AutoModel, AutoTokenizer, os.environ.get('CHATGLM2_6B_ORIGIN_PATH')),
    ("Mistral-7B-Instruct-v0.1", AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH')),
    ("Baichuan2-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('BAICHUAN2_7B_ORIGIN_PATH')),
-    ("Qwen-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('QWEN_7B_ORIGIN_PATH')),
+    # ("Qwen-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('QWEN_7B_ORIGIN_PATH')), # qwen requires transformers<4.37.0
 ]

 class Test_Optimize_Gpu_Model:
--- a/python/llm/test/inference_gpu/test_transformers_api_mlp.py
+++ b/python/llm/test/inference_gpu/test_transformers_api_mlp.py
@ -27,7 +27,7 @@ print(f'Running on {device}')

 PROMPT = "Once upon a time, there existed a little girl who liked to have adventures. She wanted to go to places and meet new people, and have fun"
 TEST_MODEL_LIST = [
-    ("Qwen-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('QWEN_7B_ORIGIN_PATH')),
+    # ("Qwen-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('QWEN_7B_ORIGIN_PATH')), # qwen requires transformers<4.37.0
    ("Mistral-7B-Instruct-v0.1", AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH')),
    ("Llama2-7B", AutoModelForCausalLM, LlamaTokenizer, os.environ.get('LLAMA2_7B_ORIGIN_PATH'))
 ]