diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/qwen-vl/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/qwen-vl/README.md
index 7dc3dedc..7f5061ec 100644
--- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/qwen-vl/README.md
+++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/qwen-vl/README.md
@@ -20,6 +20,7 @@ conda activate llm
 
 # install the latest ipex-llm nightly build with 'all' option
 pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu
+pip install "transformers<4.37.0"
 pip install accelerate tiktoken einops transformers_stream_generator==0.0.4 scipy torchvision pillow tensorboard matplotlib # additional package required for Qwen-VL-Chat to conduct generation
 ```
 
@@ -32,6 +33,7 @@ conda activate llm
 
 pip install --pre --upgrade ipex-llm[all]
+pip install "transformers<4.37.0"
 pip install accelerate tiktoken einops transformers_stream_generator==0.0.4 scipy torchvision pillow tensorboard matplotlib
 ```
 
diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/qwen/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/qwen/README.md
index cee06098..992ea9ee 100644
--- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/qwen/README.md
+++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/qwen/README.md
@@ -22,6 +22,8 @@ conda activate llm
 
 # install the latest ipex-llm nightly build with 'all' option
 pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu
+
+pip install "transformers<4.37.0"
 pip install tiktoken einops transformers_stream_generator # additional package required for Qwen-7B-Chat to conduct generation
 ```
 
@@ -32,6 +34,8 @@ conda create -n llm python=3.11
 conda activate llm
 
 pip install --pre --upgrade ipex-llm[all]
+
+pip install "transformers<4.37.0"
 pip install tiktoken einops transformers_stream_generator
 ```
 
diff --git a/python/llm/example/CPU/PyTorch-Models/Model/qwen-vl/README.md b/python/llm/example/CPU/PyTorch-Models/Model/qwen-vl/README.md
index 25744465..f6f5f1ff 100644
--- a/python/llm/example/CPU/PyTorch-Models/Model/qwen-vl/README.md
+++ b/python/llm/example/CPU/PyTorch-Models/Model/qwen-vl/README.md
@@ -19,6 +19,8 @@ conda activate llm
 
 # install the latest ipex-llm nightly build with 'all' option
 pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu
+
+pip install "transformers<4.37.0"
 pip install accelerate tiktoken einops transformers_stream_generator==0.0.4 scipy torchvision pillow tensorboard matplotlib # additional package required for Qwen-VL-Chat to conduct generation
 ```
 
@@ -29,6 +31,8 @@ conda create -n llm python=3.11
 conda activate llm
 
 pip install --pre --upgrade ipex-llm[all]
+
+pip install "transformers<4.37.0"
 pip install accelerate tiktoken einops transformers_stream_generator==0.0.4 scipy torchvision pillow tensorboard matplotlib
 ```
 
diff --git a/python/llm/example/GPU/HuggingFace/LLM/qwen/README.md b/python/llm/example/GPU/HuggingFace/LLM/qwen/README.md
index 500e2b0f..8311f7f1 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/qwen/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/qwen/README.md
@@ -15,6 +15,7 @@ conda activate llm
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+pip install "transformers<4.37.0"
 pip install tiktoken einops transformers_stream_generator # additional package required for Qwen-7B-Chat to conduct generation
 ```
 
@@ -27,6 +28,7 @@ conda activate llm
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+pip install "transformers<4.37.0"
 pip install tiktoken einops transformers_stream_generator # additional package required for Qwen-7B-Chat to conduct generation
 ```
 
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/qwen-vl/README.md b/python/llm/example/GPU/HuggingFace/Multimodal/qwen-vl/README.md
index fb02816b..73723266 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/qwen-vl/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/qwen-vl/README.md
@@ -15,6 +15,7 @@ conda activate llm
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+pip install "transformers<4.37.0"
 pip install accelerate tiktoken einops transformers_stream_generator==0.0.4 scipy torchvision pillow tensorboard matplotlib # additional package required for Qwen-VL-Chat to conduct generation
 ```
 
@@ -27,6 +28,7 @@ conda activate llm
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+pip install "transformers<4.37.0"
 pip install accelerate tiktoken einops transformers_stream_generator==0.0.4 scipy torchvision pillow tensorboard matplotlib # additional package required for Qwen-VL-Chat to conduct generation
 ```
 
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/voiceassistant/README.md b/python/llm/example/GPU/HuggingFace/Multimodal/voiceassistant/README.md
index 67c0fb26..7dea109b 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/voiceassistant/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/voiceassistant/README.md
@@ -17,6 +17,7 @@ conda activate llm
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+pip install transformers==4.36.2
 pip install librosa soundfile datasets
 pip install accelerate
 pip install SpeechRecognition sentencepiece colorama
@@ -33,6 +34,7 @@ conda activate llm
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+pip install transformers==4.36.2
 pip install librosa soundfile datasets
 pip install accelerate
 pip install SpeechRecognition sentencepiece colorama
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/whisper/readme.md b/python/llm/example/GPU/HuggingFace/Multimodal/whisper/readme.md
index 29a4dc46..ac664fb0 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/whisper/readme.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/whisper/readme.md
@@ -16,6 +16,7 @@ conda activate llm
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+pip install transformers==4.36.2
 pip install datasets soundfile librosa # required by audio processing
 ```
 
@@ -28,6 +29,7 @@ conda activate llm
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+pip install transformers==4.36.2
 pip install datasets soundfile librosa # required by audio processing
 ```
 
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/llava/README.md b/python/llm/example/GPU/PyTorch-Models/Model/llava/README.md
index 461ae53a..77e0f1cf 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/llava/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/llava/README.md
@@ -16,7 +16,6 @@ conda activate llm
 
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 pip install einops # install dependencies required by llava
-pip install transformers==4.36.2
 
 git clone https://github.com/haotian-liu/LLaVA.git # clone the llava libary
 cp generate.py ./LLaVA/ # copy our example to the LLaVA folder
@@ -34,7 +33,6 @@ conda activate llm
 
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 pip install einops # install dependencies required by llava
-pip install transformers==4.36.2
 
 git clone https://github.com/haotian-liu/LLaVA.git # clone the llava libary
 copy generate.py .\LLaVA\ # copy our example to the LLaVA folder
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/qwen-vl/README.md b/python/llm/example/GPU/PyTorch-Models/Model/qwen-vl/README.md
index 5f9a617a..c480c545 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/qwen-vl/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/qwen-vl/README.md
@@ -15,6 +15,7 @@ conda activate llm
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+pip install "transformers<4.37.0"
 pip install accelerate tiktoken einops transformers_stream_generator==0.0.4 scipy torchvision pillow tensorboard matplotlib # additional package required for Qwen-VL-Chat to conduct generation
 ```
 
@@ -27,6 +28,7 @@ conda activate llm
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+pip install "transformers<4.37.0"
 pip install accelerate tiktoken einops transformers_stream_generator==0.0.4 scipy torchvision pillow tensorboard matplotlib # additional package required for Qwen-VL-Chat to conduct generation
 ```
 
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/speech-t5/README.md b/python/llm/example/GPU/PyTorch-Models/Model/speech-t5/README.md
index 171ff392..98806eda 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/speech-t5/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/speech-t5/README.md
@@ -15,6 +15,7 @@ conda activate llm
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+pip install transformers==4.36.2
 pip install "datasets<2.18" soundfile # additional package required for SpeechT5 to conduct generation
 ```
 
@@ -27,6 +28,7 @@ conda activate llm
 
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+pip install transformers==4.36.2
 pip install "datasets<2.18" soundfile # additional package required for SpeechT5 to conduct generation
 ```
 
diff --git a/python/llm/setup.py b/python/llm/setup.py
index f9adc5f3..4386293c 100644
--- a/python/llm/setup.py
+++ b/python/llm/setup.py
@@ -53,7 +53,7 @@ libs_dir = os.path.join(llm_home, "ipex_llm", "libs")
 cpu_torch_version = ["torch==2.1.2+cpu;platform_system=='Linux'", "torch==2.1.2;platform_system=='Windows'"]
 
 CONVERT_DEP = ['numpy == 1.26.4', # lastet 2.0.0b1 will cause error
-               'transformers == 4.36.2', 'sentencepiece', 'tokenizers == 0.15.2',
+               'transformers == 4.37.0', 'sentencepiece', 'tokenizers == 0.15.2',
                'accelerate == 0.23.0', 'tabulate'] + cpu_torch_version
 
 SERVING_DEP = ['fschat[model_worker, webui] == 0.2.36', 'protobuf']
diff --git a/python/llm/test/inference_gpu/test_transformers_api.py b/python/llm/test/inference_gpu/test_transformers_api.py
index ae9c6b9b..b29c2599 100644
--- a/python/llm/test/inference_gpu/test_transformers_api.py
+++ b/python/llm/test/inference_gpu/test_transformers_api.py
@@ -36,7 +36,7 @@ print(f'Running on {device}')
     (AutoModelForCausalLM, AutoTokenizer, os.environ.get('MPT_7B_ORIGIN_PATH')),
     # (AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH')),
     # (AutoModelForCausalLM, AutoTokenizer, os.environ.get('BAICHUAN2_7B_ORIGIN_PATH')),
-    # (AutoModelForCausalLM, AutoTokenizer, os.environ.get('QWEN_7B_ORIGIN_PATH')),
+    # (AutoModelForCausalLM, AutoTokenizer, os.environ.get('QWEN_7B_ORIGIN_PATH')), # qwen requires transformers<4.37.0
     ])
 def test_completion(Model, Tokenizer, model_path, prompt, answer):
     with torch.inference_mode():
diff --git a/python/llm/test/inference_gpu/test_transformers_api_RMSNorm.py b/python/llm/test/inference_gpu/test_transformers_api_RMSNorm.py
index f45f017e..edb2adf1 100644
--- a/python/llm/test/inference_gpu/test_transformers_api_RMSNorm.py
+++ b/python/llm/test/inference_gpu/test_transformers_api_RMSNorm.py
@@ -32,7 +32,7 @@ TEST_MODEL_LIST = [
     ("ChatGLM2-6B", AutoModel, AutoTokenizer, os.environ.get('CHATGLM2_6B_ORIGIN_PATH')),
     ("Mistral-7B-Instruct-v0.1", AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH')),
     ("Baichuan2-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('BAICHUAN2_7B_ORIGIN_PATH')),
-    ("Qwen-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('QWEN_7B_ORIGIN_PATH')),
+    # ("Qwen-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('QWEN_7B_ORIGIN_PATH')), # qwen requires transformers<4.37.0
 ]
 
 class Test_Optimize_Gpu_Model:
diff --git a/python/llm/test/inference_gpu/test_transformers_api_attention.py b/python/llm/test/inference_gpu/test_transformers_api_attention.py
index 4db5ba8b..84bdcf8e 100644
--- a/python/llm/test/inference_gpu/test_transformers_api_attention.py
+++ b/python/llm/test/inference_gpu/test_transformers_api_attention.py
@@ -34,7 +34,7 @@ TEST_MODEL_LIST = [
     ("ChatGLM2-6B", AutoModel, AutoTokenizer, os.environ.get('CHATGLM2_6B_ORIGIN_PATH')),
     ("Mistral-7B-Instruct-v0.1", AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH')),
     ("Baichuan2-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('BAICHUAN2_7B_ORIGIN_PATH')),
-    ("Qwen-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('QWEN_7B_ORIGIN_PATH')),
+    # ("Qwen-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('QWEN_7B_ORIGIN_PATH')), # qwen requires transformers<4.37.0
 ]
 
 class Test_Optimize_Gpu_Model:
diff --git a/python/llm/test/inference_gpu/test_transformers_api_mlp.py b/python/llm/test/inference_gpu/test_transformers_api_mlp.py
index cf0581a5..c6229d73 100644
--- a/python/llm/test/inference_gpu/test_transformers_api_mlp.py
+++ b/python/llm/test/inference_gpu/test_transformers_api_mlp.py
@@ -27,7 +27,7 @@ print(f'Running on {device}')
 
 PROMPT = "Once upon a time, there existed a little girl who liked to have adventures. She wanted to go to places and meet new people, and have fun"
 TEST_MODEL_LIST = [
-    ("Qwen-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('QWEN_7B_ORIGIN_PATH')),
+    # ("Qwen-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('QWEN_7B_ORIGIN_PATH')), # qwen requires transformers<4.37.0
     ("Mistral-7B-Instruct-v0.1", AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_7B_ORIGIN_PATH'.replace('MISTRAL_7B_ORIGIN_PATH', 'MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH'))),
     ("Llama2-7B", AutoModelForCausalLM, LlamaTokenizer, os.environ.get('LLAMA2_7B_ORIGIN_PATH'))
 ]
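
The thread running through this patch: the default transformers pin in setup.py moves up to 4.37.0, while Qwen/Qwen-VL still need an older release (and the Whisper, SpeechT5, and voice-assistant examples stay on 4.36.2), so the affected READMEs now pin the version explicitly and the Qwen GPU tests are commented out. As a minimal illustrative sketch, not part of the patch and assuming the packaging library is available (it ships alongside pip), an example script could fail fast with a clear hint when the installed transformers is too new for Qwen:

    # check_transformers.py - hypothetical guard, not included in this PR
    from importlib.metadata import version
    from packaging.version import parse

    # Read the installed transformers version and compare against Qwen's ceiling.
    installed = parse(version("transformers"))
    if installed >= parse("4.37.0"):
        raise RuntimeError(
            f"transformers {installed} is installed, but the Qwen/Qwen-VL examples "
            'require transformers<4.37.0; run: pip install "transformers<4.37.0"'
        )

Running such a check before model loading turns an otherwise cryptic import or attribute error inside the remote Qwen code into an actionable message mirroring the pin these READMEs add.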