From 9f02f961603ef2776b1e3d77933200a2dafadce8 Mon Sep 17 00:00:00 2001
From: Heyang Sun <60865256+Uxito-Ada@users.noreply.github.com>
Date: Mon, 11 Dec 2023 14:07:34 +0800
Subject: [PATCH] [LLM] support for Yi AWQ model (#9648)

---
 .../Advanced-Quantizations/AWQ/README.md         | 2 ++
 .../Advanced-Quantizations/AWQ/README.md         | 2 ++
 python/llm/src/bigdl/llm/transformers/awq/awq.py | 3 +++
 3 files changed, 7 insertions(+)

diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Advanced-Quantizations/AWQ/README.md b/python/llm/example/CPU/HF-Transformers-AutoModels/Advanced-Quantizations/AWQ/README.md
index 918499da..f961a9a9 100644
--- a/python/llm/example/CPU/HF-Transformers-AutoModels/Advanced-Quantizations/AWQ/README.md
+++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Advanced-Quantizations/AWQ/README.md
@@ -7,6 +7,7 @@ This example shows how to directly run 4-bit AWQ models using BigDL-LLM on Intel
 - [Mistral-7B-v0.1-AWQ](https://huggingface.co/TheBloke/Mistral-7B-v0.1-AWQ)
 - [vicuna-7B-v1.5-AWQ](https://huggingface.co/TheBloke/vicuna-7B-v1.5-AWQ)
 - [vicuna-13B-v1.5-AWQ](https://huggingface.co/TheBloke/vicuna-13B-v1.5-AWQ)
+- [Yi-6B-AWQ](https://huggingface.co/TheBloke/Yi-6B-AWQ)
 
 ## Requirements
 To run these examples with BigDL-LLM, we have some recommended requirements for your machine, please refer to [here](../../../README.md#system-support) for more information.
@@ -23,6 +24,7 @@ pip install autoawq==0.1.6 --no-deps
 pip install --pre --upgrade bigdl-llm[all] # install bigdl-llm with 'all' option
 pip install transformers==4.35.0
 pip install accelerate==0.24.1
+pip install einops
 ```
 
 ### 2. Run
diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Advanced-Quantizations/AWQ/README.md b/python/llm/example/GPU/HF-Transformers-AutoModels/Advanced-Quantizations/AWQ/README.md
index c5464055..3c7dfaca 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Advanced-Quantizations/AWQ/README.md
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Advanced-Quantizations/AWQ/README.md
@@ -7,6 +7,7 @@ This example shows how to directly run 4-bit AWQ models using BigDL-LLM on Intel
 - [Mistral-7B-v0.1-AWQ](https://huggingface.co/TheBloke/Mistral-7B-v0.1-AWQ)
 - [vicuna-7B-v1.5-AWQ](https://huggingface.co/TheBloke/vicuna-7B-v1.5-AWQ)
 - [vicuna-13B-v1.5-AWQ](https://huggingface.co/TheBloke/vicuna-13B-v1.5-AWQ)
+- [Yi-6B-AWQ](https://huggingface.co/TheBloke/Yi-6B-AWQ)
 
 ## Requirements
 To run these examples with BigDL-LLM, we have some recommended requirements for your machine, please refer to [here](../../../README.md#requirements) for more information.
@@ -23,6 +24,7 @@ pip install --pre --upgrade bigdl-llm[xpu] -f https://developer.intel.com/ipex-w
 pip install transformers==4.35.0
 pip install autoawq==0.1.6 --no-deps
 pip install accelerate==0.24.1
+pip install einops
 ```
 
 ### 2. Configures OneAPI environment variables
diff --git a/python/llm/src/bigdl/llm/transformers/awq/awq.py b/python/llm/src/bigdl/llm/transformers/awq/awq.py
index f3112157..671b389d 100644
--- a/python/llm/src/bigdl/llm/transformers/awq/awq.py
+++ b/python/llm/src/bigdl/llm/transformers/awq/awq.py
@@ -70,6 +70,7 @@ layer_type_dict = {
     "mistral": "MistralDecoderLayer",
     "gpt_neox": "GPTNeoXDecoderLayer",
     "aquila": "AquilaDecoderLayer",
+    "Yi": "YiDecoderLayer",
 }
 
 
@@ -133,6 +134,8 @@ def get_blocks(model):
         layers = model.gpt_neox.layers
     elif "mistral" in str(model.__class__).lower():
         layers = model.model.layers
+    elif "yi" in str(model.__class__).lower():
+        layers = model.model.layers
     else:
         invalidInputError(False, f"Model type {type(model)} isn't supported.")
     return layers
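
Note on the `get_blocks` change: the new branch dispatches on a substring of the model's class name, the same way the existing Mistral branch does. Below is a minimal sketch of that dispatch, using a hypothetical stand-in class (real Yi checkpoints load their modeling code via `trust_remote_code`):

```python
# Sketch of the class-name dispatch in get_blocks(); YiForCausalLM is a
# hypothetical stand-in for the remote-code class shipped with Yi checkpoints.
class YiForCausalLM:
    pass

model = YiForCausalLM()

# str(model.__class__).lower() gives e.g. "<class '__main__.yiforcausallm'>",
# so the new elif branch matches and the decoder blocks are taken from
# model.model.layers, as for Llama-style architectures.
if "yi" in str(model.__class__).lower():
    print("Yi matched: blocks at model.model.layers")
```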
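
End to end, the newly listed checkpoint is then usable like the other AWQ models in these examples. A minimal sketch following the pattern of the examples' generate.py (the prompt and generation settings are placeholders):

```python
# Minimal sketch, assuming bigdl-llm and the pinned dependencies above are
# installed; the prompt and max_new_tokens are placeholders.
import torch
from transformers import AutoTokenizer
from bigdl.llm.transformers import AutoModelForCausalLM

model_path = "TheBloke/Yi-6B-AWQ"

# load_in_4bit=True converts the AWQ weights to BigDL-LLM's 4-bit format;
# trust_remote_code=True is needed because Yi ships custom modeling code
# (which imports einops, hence the new dependency in both READMEs).
model = AutoModelForCausalLM.from_pretrained(model_path,
                                             load_in_4bit=True,
                                             trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

input_ids = tokenizer.encode("What is AI?", return_tensors="pt")
with torch.inference_mode():
    output = model.generate(input_ids, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```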