From 8fa98e27428e07858d5c0ac3f6c9b62ec0f28c80 Mon Sep 17 00:00:00 2001
From: "Jin, Qiao" <89779290+JinBridger@users.noreply.github.com>
Date: Tue, 22 Oct 2024 17:07:51 +0800
Subject: [PATCH] Remove Qwen2-7b from NPU example for "Run Optimized Models
 (Experimental)" (#12245)

* Remove qwen2-7b from npu example readme

* fix
---
 .../NPU/HF-Transformers-AutoModels/LLM/README.md | 16 +++-------------
 .../NPU/HF-Transformers-AutoModels/LLM/qwen.py   |  2 +-
 2 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/README.md b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/README.md
index dadd8ce6..27a405b0 100644
--- a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/README.md
+++ b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/README.md
@@ -83,7 +83,6 @@ The examples below show how to run the **_optimized HuggingFace model implementa
 - [Llama2-7B](./llama.py)
 - [Llama3-8B](./llama.py)
 - [Qwen2-1.5B](./qwen.py)
-- [Qwen2-7B](./qwen.py)
 - [Qwen2.5-7B](./qwen.py)
 - [MiniCPM-1B](./minicpm.py)
 - [MiniCPM-2B](./minicpm.py)
@@ -91,13 +90,13 @@ The examples below show how to run the **_optimized HuggingFace model implementa
 
 ### Recommended NPU Driver Version for MTL Users
 #### 32.0.100.2540
-Supported models: Llama2-7B, Llama3-8B, Qwen2-1.5B, Qwen2-7B, MiniCPM-1B, MiniCPM-2B, Baichuan2-7B
+Supported models: Llama2-7B, Llama3-8B, Qwen2-1.5B, MiniCPM-1B, MiniCPM-2B, Baichuan2-7B
 
 ### Recommended NPU Driver Version for LNL Users
 #### 32.0.100.2625
 Supported models: Llama2-7B, MiniCPM-1B, Baichuan2-7B
 #### 32.0.101.2715
-Supported models: Llama3-8B, MiniCPM-2B, Qwen2-7B, Qwen2-1.5B, Qwen2.5-7B
+Supported models: Llama3-8B, MiniCPM-2B, Qwen2-1.5B, Qwen2.5-7B
 
 ### Run
 ```cmd
@@ -110,9 +109,6 @@ python llama.py --repo-id-or-model-path meta-llama/Meta-Llama-3-8B-Instruct
 
 :: to run Qwen2-1.5B-Instruct (LNL driver version: 32.0.101.2715)
 python qwen.py
 
-:: to run Qwen2-7B-Instruct (LNL driver version: 32.0.101.2715)
-python qwen.py --repo-id-or-model-path Qwen/Qwen2-7B-Instruct
-
 :: to run Qwen2.5-7B-Instruct (LNL driver version: 32.0.101.2715)
 python qwen.py --repo-id-or-model-path Qwen/Qwen2.5-7B-Instruct
@@ -152,9 +148,6 @@ python llama.py --repo-id-or-model-path meta-llama/Meta-Llama-3-8B-Instruct --d
 
 :: to run Qwen2-1.5B-Instruct (LNL driver version: 32.0.101.2715)
 python qwen.py --disable-transpose-value-cache
 
-:: to run Qwen2-7B-Instruct (LNL driver version: 32.0.101.2715)
-python qwen.py --repo-id-or-model-path Qwen/Qwen2-7B-Instruct --disable-transpose-value-cache
-
 :: to run Qwen2.5-7B-Instruct (LNL driver version: 32.0.101.2715)
 python qwen.py --repo-id-or-model-path Qwen/Qwen2.5-7B-Instruct --disable-transpose-value-cache
@@ -168,11 +161,8 @@ python minicpm.py --repo-id-or-model-path openbmb/MiniCPM-2B-sft-bf16 --disable-
 python baichuan2.py --disable-transpose-value-cache
 ```
 
-For [Qwen2-7B](./qwen.py) and [Qwen2.5-7B](./qwen.py), you could also try to enable mixed precision optimization when encountering output problems:
+For [Qwen2.5-7B](./qwen.py), you could also try to enable mixed precision optimization when encountering output problems:
 
-```cmd
-python qwen.py --repo-id-or-model-path Qwen/Qwen2-7B-Instruct --mixed-precision
-```
 ```cmd
 python qwen.py --repo-id-or-model-path Qwen/Qwen2.5-7B-Instruct --mixed-precision
 ```
diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/qwen.py b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/qwen.py
index 835a72de..dd1e958c 100644
--- a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/qwen.py
+++ b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/qwen.py
@@ -33,7 +33,7 @@ if __name__ == "__main__":
     parser.add_argument(
         "--repo-id-or-model-path",
         type=str,
-        default="Qwen/Qwen2-1.5B-Instruct",
+        default="Qwen/Qwen2.5-7B-Instruct",
         help="The huggingface repo id for the Qwen2 or Qwen2.5 model to be downloaded"
         ", or the path to the huggingface checkpoint folder",
     )
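For reference, a minimal stand-alone sketch of the argument parsing this patch touches in `qwen.py`. Only the `parser.add_argument(...)` fragment comes from the hunk above; the bare `ArgumentParser()` construction, the `parse_args()` call, and the `print` are filler assumptions added so the snippet runs on its own, and the surrounding ipex-llm model-loading code is omitted.

```python
import argparse

if __name__ == "__main__":
    # Filler construction: the real qwen.py builds its parser with its own
    # description and additional flags not shown in this hunk.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--repo-id-or-model-path",
        type=str,
        default="Qwen/Qwen2.5-7B-Instruct",  # was "Qwen/Qwen2-1.5B-Instruct" before this patch
        help="The huggingface repo id for the Qwen2 or Qwen2.5 model to be downloaded"
        ", or the path to the huggingface checkpoint folder",
    )
    args = parser.parse_args()

    # A bare `python qwen.py` now resolves to Qwen2.5-7B-Instruct.
    print(args.repo_id_or_model_path)
```

Invoked with no arguments it prints `Qwen/Qwen2.5-7B-Instruct`; the 1.5B model remains reachable by passing `--repo-id-or-model-path Qwen/Qwen2-1.5B-Instruct` explicitly.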