[NPU] Fix MTL and ARL support (#12580)

binbin Deng 2024-12-19 16:55:30 +08:00 committed by GitHub
parent 80f2fdc37b
commit 4e7e988f70
3 changed files with 12 additions and 3 deletions


@@ -88,6 +88,7 @@ For `ipex-llm` NPU support, please set the following environment variable with a
 - For **Intel Core™ Ultra Processors (Series 2) with processor number 2xxV (code name Lunar Lake)**:
   - For Intel Core™ Ultra 7 Processor 258V:
     No runtime configuration required.
   - For Intel Core™ Ultra 5 Processor 228V & 226V:
@@ -97,7 +98,7 @@ For `ipex-llm` NPU support, please set the following environment variable with a
 - For **Intel Core™ Ultra Processors (Series 2) with processor number 2xxK (code name Arrow Lake)**:
   ```cmd
-  set IPEX_LLM_NPU_DISABLE_COMPILE_OPT=1
+  set IPEX_LLM_NPU_ARL=1
   ```
 - For **Intel Core™ Ultra Processors (Series 1) with processor number 1xxH (code name Meteor Lake)**:
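On the documentation side, Arrow Lake users are now pointed at the dedicated `IPEX_LLM_NPU_ARL` flag instead of the low-level `IPEX_LLM_NPU_DISABLE_COMPILE_OPT` switch; the library translates the new flag internally (see the `optimize_llm_pre` hunk below). A minimal sketch of setting the same flag from Python rather than `cmd`; the note about when it must be set is an assumption about typical usage, not part of this change:

```python
import os

# Equivalent of `set IPEX_LLM_NPU_ARL=1` from the quickstart, done in Python.
# The variable has to be set before ipex-llm reads it, i.e. before the model
# is loaded/optimized for the NPU (assumed typical usage, not shown in this diff).
os.environ["IPEX_LLM_NPU_ARL"] = "1"
```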


@@ -37,6 +37,10 @@ def optimize_llm_pre(model: torch.nn.Module, qtype, mixed_precision,
         os.environ["IPEX_LLM_NPU_USE_LEVEL0"] = "0"
         os.environ["IPEX_LLM_NPU_DISABLE_COMPILE_OPT"] = "1"

+    if os.environ.get("IPEX_LLM_NPU_ARL", "0") == "1":
+        # For ARL support
+        os.environ["IPEX_LLM_NPU_DISABLE_COMPILE_OPT"] = "1"
+
     if model.config.model_type == "baichuan":
         # process NormHead module in Baichuan2 7B
         if hasattr(model, 'lm_head') and model.lm_head is not None:
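For readability, here is the platform gating this hunk ends up with, pulled out into a standalone sketch. The helper name is hypothetical, and the assumption that the two context lines above sit under an `IPEX_LLM_NPU_MTL` check is inferred from the commit title and the later hunks, not shown in this diff:

```python
import os

def _apply_npu_platform_flags() -> None:
    """Hypothetical standalone view of the platform gating in optimize_llm_pre()."""
    if os.environ.get("IPEX_LLM_NPU_MTL", "0") == "1":
        # Meteor Lake (assumed enclosing branch of the context lines above):
        # skip the Level Zero path and disable the compile optimization.
        os.environ["IPEX_LLM_NPU_USE_LEVEL0"] = "0"
        os.environ["IPEX_LLM_NPU_DISABLE_COMPILE_OPT"] = "1"
    if os.environ.get("IPEX_LLM_NPU_ARL", "0") == "1":
        # Arrow Lake (added by this commit): only the compile
        # optimization needs to be disabled.
        os.environ["IPEX_LLM_NPU_DISABLE_COMPILE_OPT"] = "1"
```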
@@ -144,7 +148,9 @@ def optimize_llm_pre(model: torch.nn.Module, qtype, mixed_precision,
             # do not split mlp down_proj for Qwen2-7B & sym_int8
             n_splits_down_proj = 1
         else:
-            n_splits_down_proj = 2 if model.config.intermediate_size == 18944 else 1
+            n_splits_down_proj = 2 if (model.config.intermediate_size == 18944 or
+                                       os.environ.get("IPEX_LLM_NPU_MTL", "0") == "1" or
+                                       os.environ.get("IPEX_LLM_NPU_ARL", "0") == "1") else 1
     else:
         invalidInputError(
             model.config.hidden_size % quantization_group_size == 0 and
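The behavioural effect of the changed expression: `down_proj` is now split in two not only for the 18944-wide MLP (Qwen2-7B) but whenever the MTL or ARL flag is set. A small re-statement of that decision as a standalone function; the function name and the boolean parameter for the sym_int8 special case are illustrative, paraphrased from the diff's comment rather than copied from the source:

```python
import os

def pick_down_proj_splits(intermediate_size: int, is_qwen2_7b_sym_int8: bool) -> int:
    # do not split mlp down_proj for Qwen2-7B & sym_int8 (guard paraphrased).
    if is_qwen2_7b_sym_int8:
        return 1
    # New rule: split in two for the 18944-wide MLP, or on MTL/ARL machines.
    return 2 if (intermediate_size == 18944 or
                 os.environ.get("IPEX_LLM_NPU_MTL", "0") == "1" or
                 os.environ.get("IPEX_LLM_NPU_ARL", "0") == "1") else 1

# Example: with IPEX_LLM_NPU_ARL=1, even a narrower MLP such as
# intermediate_size=8960 now yields n_splits_down_proj == 2.
```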


@@ -205,7 +205,9 @@ def convert_llm(model: torch.nn.Module,
             # do not split mlp down_proj for Qwen2-7B & sym_int8
             n_splits_down_proj = 1
         else:
-            n_splits_down_proj = 2 if model.config.intermediate_size == 18944 else 1
+            n_splits_down_proj = 2 if (model.config.intermediate_size == 18944 or
+                                       os.environ.get("IPEX_LLM_NPU_MTL", "0") == "1" or
+                                       os.environ.get("IPEX_LLM_NPU_ARL", "0") == "1") else 1
     else:
         n_splits_linear = model.config.hidden_size // group_size
         n_splits_down_proj = model.config.intermediate_size // group_size
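For contrast, the untouched `else` branch derives the split counts from the quantization group size instead of the platform flags. A worked example with illustrative numbers (chosen to match the 18944 intermediate size that the changed branch special-cases; the 3584 hidden size and 128 group size are assumptions for the arithmetic only):

```python
# Group-size branch, spelled out with concrete (illustrative) numbers.
hidden_size = 3584
intermediate_size = 18944
group_size = 128

n_splits_linear = hidden_size // group_size             # 3584 // 128 == 28
n_splits_down_proj = intermediate_size // group_size    # 18944 // 128 == 148
```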