[NPU] Fix MTL and ARL support (#12580)
commit 4e7e988f70
parent 80f2fdc37b

3 changed files with 12 additions and 3 deletions
@@ -88,6 +88,7 @@ For `ipex-llm` NPU support, please set the following environment variable with a

- For **Intel Core™ Ultra Processors (Series 2) with processor number 2xxV (code name Lunar Lake)**:

  - For Intel Core™ Ultra 7 Processor 258V:

      No runtime configuration required.

  - For Intel Core™ Ultra 5 Processor 228V & 226V:
@@ -97,7 +98,7 @@ For `ipex-llm` NPU support, please set the following environment variable with a

- For **Intel Core™ Ultra Processors (Series 2) with processor number 2xxK (code name Arrow Lake)**:

   ```cmd
   set IPEX_LLM_NPU_DISABLE_COMPILE_OPT=1
   set IPEX_LLM_NPU_ARL=1
   ```

- For **Intel Core™ Ultra Processors (Series 1) with processor number 1xxH (code name Meteor Lake)**:
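As a rough sketch of how these runtime flags take effect (`describe_npu_runtime_config` below is a hypothetical helper, not part of ipex-llm), the library reads the process environment at model-optimization time, treating every flag as off unless it is explicitly set to `"1"`:

```python
import os

def describe_npu_runtime_config() -> dict:
    # Hypothetical helper mirroring how ipex-llm checks its NPU flags;
    # each flag defaults to "0" (off) when the variable is unset.
    return {
        "arl_mode": os.environ.get("IPEX_LLM_NPU_ARL", "0") == "1",
        "mtl_mode": os.environ.get("IPEX_LLM_NPU_MTL", "0") == "1",
        "compile_opt_disabled":
            os.environ.get("IPEX_LLM_NPU_DISABLE_COMPILE_OPT", "0") == "1",
    }

# After the Arrow Lake setup above (`set IPEX_LLM_NPU_DISABLE_COMPILE_OPT=1`
# and `set IPEX_LLM_NPU_ARL=1`), this prints:
#   {'arl_mode': True, 'mtl_mode': False, 'compile_opt_disabled': True}
print(describe_npu_runtime_config())
```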
```diff
@@ -37,6 +37,10 @@ def optimize_llm_pre(model: torch.nn.Module, qtype, mixed_precision,
         os.environ["IPEX_LLM_NPU_USE_LEVEL0"] = "0"
         os.environ["IPEX_LLM_NPU_DISABLE_COMPILE_OPT"] = "1"
 
+    if os.environ.get("IPEX_LLM_NPU_ARL", "0") == "1":
+        # For ARL support
+        os.environ["IPEX_LLM_NPU_DISABLE_COMPILE_OPT"] = "1"
+
     if model.config.model_type == "baichuan":
         # process NormHead module in Baichuan2 7B
         if hasattr(model, 'lm_head') and model.lm_head is not None:
```
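A practical consequence of this hunk, sketched below (`_apply_arl_workaround` is a hypothetical stand-in for the added branch, not an ipex-llm API, and the sketch assumes nothing else in the process has set the variable): a user who sets only `IPEX_LLM_NPU_ARL=1` still runs with compile optimizations disabled, because `optimize_llm_pre` writes the second flag back into the environment itself.

```python
import os

def _apply_arl_workaround() -> None:
    # Hypothetical stand-in for the added branch in optimize_llm_pre:
    # when ARL mode is requested, force-disable the NPU compile
    # optimization for the whole process.
    if os.environ.get("IPEX_LLM_NPU_ARL", "0") == "1":
        os.environ["IPEX_LLM_NPU_DISABLE_COMPILE_OPT"] = "1"

os.environ["IPEX_LLM_NPU_ARL"] = "1"   # user sets only the ARL flag
_apply_arl_workaround()
# Downstream code now observes compile optimizations as disabled too:
assert os.environ["IPEX_LLM_NPU_DISABLE_COMPILE_OPT"] == "1"
```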
```diff
@@ -144,7 +148,9 @@ def optimize_llm_pre(model: torch.nn.Module, qtype, mixed_precision,
                 # do not split mlp down_proj for Qwen2-7B & sym_int8
                 n_splits_down_proj = 1
             else:
-                n_splits_down_proj = 2 if model.config.intermediate_size == 18944 else 1
+                n_splits_down_proj = 2 if (model.config.intermediate_size == 18944 or
+                                           os.environ.get("IPEX_LLM_NPU_MTL", "0") == "1" or
+                                           os.environ.get("IPEX_LLM_NPU_ARL", "0") == "1") else 1
         else:
             invalidInputError(
                 model.config.hidden_size % quantization_group_size == 0 and
```
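For context, `n_splits_down_proj` controls how many slices the MLP `down_proj` weight is partitioned into; with this fix, Meteor Lake and Arrow Lake (flagged via `IPEX_LLM_NPU_MTL` / `IPEX_LLM_NPU_ARL`) get the same two-way split that the `intermediate_size == 18944` case (Qwen2-7B) already used. Below is a minimal sketch of such a split, assuming the weight is divided along its input dimension; `split_down_proj` is illustrative, not the ipex-llm internal.

```python
import torch

def split_down_proj(weight: torch.Tensor, n_splits: int) -> list[torch.Tensor]:
    # Illustrative only: partition a down_proj weight of shape
    # (hidden_size, intermediate_size) into n_splits equal slices
    # along the intermediate (input) dimension.
    assert weight.shape[1] % n_splits == 0
    return list(torch.chunk(weight, n_splits, dim=1))

# Qwen2-7B: hidden_size=3584, intermediate_size=18944, so
# n_splits_down_proj=2 yields two 3584 x 9472 slices.
w = torch.empty(3584, 18944, device="meta")  # meta: no real allocation
parts = split_down_proj(w, n_splits=2)
print([tuple(p.shape) for p in parts])   # [(3584, 9472), (3584, 9472)]
```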
```diff
@@ -205,7 +205,9 @@ def convert_llm(model: torch.nn.Module,
             # do not split mlp down_proj for Qwen2-7B & sym_int8
             n_splits_down_proj = 1
         else:
-            n_splits_down_proj = 2 if model.config.intermediate_size == 18944 else 1
+            n_splits_down_proj = 2 if (model.config.intermediate_size == 18944 or
+                                       os.environ.get("IPEX_LLM_NPU_MTL", "0") == "1" or
+                                       os.environ.get("IPEX_LLM_NPU_ARL", "0") == "1") else 1
     else:
         n_splits_linear = model.config.hidden_size // group_size
         n_splits_down_proj = model.config.intermediate_size // group_size
```
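Note that the same condition appears in both `optimize_llm_pre` and `convert_llm`; presumably the two call sites must agree on the split count so that the weights prepared during optimization match the layout the converter expects.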