Add qwen2-1.5b in l0 pipeline example (#12306)
commit 4892df61c9
parent 30f668c206

3 changed files with 13 additions and 3 deletions
@@ -8,6 +8,7 @@ In this directory, you will find examples on how to directly run HuggingFace `tr
 |------------|----------------------------------------------------------------|
 | Llama2 | [meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) |
 | Llama3 | [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) |
+| Qwen2 | [Qwen/Qwen2-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2-1.5B-Instruct) |
 | Qwen2.5 | [Qwen/Qwen2.5-7b-Instruct](https://huggingface.co/Qwen/Qwen2.5-7b-Instruct) |
 | Baichuan2 | [baichuan-inc/Baichuan2-7B-Chat](https://huggingface.co/baichuan-inc/Baichuan-7B-Chat) |
 | MiniCPM | [openbmb/MiniCPM-1B-sft-bf16](https://huggingface.co/openbmb/MiniCPM-1B-sft-bf16) |
@@ -50,6 +51,9 @@ python llama3.py
 :: to run Qwen2.5-7b-Instruct
 python qwen.py
 
+:: to run Qwen2-1.5b-Instruct
+python qwen.py --repo-id-or-model-path "Qwen/Qwen2-1.5B-Instruct" --load_in_low_bit "sym_int8"
+
 :: to run Baichuan2-7B-Chat
 python baichuan2.py
 
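In effect, the new README command asks qwen.py to load the 1.5B checkpoint through the NPU pipeline API with `sym_int8` weights. A minimal sketch of that load, assuming the `ipex_llm.transformers.npu_model` import path these examples use (the keyword arguments mirror the qwen.py hunk further down):

```python
# Sketch only: the import path and the tokenizer step are assumptions;
# the from_pretrained() keywords mirror the qwen.py change below.
import torch
from transformers import AutoTokenizer
from ipex_llm.transformers.npu_model import AutoModelForCausalLM  # assumed module path

model_path = "Qwen/Qwen2-1.5B-Instruct"

model = AutoModelForCausalLM.from_pretrained(model_path,
                                             optimize_model=True,
                                             pipeline=True,
                                             load_in_low_bit="sym_int8",  # value of --load_in_low_bit
                                             max_context_len=1024,
                                             max_prompt_len=960,
                                             torch_dtype=torch.float16)

tokenizer = AutoTokenizer.from_pretrained(model_path)
```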
@@ -32,8 +32,8 @@ if __name__ == "__main__":
     parser.add_argument(
         "--repo-id-or-model-path",
         type=str,
-        default="Qwen/Qwen2.5-7B-Instruct",  # Or Qwen2-7B-Instruct
-        help="The huggingface repo id for the Baichuan2 model to be downloaded"
+        default="Qwen/Qwen2.5-7B-Instruct",  # Or Qwen2-7B-Instruct, Qwen2-1.5B-Instruct
+        help="The huggingface repo id for the Qwen model to be downloaded"
         ", or the path to the huggingface checkpoint folder",
     )
     parser.add_argument("--lowbit-path", type=str,
@@ -47,6 +47,8 @@ if __name__ == "__main__":
     parser.add_argument("--n-predict", type=int, default=32, help="Max tokens to predict")
     parser.add_argument("--max-context-len", type=int, default=1024)
     parser.add_argument("--max-prompt-len", type=int, default=960)
+    parser.add_argument('--load_in_low_bit', type=str, default="sym_int4",
+                        help='Load in low bit to use')
     parser.add_argument("--disable-transpose-value-cache", action="store_true", default=False)
 
     args = parser.parse_args()
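The new flag accepts any string and defaults to `sym_int4`; the README command above overrides it to `sym_int8` for the 1.5B model. If you wanted argparse to reject unsupported values up front, a `choices` list would do it; a hypothetical variant (the patched script does not restrict the value):

```python
import argparse

# Hypothetical tightening: only accept the two low-bit formats this PR exercises.
parser = argparse.ArgumentParser()
parser.add_argument('--load_in_low_bit', type=str, default="sym_int4",
                    choices=["sym_int4", "sym_int8"],
                    help='Low-bit format to load the weights in')

args = parser.parse_args(["--load_in_low_bit", "sym_int8"])
print(args.load_in_low_bit)  # -> sym_int8
```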
@@ -56,6 +58,7 @@ if __name__ == "__main__":
         model = AutoModelForCausalLM.from_pretrained(model_path,
                                                      optimize_model=True,
                                                      pipeline=True,
+                                                     load_in_low_bit=args.load_in_low_bit,
                                                      max_context_len=args.max_context_len,
                                                      max_prompt_len=args.max_prompt_len,
                                                      torch_dtype=torch.float16,
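Once the model is loaded this way, generation follows the usual `transformers` flow; a minimal sketch, reusing the `model` and `tokenizer` from the earlier snippet (prompt and generation settings are illustrative, not taken from the example script):

```python
# Illustrative generation step; assumes `model` and `tokenizer` from the sketch above.
messages = [{"role": "user", "content": "What is AI?"}]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

inputs = tokenizer([text], return_tensors="pt")
output = model.generate(inputs.input_ids, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```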
@@ -27,7 +27,10 @@ def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir):
     rms_norm_eps = model.config.rms_norm_eps
     vocab_size = model.config.vocab_size
     model_norm = model.model.norm
-    lm_heads = model.lm_head.lm_heads  # Qwen2 is always SlicedLMHead
+    if model.config.intermediate_size == 18944:
+        lm_heads = model.lm_head.lm_heads  # Qwen2-7B is always SlicedLMHead
+    else:
+        lm_heads = [model.lm_head]
     if n_splits_linear == 1:
         weights = [(lm_heads[0].weight, lm_heads[0].scale)]
     else:
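The new branch keys off `config.intermediate_size` to tell the Qwen2 variants apart: 18944 identifies Qwen2-7B, whose `lm_head` has already been replaced by a `SlicedLMHead` exposing `.lm_heads`, while smaller checkpoints such as Qwen2-1.5B keep a single head, so the fallback wraps it in a one-element list and the downstream `weights = [(lm_heads[0].weight, lm_heads[0].scale)]` code works unchanged. The same dispatch, restated as a standalone helper (everything other than the helper's name comes from the hunk above):

```python
def get_lm_heads(model):
    """Pick the lm_head modules to export, as in the hunk above.

    Qwen2-7B (intermediate_size == 18944) goes through the sliced-head path,
    where model.lm_head is a SlicedLMHead holding several slices; smaller
    Qwen2 checkpoints such as Qwen2-1.5B keep a single head, which is wrapped
    in a list so the caller can index lm_heads[0] either way.
    """
    if model.config.intermediate_size == 18944:
        return model.lm_head.lm_heads   # list of head slices
    return [model.lm_head]              # plain head, wrapped for a uniform interface
```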