LLM: add compressed chatglm3 model (#9892)
* LLM: add compressed chatglm3 model
* small fix
* revert github action
This commit is contained in:

parent 9e2ac5291b
commit 100e0a87e5

2 changed files with 10 additions and 4 deletions
@@ -365,12 +365,17 @@ def run_transformer_int4_gpu(repo_id,
     # Load model in 4 bit,
     # which convert the relevant layers in the model into INT4 format
     st = time.perf_counter()
-    if repo_id in CHATGLM_IDS:
-        model = AutoModel.from_pretrained(model_path, load_in_low_bit=low_bit, optimize_model=True,
-                                          trust_remote_code=True, use_cache=True).eval()
+    origin_repo_id = repo_id.replace("-4bit", "")
+    if origin_repo_id in CHATGLM_IDS:
+        if "4bit" in repo_id:
+            model = AutoModel.load_low_bit(model_path, optimize_model=True,
+                                           trust_remote_code=True, use_cache=True).eval()
+        else:
+            model = AutoModel.from_pretrained(model_path, load_in_low_bit=low_bit, optimize_model=True,
+                                              trust_remote_code=True, use_cache=True).eval()
         tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
         model = model.to('xpu')
-    elif repo_id in LLAMA_IDS:
+    elif origin_repo_id in LLAMA_IDS:
         model = AutoModelForCausalLM.from_pretrained(model_path, load_in_low_bit=low_bit, trust_remote_code=True,
                                                      use_cache=True).eval()
         tokenizer = LlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)
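For context, the new branch calls AutoModel.load_low_bit, which loads weights that were already converted to INT4, instead of quantizing a full-precision checkpoint at load time. Below is a minimal sketch of how such a pre-converted checkpoint would typically be produced and then consumed with bigdl-llm; the save_low_bit step, the "sym_int4" value, and the local paths are illustrative assumptions, not part of this commit.

# Minimal sketch, assuming bigdl-llm's AutoModel API; paths and the
# one-time save_low_bit conversion are illustrative, not from this commit.
from bigdl.llm.transformers import AutoModel
from transformers import AutoTokenizer

ORIGIN_REPO = "THUDM/chatglm3-6b"    # original full-precision checkpoint
LOW_BIT_DIR = "./chatglm3-6b-4bit"   # hypothetical local save path

# One-time conversion: quantize while loading, then persist the INT4 weights.
model = AutoModel.from_pretrained(ORIGIN_REPO, load_in_low_bit="sym_int4",
                                  optimize_model=True, trust_remote_code=True)
model.save_low_bit(LOW_BIT_DIR)

# Later runs (what the benchmark's new "4bit" branch does): load the
# pre-converted weights directly, skipping on-the-fly quantization.
model = AutoModel.load_low_bit(LOW_BIT_DIR, optimize_model=True,
                               trust_remote_code=True, use_cache=True).eval()
tokenizer = AutoTokenizer.from_pretrained(LOW_BIT_DIR, trust_remote_code=True)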
@@ -2,6 +2,7 @@ repo_id:
   - 'meta-llama/Llama-2-7b-chat-hf'
   - 'meta-llama/Llama-2-13b-chat-hf'
   - 'THUDM/chatglm2-6b'
+  - 'THUDM/chatglm3-6b-4bit'
   - 'tiiuae/falcon-7b-instruct-with-patch'
   - 'mosaicml/mpt-7b-chat'
   - 'redpajama/gptneox-7b-redpajama-bf16'
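The "-4bit" suffix in the new config entry is what routes the benchmark into the load_low_bit branch added above: the suffix is stripped to recover the original repo id for the CHATGLM_IDS lookup, while the presence of "4bit" in the full id selects the pre-converted load path. A small sketch of that routing follows; the CHATGLM_IDS contents are assumed for illustration.

# Sketch of the routing implied by the diff; CHATGLM_IDS contents are assumed.
CHATGLM_IDS = {"THUDM/chatglm2-6b", "THUDM/chatglm3-6b"}

repo_id = "THUDM/chatglm3-6b-4bit"             # the new entry in the config
origin_repo_id = repo_id.replace("-4bit", "")  # -> "THUDM/chatglm3-6b"

if origin_repo_id in CHATGLM_IDS:
    use_preconverted = "4bit" in repo_id        # True -> AutoModel.load_low_bit(...)
    print(origin_repo_id, use_preconverted)     # THUDM/chatglm3-6b True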