LLM: Fix baichuan pre-normalize model tensor assignment issue when loading (#9481)
* No need to normalize when loading
This commit is contained in:

parent bc06bec90e
commit 30abd304a7

1 changed file with 7 additions and 3 deletions
@@ -309,8 +309,12 @@ def _optimize_pre(model):
         if hasattr(model, 'lm_head') and model.lm_head is not None:
             # do we need to check the class instance?
             vocab_size, hidden_size = model.lm_head.weight.shape
-            norm_weight = nn.functional.normalize(model.lm_head.weight.data)
-            model.lm_head = nn.Linear(hidden_size, vocab_size, bias=False)
-            model.lm_head.weight.data = norm_weight
+            lm_head_weight_data = model.lm_head.weight.data
+            model.lm_head = nn.Linear(hidden_size, vocab_size, bias=False,
+                                      device=lm_head_weight_data.device)
+            # In which case we are NOT loading the normalized weights
+            if model.lm_head.weight.data.device != "meta":
+                norm_weight = nn.functional.normalize(lm_head_weight_data)
+                model.lm_head.weight.data = norm_weight
     return model
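For context on the change: the pre-normalize optimization replaces Baichuan's weight-normalizing output head with a plain nn.Linear whose weight is normalized once up front, and the added device check skips that write when the weight is still a meta tensor, i.e. the real weights have not been materialized yet. Below is a minimal, self-contained sketch of the same pattern; prenormalize_lm_head and DummyModel are illustrative names invented here, not functions from the repository.

from torch import nn

def prenormalize_lm_head(model):
    # Same pattern as the hunk above: rebuild lm_head as a plain Linear on the
    # original weight's device, and only assign the normalized weight when the
    # data is actually materialized (not on the meta device).
    if hasattr(model, 'lm_head') and model.lm_head is not None:
        vocab_size, hidden_size = model.lm_head.weight.shape
        lm_head_weight_data = model.lm_head.weight.data
        model.lm_head = nn.Linear(hidden_size, vocab_size, bias=False,
                                  device=lm_head_weight_data.device)
        if lm_head_weight_data.device.type != "meta":
            # Weights are present, so normalize them once here instead of
            # normalizing on every forward pass.
            model.lm_head.weight.data = nn.functional.normalize(lm_head_weight_data)
    return model

class DummyModel(nn.Module):
    # Toy stand-in: only the lm_head attribute matters for this sketch.
    def __init__(self, device=None):
        super().__init__()
        self.lm_head = nn.Linear(16, 32, bias=False, device=device)

# Materialized weights: each row of the new head has unit L2 norm.
m = prenormalize_lm_head(DummyModel())
print(m.lm_head.weight.data.norm(dim=1)[:3])   # ~tensor([1., 1., 1.])

# Meta-device weights (checkpoint not loaded yet): normalization is skipped and
# the head stays on the meta device until the real weights are filled in.
m_meta = prenormalize_lm_head(DummyModel(device="meta"))
print(m_meta.lm_head.weight.device)            # meta

The sketch guards on device.type rather than comparing the torch.device object to the string "meta" as the diff does; both are meant to detect not-yet-loaded weights, and the choice here is only to keep the toy example self-explanatory.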