change mixed_precision to q6_k (#11706)
parent 1baa3efe0e
commit aa98ef96fe

1 changed file with 1 addition and 1 deletion
@@ -394,7 +394,7 @@ def _replace_with_low_bit_linear(model, qtype, modules_to_not_convert=None,
                     if mixed_precision and is_lm_head(name, model_config, out_features):
                         if cur_qtype in [ggml_tensor_qtype["sym_int4"],
                                          ggml_tensor_qtype["asym_int4"]]:
-                            cur_qtype = ggml_tensor_qtype["sym_int8"]
+                            cur_qtype = ggml_tensor_qtype["q6_k"]
 
                     # check hidden size whether is a multiple of 256
                     cur_qtype = check_hidden_size(cur_qtype, in_features)
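For context, here is a minimal, self-contained sketch of the rule this one-line change touches: with mixed precision enabled, a 4-bit lm_head is upgraded to a higher-precision qtype, and this commit moves the upgrade target from sym_int8 to q6_k. The pick_lm_head_qtype helper and the numeric values in the stand-in ggml_tensor_qtype dict below are illustrative assumptions, not ipex-llm's actual definitions.

# Illustration only: self-contained sketch of the mixed-precision lm_head rule.
# The real ggml_tensor_qtype mapping and helpers live in ipex_llm; the values
# and the helper name here are placeholders.

ggml_tensor_qtype = {"sym_int4": 2, "asym_int4": 1, "sym_int8": 8, "q6_k": 19}  # placeholder codes


def pick_lm_head_qtype(cur_qtype, mixed_precision, is_lm_head_layer):
    """Hypothetical helper: under mixed precision, upgrade a 4-bit lm_head
    to q6_k (before this commit the upgrade target was sym_int8)."""
    if mixed_precision and is_lm_head_layer:
        if cur_qtype in (ggml_tensor_qtype["sym_int4"],
                         ggml_tensor_qtype["asym_int4"]):
            cur_qtype = ggml_tensor_qtype["q6_k"]
    return cur_qtype


if __name__ == "__main__":
    # A 4-bit lm_head is bumped to q6_k when mixed precision is enabled.
    assert pick_lm_head_qtype(ggml_tensor_qtype["sym_int4"], True, True) == ggml_tensor_qtype["q6_k"]
    # Other layers, or mixed_precision=False, keep their original qtype.
    assert pick_lm_head_qtype(ggml_tensor_qtype["sym_int4"], False, True) == ggml_tensor_qtype["sym_int4"]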