fix qwen 14b fp6 abnormal output (#11583)

Yina Chen authored 2024-07-16 05:59:00 +03:00, committed by GitHub
parent c279849d27
commit 99c22745b2


@@ -667,7 +667,7 @@ def replace_with_low_bit_linear_for_module(model, qtype, module_name=None,
     return model


-def _optimize_pre(model):
+def _optimize_pre(model, qtype=None):
     try:
         from sentence_transformers.SentenceTransformer import SentenceTransformer
         if isinstance(model, SentenceTransformer):
@@ -743,6 +743,7 @@ def _optimize_pre(model):
         if should_apply_merge_qkv:
             from ipex_llm.transformers.models.qwen2 import merge_qkv
             model.apply(merge_qkv)
+        if qtype != ggml_tensor_qtype["fp6"]:
             from ipex_llm.transformers.models.qwen2 import padding_mlp
             model.apply(padding_mlp)
     if model.config.model_type == "qwen2_moe":
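
Taken together, the first two hunks thread the quantization type into the pre-optimization pass and use it to skip Qwen2 MLP padding when the target qtype is fp6, which per the commit title is what produced the abnormal Qwen 14B output. Below is a minimal sketch of the resulting control flow, assuming ggml_tensor_qtype is the qtype-name-to-id map from ipex_llm.ggml.quantize; the standalone helper name and the should_apply_merge_qkv parameter are illustrative, not part of the patched file.

from ipex_llm.ggml.quantize import ggml_tensor_qtype

def _apply_qwen2_pre_opts(model, qtype=None, should_apply_merge_qkv=True):
    # Illustrative helper mirroring the qwen2 branch of _optimize_pre
    # after this commit.
    if should_apply_merge_qkv:
        from ipex_llm.transformers.models.qwen2 import merge_qkv
        model.apply(merge_qkv)
    # The fix: only pad the MLP when the qtype is not fp6. None (legacy
    # callers) never equals a ggml_tensor_qtype id, so only an explicit
    # fp6 request opts out of padding.
    if qtype != ggml_tensor_qtype["fp6"]:
        from ipex_llm.transformers.models.qwen2 import padding_mlp
        model.apply(padding_mlp)
    return model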
@@ -795,7 +796,7 @@ def ggml_convert_low_bit(model, qtype, optimize_model=True,
         return model

     if optimize_model:
-        model = _optimize_pre(model)
+        model = _optimize_pre(model, qtype)

     act_order = False
     if getattr(model, "quantization_method", None) == "gptq":
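
The last hunk updates the call site: ggml_convert_low_bit now forwards its qtype into _optimize_pre, while the qtype=None default keeps any other callers on the previous behavior. A hypothetical end-to-end use that exercises the new path, assuming ipex-llm's standard loading API; the checkpoint id and keyword values are examples, not part of this commit:

from ipex_llm.transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen1.5-14B-Chat",  # example Qwen2-family checkpoint
    load_in_low_bit="fp6",    # the qtype this commit special-cases
    optimize_model=True,      # routes through _optimize_pre(model, qtype)
)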