Fix speech_paraformer issue with unexpected changes (#12416)

* Fix speech_paraformer issue with unexpected changes

* Pin the funasr and modelscope versions required by Paraformer
SONG Ge 2024-11-18 23:01:20 -08:00 committed by GitHub
parent a9cb70a71c
commit ff3f7cb25f
3 changed files with 20 additions and 16 deletions

@@ -37,8 +37,8 @@ pip install timm torch==2.1.2 torchvision==0.16.2
 pip install BCEmbedding==0.1.5 transformers==4.40.0
 # [optional] for Speech_Paraformer-Large
-pip install -U funasr
-pip install modelscope torch==2.1.2 torchaudio==2.1.2
+pip install funasr==1.1.14
+pip install modelscope==1.20.1 torch==2.1.2 torchaudio==2.1.2
 ```
 ### 2. Runtime Configurations
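
The README change replaces the floating `pip install -U funasr` with pinned releases, so that upstream funasr/modelscope API changes cannot silently break Speech_Paraformer-Large. As a minimal sketch (not part of this commit; the check below is purely illustrative), the pins could be verified at runtime with `importlib.metadata`:

```python
# Illustrative sketch: fail fast if the pinned Speech_Paraformer dependencies
# drift. The versions mirror the README change above; the check itself is an
# assumption, not code from this repository.
from importlib.metadata import version, PackageNotFoundError

PINNED = {"funasr": "1.1.14", "modelscope": "1.20.1"}

for pkg, expected in PINNED.items():
    try:
        installed = version(pkg)
    except PackageNotFoundError:
        raise SystemExit(f"{pkg} is not installed; run `pip install {pkg}=={expected}`")
    if installed != expected:
        print(f"warning: {pkg}=={installed} found, {expected} expected")
```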

@@ -162,11 +162,13 @@ class _BaseAutoModelClass:
             model = cls.HF_Model.from_pretrained(*args, **kwargs)
         else:
             model = cls.HF_Model(*args, **kwargs)
-        model.config.update({"bigdl_lcmu_enabled": False})
+        if hasattr(model, "config"):
+            model.config.update({"bigdl_lcmu_enabled": False})
 
         logger.info(f"Converting model, it may takes up to several minutes ...")
 
-        model.config.update({"optimize_model": optimize_model})
+        if hasattr(model, "config"):
+            model.config.update({"optimize_model": optimize_model})
 
         if mock_device == "cpu":
             with torch.no_grad():
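
The new `hasattr` guards matter because not every model routed through `_BaseAutoModelClass` is a Hugging Face `PreTrainedModel`: a Speech_Paraformer model loaded through funasr exposes no `config` attribute, so the unconditional `model.config.update(...)` raised `AttributeError`. A self-contained sketch of the failure mode and the guarded fix (both classes below are illustrative stand-ins, not the repository's types):

```python
# Illustrative only: mimics why the unconditional config.update broke Paraformer.
class HFStyleModel:
    class _Cfg(dict):
        pass  # HF-style configs expose a dict-like update() method
    config = _Cfg()

class ParaformerStyleModel:
    pass  # funasr-style model: no `config` attribute at all

for model in (HFStyleModel(), ParaformerStyleModel()):
    # The guarded form from this commit works for both model styles;
    # the old unguarded call would raise AttributeError on the second.
    if hasattr(model, "config"):
        model.config.update({"optimize_model": True})
```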

@@ -294,17 +294,17 @@ class FusedLlamaLowBitDecoderlayer(torch.nn.Module):
             torch.Tensor: result
         """
         backend_cls = self.backend_cls_prefill
-        inputs = (x,
-                  masks,
-                  self.layer_norm_0_weight,
-                  self.layer_norm_0_bias,
-                  self.layer_norm_1_weight,
-                  self.layer_norm_1_bias,
-                  self.fsmn_weight,
-                  self.qkv_bias,
-                  self.out_bias,
-                  self.w1_bias,
-                  self.w2_bias,
+        inputs = (x.to(torch.float16),
+                  masks.to(torch.float16),
+                  self.layer_norm_0_weight.to(torch.float16),
+                  self.layer_norm_0_bias.to(torch.float16),
+                  self.layer_norm_1_weight.to(torch.float16),
+                  self.layer_norm_1_bias.to(torch.float16),
+                  self.fsmn_weight.to(torch.float16),
+                  self.qkv_bias.to(torch.float16),
+                  self.out_bias.to(torch.float16),
+                  self.w1_bias.to(torch.float16),
+                  self.w2_bias.to(torch.float16),
                   )
         outputs = run_model(
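
This hunk casts every tensor handed to the prefill backend to float16, since the fused layer runs in half precision while Paraformer's weights and activations arrive as float32. A minimal sketch of the pattern, assuming a generic half-precision backend (the helper below is illustrative, not the repository's API):

```python
import torch

# Illustrative sketch: cast a heterogeneous set of tensors to float16 in one
# place before dispatching to a half-precision backend, as the diff above does.
def to_fp16(*tensors: torch.Tensor) -> tuple:
    return tuple(t.to(torch.float16) for t in tensors)

x = torch.randn(1, 8, dtype=torch.float32)
masks = torch.ones(1, 8, dtype=torch.float32)
inputs = to_fp16(x, masks)
assert all(t.dtype == torch.float16 for t in inputs)
```
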
@@ -431,6 +431,8 @@ class PrefillRunner:
         args = (xs_pad, masks)
         self.prefill_input_queue.put(args)
         xs_pad, masks = self.prefill_result_queue.get()
+        xs_pad = xs_pad.to(torch.float32)
+        masks = masks.to(torch.float32)
         return xs_pad, masks
 
     def shutdown(self):
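
Symmetrically, `PrefillRunner` upcasts the results coming back from the worker queue to float32, so the rest of the fp32 Paraformer pipeline never sees the half-precision intermediates. A self-contained sketch of the round trip, with an in-process queue standing in for the prefill worker (the queue setup is illustrative):

```python
import torch
from queue import Queue

# Illustrative: the worker computes in float16, the caller restores float32,
# mirroring the PrefillRunner change above.
input_q, result_q = Queue(), Queue()

xs_pad = torch.randn(2, 4)                    # float32 on the caller side
input_q.put(xs_pad)

worker_out = input_q.get().to(torch.float16)  # backend works in half precision
result_q.put(worker_out)

xs_pad = result_q.get().to(torch.float32)     # upcast before returning
assert xs_pad.dtype == torch.float32
```
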
@@ -639,7 +641,7 @@ class FusedLlamaLowBitMultiDecoderlayer(torch.nn.Module):
     ):
         super().__init__()
-        self.do_print = True
+        self.do_print = do_print
 
         op_parameters = []
         for w in parameters:
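
The last hunk replaces a hard-coded `self.do_print = True` with the constructor argument, so callers can silence all but one instance of the multi-decoder layer. A tiny illustration of why honoring the argument matters (the `Worker` class is a stand-in, not the repository's class):

```python
# Illustrative: hard-coding do_print=True made every instance print;
# respecting the constructor argument lets the caller silence replicas.
class Worker:
    def __init__(self, do_print: bool = False):
        self.do_print = do_print  # the fix: use the argument, not a constant

workers = [Worker(do_print=(rank == 0)) for rank in range(4)]
assert [w.do_print for w in workers] == [True, False, False, False]
```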