Fix speech_paraformer issue with unexpected changes (#12416)
* Fix speech_paraformer issue with unexpected changes
* Add paraformer version specified
parent a9cb70a71c
commit ff3f7cb25f
3 changed files with 20 additions and 16 deletions
@@ -37,8 +37,8 @@ pip install timm torch==2.1.2 torchvision==0.16.2
 pip install BCEmbedding==0.1.5 transformers==4.40.0
 
 # [optional] for Speech_Paraformer-Large
-pip install -U funasr
-pip install modelscope torch==2.1.2 torchaudio==2.1.2
+pip install funasr==1.1.14
+pip install modelscope==1.20.1 torch==2.1.2 torchaudio==2.1.2
 ```
 
 ### 2. Runtime Configurations
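The unpinned `pip install -U funasr` is what let breaking upstream changes slip into this setup; pinning `funasr==1.1.14` and `modelscope==1.20.1` freezes the Paraformer code path the example was validated against. A minimal sketch of checking the pinned environment before loading the model (the version strings follow the diff above; everything else is illustrative):

```python
# Sanity-check the pinned versions before loading Speech_Paraformer-Large.
# importlib.metadata is standard library; the expected versions are the
# ones pinned in the hunk above.
from importlib.metadata import version

expected = {"funasr": "1.1.14", "modelscope": "1.20.1"}
for pkg, want in expected.items():
    got = version(pkg)
    assert got == want, f"{pkg}=={got}, but this example expects {want}"
```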
@@ -162,11 +162,13 @@ class _BaseAutoModelClass:
             model = cls.HF_Model.from_pretrained(*args, **kwargs)
         else:
             model = cls.HF_Model(*args, **kwargs)
-        model.config.update({"bigdl_lcmu_enabled": False})
+        if hasattr(model, "config"):
+            model.config.update({"bigdl_lcmu_enabled": False})
 
         logger.info(f"Converting model, it may takes up to several minutes ...")
 
-        model.config.update({"optimize_model": optimize_model})
+        if hasattr(model, "config"):
+            model.config.update({"optimize_model": optimize_model})
 
         if mock_device == "cpu":
             with torch.no_grad():
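The unconditional `model.config.update(...)` assumed every model is a HuggingFace `PreTrainedModel`; Paraformer models built by funasr are plain `torch.nn.Module`s with no `config` attribute, so the call raised `AttributeError`. A sketch of the guard pattern in isolation (`safe_config_update` is a hypothetical helper, not part of the patch):

```python
import torch

# HuggingFace models expose a `config` object with an update() method, but
# funasr's Paraformer is a plain torch.nn.Module without one, so the update
# must be conditional.
def safe_config_update(model: torch.nn.Module, updates: dict) -> None:
    if hasattr(model, "config"):
        model.config.update(updates)
```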
@@ -294,17 +294,17 @@ class FusedLlamaLowBitDecoderlayer(torch.nn.Module):
             torch.Tensor: result
         """
         backend_cls = self.backend_cls_prefill
-        inputs = (x,
-                  masks,
-                  self.layer_norm_0_weight,
-                  self.layer_norm_0_bias,
-                  self.layer_norm_1_weight,
-                  self.layer_norm_1_bias,
-                  self.fsmn_weight,
-                  self.qkv_bias,
-                  self.out_bias,
-                  self.w1_bias,
-                  self.w2_bias,
+        inputs = (x.to(torch.float16),
+                  masks.to(torch.float16),
+                  self.layer_norm_0_weight.to(torch.float16),
+                  self.layer_norm_0_bias.to(torch.float16),
+                  self.layer_norm_1_weight.to(torch.float16),
+                  self.layer_norm_1_bias.to(torch.float16),
+                  self.fsmn_weight.to(torch.float16),
+                  self.qkv_bias.to(torch.float16),
+                  self.out_bias.to(torch.float16),
+                  self.w1_bias.to(torch.float16),
+                  self.w2_bias.to(torch.float16),
                   )
 
         outputs = run_model(
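Every tensor in the prefill input tuple is now cast to float16 before the backend call. A compact sketch of the same pattern (`run_backend` stands in for the real `run_model(...)` call and is hypothetical):

```python
import torch

# The low-bit NPU prefill backend consumes float16 tensors, so every tensor
# in the input tuple is converted up front rather than per call site.
def prefill_fp16(run_backend, x, masks, *weights):
    inputs = tuple(t.to(torch.float16) for t in (x, masks, *weights))
    return run_backend(*inputs)
```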
@@ -431,6 +431,8 @@ class PrefillRunner:
         args = (xs_pad, masks)
         self.prefill_input_queue.put(args)
         xs_pad, masks = self.prefill_result_queue.get()
+        xs_pad = xs_pad.to(torch.float32)
+        masks = masks.to(torch.float32)
         return xs_pad, masks
 
     def shutdown(self):
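This is the other half of the dtype round trip: results coming back from the float16 prefill path are cast to float32 so downstream layers keep their original precision. A sketch, assuming a callable `prefill` that wraps the queue-based `PrefillRunner` exchange (the wrapper name is hypothetical):

```python
import torch

# Prefill runs in float16 on the NPU side; casting the results back to
# float32 preserves the dtype the rest of the encoder pipeline expects.
def forward(prefill, xs_pad, masks):
    xs_pad, masks = prefill(xs_pad, masks)
    return xs_pad.to(torch.float32), masks.to(torch.float32)
```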
@@ -639,7 +641,7 @@ class FusedLlamaLowBitMultiDecoderlayer(torch.nn.Module):
     ):
         super().__init__()
 
         self.do_print = True
-        self.do_print = True
+        self.do_print = do_print
 
         op_parameters = []
         for w in parameters:
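The last hunk fixes a constructor argument that was silently ignored: `do_print` was accepted but the attribute was hard-coded to `True`, so every decoder worker printed. A minimal sketch of the before/after (the class name is illustrative):

```python
# The constructor accepted do_print but overrode it; honoring the argument
# lets the caller silence all but one worker.
class DecoderWorker:
    def __init__(self, do_print: bool = False):
        self.do_print = do_print  # previously: self.do_print = True
```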