Fix speech_paraformer issue with unexpected changes (#12416)
* Fix speech_paraformer issue with unexpected changes
* Add paraformer version specified

parent a9cb70a71c
commit ff3f7cb25f

3 changed files with 20 additions and 16 deletions
@@ -37,8 +37,8 @@ pip install timm torch==2.1.2 torchvision==0.16.2
 pip install BCEmbedding==0.1.5 transformers==4.40.0
 
 # [optional] for Speech_Paraformer-Large
-pip install -U funasr
-pip install modelscope torch==2.1.2 torchaudio==2.1.2
+pip install funasr==1.1.14
+pip install modelscope==1.20.1 torch==2.1.2 torchaudio==2.1.2
 ```
 
 ### 2. Runtime Configurations
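The README change replaces `pip install -U funasr`, which always pulled the latest release, with exact pins (`funasr==1.1.14`, `modelscope==1.20.1`), so a new upstream release can no longer break the Speech_Paraformer-Large example. A minimal fail-fast check built on those pins; the snippet is illustrative and not part of this commit:

```python
# Hypothetical sanity check, not part of this commit: fail fast if the
# installed packages drift from the versions the example was tested with.
from importlib.metadata import version, PackageNotFoundError

PINNED = {"funasr": "1.1.14", "modelscope": "1.20.1"}

for pkg, expected in PINNED.items():
    try:
        installed = version(pkg)
    except PackageNotFoundError:
        raise SystemExit(f"{pkg} is not installed; run: pip install {pkg}=={expected}")
    if installed != expected:
        raise SystemExit(f"found {pkg}=={installed}, but the example was "
                         f"tested with {pkg}=={expected}")
```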
@@ -162,11 +162,13 @@ class _BaseAutoModelClass:
                 model = cls.HF_Model.from_pretrained(*args, **kwargs)
             else:
                 model = cls.HF_Model(*args, **kwargs)
-            model.config.update({"bigdl_lcmu_enabled": False})
+            if hasattr(model, "config"):
+                model.config.update({"bigdl_lcmu_enabled": False})
 
         logger.info(f"Converting model, it may takes up to several minutes ...")
 
-        model.config.update({"optimize_model": optimize_model})
+        if hasattr(model, "config"):
+            model.config.update({"optimize_model": optimize_model})
 
         if mock_device == "cpu":
             with torch.no_grad():
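Both `model.config.update(...)` calls previously assumed that every loaded model exposes a Hugging Face-style `config` attribute; models loaded through funasr's Paraformer path do not, so the calls are now guarded with `hasattr`. A self-contained sketch of the pattern, using made-up stand-in classes:

```python
# Illustrative only: a model without a HF-style `config` (as with funasr's
# Paraformer) would raise AttributeError on the old, unguarded code path.
class WithConfig:
    def __init__(self):
        self.config = {}          # dict stands in for a HF config object

class WithoutConfig:
    pass                          # no `config` attribute at all

for model in (WithConfig(), WithoutConfig()):
    if hasattr(model, "config"):  # new guarded path: safe for both models
        model.config.update({"bigdl_lcmu_enabled": False})
    # old path, model.config.update(...), fails on WithoutConfig
```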
@@ -294,17 +294,17 @@ class FusedLlamaLowBitDecoderlayer(torch.nn.Module):
             torch.Tensor: result
         """
         backend_cls = self.backend_cls_prefill
-        inputs = (x,
-                  masks,
-                  self.layer_norm_0_weight,
-                  self.layer_norm_0_bias,
-                  self.layer_norm_1_weight,
-                  self.layer_norm_1_bias,
-                  self.fsmn_weight,
-                  self.qkv_bias,
-                  self.out_bias,
-                  self.w1_bias,
-                  self.w2_bias,
+        inputs = (x.to(torch.float16),
+                  masks.to(torch.float16),
+                  self.layer_norm_0_weight.to(torch.float16),
+                  self.layer_norm_0_bias.to(torch.float16),
+                  self.layer_norm_1_weight.to(torch.float16),
+                  self.layer_norm_1_bias.to(torch.float16),
+                  self.fsmn_weight.to(torch.float16),
+                  self.qkv_bias.to(torch.float16),
+                  self.out_bias.to(torch.float16),
+                  self.w1_bias.to(torch.float16),
+                  self.w2_bias.to(torch.float16),
                   )
 
         outputs = run_model(
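Every tensor handed to the prefill backend is now cast to float16 before being packed into the `inputs` tuple, so the backend sees a uniform half-precision dtype regardless of the dtype the activations and weights were created in. A minimal sketch of the same idea on plain tensors, with shapes chosen only for illustration:

```python
# Sketch, assuming plain torch tensors: cast everything destined for the
# fp16 backend in one place rather than relying on the callers' dtypes.
import torch

x = torch.randn(1, 8, 512)                  # activations, float32 by default
masks = torch.ones(1, 1, 8)
weights = [torch.randn(512) for _ in range(3)]

inputs = tuple(t.to(torch.float16) for t in (x, masks, *weights))
assert all(t.dtype == torch.float16 for t in inputs)
```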
@@ -431,6 +431,8 @@ class PrefillRunner:
         args = (xs_pad, masks)
         self.prefill_input_queue.put(args)
         xs_pad, masks = self.prefill_result_queue.get()
+        xs_pad = xs_pad.to(torch.float32)
+        masks = masks.to(torch.float32)
         return xs_pad, masks
 
     def shutdown(self):
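Because the prefill worker now computes in float16, the results pulled from `prefill_result_queue` are cast back to float32 before being returned, keeping the rest of the pipeline in full precision. A small sketch of the round trip, with illustrative shapes:

```python
# Sketch of the round trip: fp16 inside the worker, fp32 everywhere else.
import torch

xs_pad = torch.randn(1, 8, 512, dtype=torch.float16)  # as the worker returns it
masks = torch.ones(1, 1, 8, dtype=torch.float16)

xs_pad = xs_pad.to(torch.float32)
masks = masks.to(torch.float32)
assert xs_pad.dtype == masks.dtype == torch.float32
```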
@@ -639,7 +641,7 @@ class FusedLlamaLowBitMultiDecoderlayer(torch.nn.Module):
     ):
         super().__init__()
 
-        self.do_print = True
+        self.do_print = do_print
 
         op_parameters = []
         for w in parameters:
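`self.do_print` was hard-coded to `True`, so every multi-decoder-layer worker printed regardless of what the caller requested; it now honors the `do_print` constructor argument. A simplified sketch of the corrected pattern (the stub below is not the real constructor):

```python
# Simplified stand-in, not the full constructor: honor the argument
# instead of hard-coding it.
import torch

class DecoderLayerStub(torch.nn.Module):
    def __init__(self, do_print: bool = False):
        super().__init__()
        self.do_print = do_print        # was: self.do_print = True

    def forward(self, x):
        if self.do_print:
            print("running decoder layer")
        return x

quiet = DecoderLayerStub(do_print=False)
quiet(torch.zeros(1))                   # prints nothing now
```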