Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	
		jhj0517
		
	committed on
		
		
					Commit 
							
							·
						
						eec0c16
	
1
								Parent(s):
							
							ddbe0b6
								
Fix VAD syntax & add vad handling case
Browse files
    	
        modules/whisper/base_transcription_pipeline.py
    CHANGED
    
    | @@ -135,12 +135,17 @@ class BaseTranscriptionPipeline(ABC): | |
| 135 | 
             
                            speech_pad_ms=vad_params.speech_pad_ms
         | 
| 136 | 
             
                        )
         | 
| 137 |  | 
| 138 | 
            -
                         | 
| 139 | 
             
                            audio=audio,
         | 
| 140 | 
             
                            vad_parameters=vad_options,
         | 
| 141 | 
             
                            progress=progress
         | 
| 142 | 
             
                        )
         | 
| 143 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 144 | 
             
                    result, elapsed_time = self.transcribe(
         | 
| 145 | 
             
                        audio,
         | 
| 146 | 
             
                        progress,
         | 
| @@ -150,7 +155,7 @@ class BaseTranscriptionPipeline(ABC): | |
| 150 | 
             
                    if vad_params.vad_filter:
         | 
| 151 | 
             
                        result = self.vad.restore_speech_timestamps(
         | 
| 152 | 
             
                            segments=result,
         | 
| 153 | 
            -
                            speech_chunks= | 
| 154 | 
             
                        )
         | 
| 155 |  | 
| 156 | 
             
                    if diarization_params.is_diarize:
         | 
|  | |
| 135 | 
             
                            speech_pad_ms=vad_params.speech_pad_ms
         | 
| 136 | 
             
                        )
         | 
| 137 |  | 
| 138 | 
            +
                        vad_processed, speech_chunks = self.vad.run(
         | 
| 139 | 
             
                            audio=audio,
         | 
| 140 | 
             
                            vad_parameters=vad_options,
         | 
| 141 | 
             
                            progress=progress
         | 
| 142 | 
             
                        )
         | 
| 143 |  | 
| 144 | 
            +
                        if vad_processed.size > 0:
         | 
| 145 | 
            +
                            audio = vad_processed
         | 
| 146 | 
            +
                        else:
         | 
| 147 | 
            +
                            vad_params.vad_filter = False
         | 
| 148 | 
            +
             | 
| 149 | 
             
                    result, elapsed_time = self.transcribe(
         | 
| 150 | 
             
                        audio,
         | 
| 151 | 
             
                        progress,
         | 
|  | |
| 155 | 
             
                    if vad_params.vad_filter:
         | 
| 156 | 
             
                        result = self.vad.restore_speech_timestamps(
         | 
| 157 | 
             
                            segments=result,
         | 
| 158 | 
            +
                            speech_chunks=speech_chunks,
         | 
| 159 | 
             
                        )
         | 
| 160 |  | 
| 161 | 
             
                    if diarization_params.is_diarize:
         |