	Update whisper_cs_dev.py
Deleted the fake model hack
- whisper_cs_dev.py +5 -20
    	
whisper_cs_dev.py  CHANGED

@@ -11,7 +11,7 @@ from pathlib import Path
 import glob
 import ctypes
 
-from settings import DEBUG_MODE, MODEL_PATH_V2_FAST, MODEL_PATH_V2, LEFT_CHANNEL_TEMP_PATH, RIGHT_CHANNEL_TEMP_PATH,
+from settings import DEBUG_MODE, MODEL_PATH_V2_FAST, MODEL_PATH_V2, LEFT_CHANNEL_TEMP_PATH, RIGHT_CHANNEL_TEMP_PATH, RESAMPLING_FREQ
 
 def load_cudnn():
 
@@ -93,15 +93,9 @@ def load_model(use_v2_fast, device, compute_type):
             device = device,
         )
 
-    # HACK we need to do this for strange reasons.
-    # If we don't do this, we get:
-    #Could not load library libcudnn_ops_infer.so.8. Error: libcudnn_ops_infer.so.8: cannot open shared object file: No such file or directory
-    #fake_model = whisper_ts.load_model(MODEL_PATH_V2, device=device)
-    fake_model = None
-
     if DEBUG_MODE: print(f"Exiting load_model function...")
 
-    return model
+    return model
 
 
 def split_input_stereo_channels(audio_path):
@@ -202,19 +196,10 @@ def transcribe_audio_no_fast_model(model, audio_path):
     if DEBUG_MODE: print(f"Exited transcribe_audio_no_fast_model function.")
 
 
-def transcribe_channels(left_waveform, right_waveform, model, use_v2_fast
+def transcribe_channels(left_waveform, right_waveform, model, use_v2_fast):
 
     if DEBUG_MODE: print(f"Entering transcribe_channels function...")
 
-    # HACK we need to do this for strange reasons.
-    # If we don't do this, we get:
-    #Could not load library libcudnn_ops_infer.so.8. Error: libcudnn_ops_infer.so.8: cannot open shared object file: No such file or directory
-    #fake_result = whisper_ts.transcribe(
-    #    fake_model,
-    #    FAKE_AUDIO_PATH,
-    #    beam_size=1,
-    #)
-
     if DEBUG_MODE: print(f"Preparing to transcribe...")
 
     if use_v2_fast:
@@ -354,10 +339,10 @@ def generate(audio_path, use_v2_fast):
 
     load_cudnn()
     device, compute_type = get_settings()
-    model
+    model = load_model(use_v2_fast, device, compute_type)
     split_input_stereo_channels(audio_path)
     left_waveform, right_waveform = process_waveforms()
-    left_result, right_result = transcribe_channels(left_waveform, right_waveform, model, use_v2_fast
+    left_result, right_result = transcribe_channels(left_waveform, right_waveform, model, use_v2_fast)
     output = post_process_transcripts(left_result, right_result, use_v2_fast)
     cleanup_temp_files(LEFT_CHANNEL_TEMP_PATH, RIGHT_CHANNEL_TEMP_PATH)
 
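The deleted lines are leftovers of a workaround in which a throwaway whisper_timestamped "fake model" was loaded purely so that the cuDNN shared libraries were pulled into the process before faster-whisper needed them; the comments note that without it the Space failed with "Could not load library libcudnn_ops_infer.so.8". After this change the code relies on the existing load_cudnn() call at the top of generate() instead. The body of load_cudnn() is not shown in this diff, so the snippet below is only a minimal sketch of what such a ctypes-based preload commonly looks like; the preload_cudnn_libraries name, the nvidia-cudnn pip-wheel layout, and the glob pattern are assumptions rather than the Space's actual implementation.

import ctypes
import glob
import os

def preload_cudnn_libraries():
    # Sketch only: dlopen the cuDNN shared objects before CTranslate2 /
    # faster-whisper tries to resolve them, so the process never hits
    # "Could not load library libcudnn_ops_infer.so.8".
    import nvidia.cudnn  # assumes cuDNN was installed via an nvidia-cudnn-cu* wheel

    lib_dir = os.path.join(os.path.dirname(nvidia.cudnn.__file__), "lib")
    for path in sorted(glob.glob(os.path.join(lib_dir, "libcudnn*.so*"))):
        # RTLD_GLOBAL keeps the loaded symbols visible to libraries loaded later.
        ctypes.CDLL(path, mode=ctypes.RTLD_GLOBAL)

With the libraries made loadable up front this way (or exposed via LD_LIBRARY_PATH), there is no reason to keep a fake model around just for its side effect, which is why the hack could be dropped from load_model() and transcribe_channels().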
 
			
