	Update app.py
app.py CHANGED
@@ -51,7 +51,7 @@ def find_mp4_files(directory):
 
     return mp4_files
 
-
+
 def fn_clearvoice_tse(input_video):
     myClearVoice = ClearVoice(task='target_speaker_extraction', model_names=['AV_MossFormer2_TSE_16K'])
     #output_wav_dict = 
@@ -119,11 +119,10 @@ tse_demo = gr.Interface(
         gr.Gallery(label="Output Video List")
     ],
     title = "ClearVoice: Audio-visual speaker extraction",
-    description = ("Gradio demo for audio-visual speaker extraction with ClearVoice. "
-                   "We provide the generalized models trained on mid-scale of data for handling independent speakers and various of background environments. "
+    description = ("Gradio demo for audio-visual speaker extraction with ClearVoice."
                    "To test it, simply upload your video, or click one of the examples to load them. Read more at the links below."),
-    article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2302.11824' target='_blank'>MossFormer: Pushing the Performance Limit of Monaural Speech Separation using Gated Single-Head Transformer with Convolution-Augmented Joint Self-Attentions</a> | <a href='https://github.com/alibabasglab/MossFormer' target='_blank'>Github Repo</a></p>"
-               "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> | <a href='https://github.com/alibabasglab/MossFormer2' target='_blank'>Github Repo</a></p>"),
+    # article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2302.11824' target='_blank'>MossFormer: Pushing the Performance Limit of Monaural Speech Separation using Gated Single-Head Transformer with Convolution-Augmented Joint Self-Attentions</a> | <a href='https://github.com/alibabasglab/MossFormer' target='_blank'>Github Repo</a></p>"
+    #           "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> | <a href='https://github.com/alibabasglab/MossFormer2' target='_blank'>Github Repo</a></p>"),
     examples = [
         ['examples/001.mp4'],
         ['examples/002.mp4'],
@@ -133,6 +132,6 @@ tse_demo = gr.Interface(
 
 with demo:
     #gr.TabbedInterface([se_demo], ["Speech Enhancement"])
-    gr.TabbedInterface([se_demo, ss_demo, tse_demo], ["Speech Enhancement", "Speech Separation", "
+    gr.TabbedInterface([se_demo, ss_demo, tse_demo], ["Speech Enhancement", "Speech Separation", "Audio-visual Speaker Extraction"])
 
 demo.launch()
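
For context beyond the hunks above, here is a minimal sketch of how the pieces touched by this commit plausibly fit together after the change. It is not the actual app.py: the gr.Blocks container, the gr.Video input component, the placeholder se_demo/ss_demo interfaces, the tse_output directory, the clearvoice import path, and the inference call (online_write=True, output_path=...) are assumptions based on typical ClearVoice and Gradio usage; only the lines shown in the diff are taken from the commit.

import glob
import os

import gradio as gr
from clearvoice import ClearVoice  # assumed import path; not shown in this diff


def find_mp4_files(directory):
    # Collect the .mp4 files written by the model so they can be shown in the gallery.
    mp4_files = glob.glob(os.path.join(directory, '*.mp4'))
    return mp4_files


def fn_clearvoice_tse(input_video):
    # As in the diff: load the audio-visual target-speaker-extraction model.
    myClearVoice = ClearVoice(task='target_speaker_extraction',
                              model_names=['AV_MossFormer2_TSE_16K'])
    # Hypothetical inference call and output directory; the real ones are not
    # visible in this diff.
    output_dir = 'tse_output'
    myClearVoice(input_path=input_video, online_write=True, output_path=output_dir)
    return find_mp4_files(output_dir)


tse_demo = gr.Interface(
    fn=fn_clearvoice_tse,
    inputs=gr.Video(label="Input Video"),            # assumed input component
    outputs=[gr.Gallery(label="Output Video List")],
    title="ClearVoice: Audio-visual speaker extraction",
    description=("Gradio demo for audio-visual speaker extraction with ClearVoice. "
                 "To test it, simply upload your video, or click one of the examples "
                 "to load them. Read more at the links below."),
    examples=[['examples/001.mp4'], ['examples/002.mp4']],
)

# Stand-ins for the other two tabs; in app.py these are full gr.Interface objects
# for speech enhancement and speech separation built earlier in the file.
se_demo = gr.Interface(fn=lambda audio: audio, inputs=gr.Audio(), outputs=gr.Audio())
ss_demo = gr.Interface(fn=lambda audio: audio, inputs=gr.Audio(), outputs=gr.Audio())

demo = gr.Blocks()
with demo:
    gr.TabbedInterface([se_demo, ss_demo, tse_demo],
                       ["Speech Enhancement", "Speech Separation", "Audio-visual Speaker Extraction"])

demo.launch()

Keeping each task as its own gr.Interface and composing them only at the end with gr.TabbedInterface is what lets this commit rename the third tab and trim the TSE description without touching the enhancement and separation demos.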