Spaces:
Running
Running
jhj0517
committed on
Commit
·
5a66e88
1
Parent(s):
a85ea1b
refactor `insanely-fast-whisper` parameters
Browse files
app.py
CHANGED
|
@@ -125,7 +125,7 @@ class App:
|
|
| 125 |
info="Penalty applied to the score of previously generated tokens (set > 1 to penalize).")
|
| 126 |
nb_no_repeat_ngram_size = gr.Number(label="No Repeat N-gram Size", value=0, precision=0,
|
| 127 |
info="Prevent repetitions of n-grams with this size (set 0 to disable).")
|
| 128 |
-
tb_prefix = gr.Textbox(label="Prefix", value=
|
| 129 |
info="Optional text to provide as a prefix for the first window.")
|
| 130 |
cb_suppress_blank = gr.Checkbox(label="Suppress Blank", value=True,
|
| 131 |
info="Suppress blank outputs at the beginning of the sampling.")
|
|
@@ -147,7 +147,7 @@ class App:
|
|
| 147 |
nb_hallucination_silence_threshold = gr.Number(label="Hallucination Silence Threshold",
|
| 148 |
value=None,
|
| 149 |
info="When word_timestamps is True, skip silent periods longer than this threshold (in seconds) when a possible hallucination is detected.")
|
| 150 |
-
tb_hotwords = gr.Textbox(label="Hotwords", value=
|
| 151 |
info="Hotwords/hint phrases to provide the model with. Has no effect if prefix is not None.")
|
| 152 |
nb_language_detection_threshold = gr.Number(label="Language Detection Threshold",
|
| 153 |
value=None,
|
|
@@ -155,6 +155,10 @@ class App:
|
|
| 155 |
nb_language_detection_segments = gr.Number(label="Language Detection Segments", value=1,
|
| 156 |
precision=0,
|
| 157 |
info="Number of segments to consider for the language detection.")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
with gr.Accordion("VAD", open=False):
|
| 159 |
cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
|
| 160 |
sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
|
|
@@ -173,10 +177,6 @@ class App:
|
|
| 173 |
dd_diarization_device = gr.Dropdown(label="Device",
|
| 174 |
choices=self.whisper_inf.diarizer.get_available_device(),
|
| 175 |
value=self.whisper_inf.diarizer.get_device())
|
| 176 |
-
with gr.Accordion("Insanely Fast Whisper Parameters", open=False,
|
| 177 |
-
visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
|
| 178 |
-
nb_chunk_length_s = gr.Number(label="Chunk Lengths (sec)", value=30, precision=0)
|
| 179 |
-
nb_batch_size = gr.Number(label="Batch Size", value=24, precision=0)
|
| 180 |
with gr.Row():
|
| 181 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
| 182 |
with gr.Row():
|
|
@@ -279,7 +279,7 @@ class App:
|
|
| 279 |
info="Penalty applied to the score of previously generated tokens (set > 1 to penalize).")
|
| 280 |
nb_no_repeat_ngram_size = gr.Number(label="No Repeat N-gram Size", value=0, precision=0,
|
| 281 |
info="Prevent repetitions of n-grams with this size (set 0 to disable).")
|
| 282 |
-
tb_prefix = gr.Textbox(label="Prefix", value=
|
| 283 |
info="Optional text to provide as a prefix for the first window.")
|
| 284 |
cb_suppress_blank = gr.Checkbox(label="Suppress Blank", value=True,
|
| 285 |
info="Suppress blank outputs at the beginning of the sampling.")
|
|
@@ -301,7 +301,7 @@ class App:
|
|
| 301 |
nb_hallucination_silence_threshold = gr.Number(label="Hallucination Silence Threshold",
|
| 302 |
value=None,
|
| 303 |
info="When word_timestamps is True, skip silent periods longer than this threshold (in seconds) when a possible hallucination is detected.")
|
| 304 |
-
tb_hotwords = gr.Textbox(label="Hotwords", value=
|
| 305 |
info="Hotwords/hint phrases to provide the model with. Has no effect if prefix is not None.")
|
| 306 |
nb_language_detection_threshold = gr.Number(label="Language Detection Threshold",
|
| 307 |
value=None,
|
|
@@ -309,6 +309,10 @@ class App:
|
|
| 309 |
nb_language_detection_segments = gr.Number(label="Language Detection Segments", value=1,
|
| 310 |
precision=0,
|
| 311 |
info="Number of segments to consider for the language detection.")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 312 |
with gr.Accordion("VAD", open=False):
|
| 313 |
cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
|
| 314 |
sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
|
|
@@ -327,10 +331,6 @@ class App:
|
|
| 327 |
dd_diarization_device = gr.Dropdown(label="Device",
|
| 328 |
choices=self.whisper_inf.diarizer.get_available_device(),
|
| 329 |
value=self.whisper_inf.diarizer.get_device())
|
| 330 |
-
with gr.Accordion("Insanely Fast Whisper Parameters", open=False,
|
| 331 |
-
visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
|
| 332 |
-
nb_chunk_length_s = gr.Number(label="Chunk Lengths (sec)", value=30, precision=0)
|
| 333 |
-
nb_batch_size = gr.Number(label="Batch Size", value=24, precision=0)
|
| 334 |
with gr.Row():
|
| 335 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
| 336 |
with gr.Row():
|
|
@@ -425,7 +425,7 @@ class App:
|
|
| 425 |
info="Penalty applied to the score of previously generated tokens (set > 1 to penalize).")
|
| 426 |
nb_no_repeat_ngram_size = gr.Number(label="No Repeat N-gram Size", value=0, precision=0,
|
| 427 |
info="Prevent repetitions of n-grams with this size (set 0 to disable).")
|
| 428 |
-
tb_prefix = gr.Textbox(label="Prefix", value=
|
| 429 |
info="Optional text to provide as a prefix for the first window.")
|
| 430 |
cb_suppress_blank = gr.Checkbox(label="Suppress Blank", value=True,
|
| 431 |
info="Suppress blank outputs at the beginning of the sampling.")
|
|
@@ -447,7 +447,7 @@ class App:
|
|
| 447 |
nb_hallucination_silence_threshold = gr.Number(label="Hallucination Silence Threshold",
|
| 448 |
value=None,
|
| 449 |
info="When word_timestamps is True, skip silent periods longer than this threshold (in seconds) when a possible hallucination is detected.")
|
| 450 |
-
tb_hotwords = gr.Textbox(label="Hotwords", value=
|
| 451 |
info="Hotwords/hint phrases to provide the model with. Has no effect if prefix is not None.")
|
| 452 |
nb_language_detection_threshold = gr.Number(label="Language Detection Threshold",
|
| 453 |
value=None,
|
|
@@ -455,6 +455,11 @@ class App:
|
|
| 455 |
nb_language_detection_segments = gr.Number(label="Language Detection Segments", value=1,
|
| 456 |
precision=0,
|
| 457 |
info="Number of segments to consider for the language detection.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 458 |
with gr.Accordion("VAD", open=False):
|
| 459 |
cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
|
| 460 |
sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
|
|
@@ -473,10 +478,6 @@ class App:
|
|
| 473 |
dd_diarization_device = gr.Dropdown(label="Device",
|
| 474 |
choices=self.whisper_inf.diarizer.get_available_device(),
|
| 475 |
value=self.whisper_inf.diarizer.get_device())
|
| 476 |
-
with gr.Accordion("Insanely Fast Whisper Parameters", open=False,
|
| 477 |
-
visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
|
| 478 |
-
nb_chunk_length_s = gr.Number(label="Chunk Lengths (sec)", value=30, precision=0)
|
| 479 |
-
nb_batch_size = gr.Number(label="Batch Size", value=24, precision=0)
|
| 480 |
with gr.Row():
|
| 481 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
| 482 |
with gr.Row():
|
|
|
|
| 125 |
info="Penalty applied to the score of previously generated tokens (set > 1 to penalize).")
|
| 126 |
nb_no_repeat_ngram_size = gr.Number(label="No Repeat N-gram Size", value=0, precision=0,
|
| 127 |
info="Prevent repetitions of n-grams with this size (set 0 to disable).")
|
| 128 |
+
tb_prefix = gr.Textbox(label="Prefix", value=None,
|
| 129 |
info="Optional text to provide as a prefix for the first window.")
|
| 130 |
cb_suppress_blank = gr.Checkbox(label="Suppress Blank", value=True,
|
| 131 |
info="Suppress blank outputs at the beginning of the sampling.")
|
|
|
|
| 147 |
nb_hallucination_silence_threshold = gr.Number(label="Hallucination Silence Threshold",
|
| 148 |
value=None,
|
| 149 |
info="When word_timestamps is True, skip silent periods longer than this threshold (in seconds) when a possible hallucination is detected.")
|
| 150 |
+
tb_hotwords = gr.Textbox(label="Hotwords", value=None,
|
| 151 |
info="Hotwords/hint phrases to provide the model with. Has no effect if prefix is not None.")
|
| 152 |
nb_language_detection_threshold = gr.Number(label="Language Detection Threshold",
|
| 153 |
value=None,
|
|
|
|
| 155 |
nb_language_detection_segments = gr.Number(label="Language Detection Segments", value=1,
|
| 156 |
precision=0,
|
| 157 |
info="Number of segments to consider for the language detection.")
|
| 158 |
+
|
| 159 |
+
with gr.Group(visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
|
| 160 |
+
nb_chunk_length_s = gr.Number(label="Chunk Lengths (sec)", value=30, precision=0)
|
| 161 |
+
nb_batch_size = gr.Number(label="Batch Size", value=24, precision=0)
|
| 162 |
with gr.Accordion("VAD", open=False):
|
| 163 |
cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
|
| 164 |
sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
|
|
|
|
| 177 |
dd_diarization_device = gr.Dropdown(label="Device",
|
| 178 |
choices=self.whisper_inf.diarizer.get_available_device(),
|
| 179 |
value=self.whisper_inf.diarizer.get_device())
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
with gr.Row():
|
| 181 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
| 182 |
with gr.Row():
|
|
|
|
| 279 |
info="Penalty applied to the score of previously generated tokens (set > 1 to penalize).")
|
| 280 |
nb_no_repeat_ngram_size = gr.Number(label="No Repeat N-gram Size", value=0, precision=0,
|
| 281 |
info="Prevent repetitions of n-grams with this size (set 0 to disable).")
|
| 282 |
+
tb_prefix = gr.Textbox(label="Prefix", value=None,
|
| 283 |
info="Optional text to provide as a prefix for the first window.")
|
| 284 |
cb_suppress_blank = gr.Checkbox(label="Suppress Blank", value=True,
|
| 285 |
info="Suppress blank outputs at the beginning of the sampling.")
|
|
|
|
| 301 |
nb_hallucination_silence_threshold = gr.Number(label="Hallucination Silence Threshold",
|
| 302 |
value=None,
|
| 303 |
info="When word_timestamps is True, skip silent periods longer than this threshold (in seconds) when a possible hallucination is detected.")
|
| 304 |
+
tb_hotwords = gr.Textbox(label="Hotwords", value=None,
|
| 305 |
info="Hotwords/hint phrases to provide the model with. Has no effect if prefix is not None.")
|
| 306 |
nb_language_detection_threshold = gr.Number(label="Language Detection Threshold",
|
| 307 |
value=None,
|
|
|
|
| 309 |
nb_language_detection_segments = gr.Number(label="Language Detection Segments", value=1,
|
| 310 |
precision=0,
|
| 311 |
info="Number of segments to consider for the language detection.")
|
| 312 |
+
|
| 313 |
+
with gr.Group(visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
|
| 314 |
+
nb_chunk_length_s = gr.Number(label="Chunk Lengths (sec)", value=30, precision=0)
|
| 315 |
+
nb_batch_size = gr.Number(label="Batch Size", value=24, precision=0)
|
| 316 |
with gr.Accordion("VAD", open=False):
|
| 317 |
cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
|
| 318 |
sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
|
|
|
|
| 331 |
dd_diarization_device = gr.Dropdown(label="Device",
|
| 332 |
choices=self.whisper_inf.diarizer.get_available_device(),
|
| 333 |
value=self.whisper_inf.diarizer.get_device())
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
with gr.Row():
|
| 335 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
| 336 |
with gr.Row():
|
|
|
|
| 425 |
info="Penalty applied to the score of previously generated tokens (set > 1 to penalize).")
|
| 426 |
nb_no_repeat_ngram_size = gr.Number(label="No Repeat N-gram Size", value=0, precision=0,
|
| 427 |
info="Prevent repetitions of n-grams with this size (set 0 to disable).")
|
| 428 |
+
tb_prefix = gr.Textbox(label="Prefix", value=None,
|
| 429 |
info="Optional text to provide as a prefix for the first window.")
|
| 430 |
cb_suppress_blank = gr.Checkbox(label="Suppress Blank", value=True,
|
| 431 |
info="Suppress blank outputs at the beginning of the sampling.")
|
|
|
|
| 447 |
nb_hallucination_silence_threshold = gr.Number(label="Hallucination Silence Threshold",
|
| 448 |
value=None,
|
| 449 |
info="When word_timestamps is True, skip silent periods longer than this threshold (in seconds) when a possible hallucination is detected.")
|
| 450 |
+
tb_hotwords = gr.Textbox(label="Hotwords", value=None,
|
| 451 |
info="Hotwords/hint phrases to provide the model with. Has no effect if prefix is not None.")
|
| 452 |
nb_language_detection_threshold = gr.Number(label="Language Detection Threshold",
|
| 453 |
value=None,
|
|
|
|
| 455 |
nb_language_detection_segments = gr.Number(label="Language Detection Segments", value=1,
|
| 456 |
precision=0,
|
| 457 |
info="Number of segments to consider for the language detection.")
|
| 458 |
+
|
| 459 |
+
with gr.Group(visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
|
| 460 |
+
nb_chunk_length_s = gr.Number(label="Chunk Lengths (sec)", value=30, precision=0)
|
| 461 |
+
nb_batch_size = gr.Number(label="Batch Size", value=24, precision=0)
|
| 462 |
+
|
| 463 |
with gr.Accordion("VAD", open=False):
|
| 464 |
cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
|
| 465 |
sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
|
|
|
|
| 478 |
dd_diarization_device = gr.Dropdown(label="Device",
|
| 479 |
choices=self.whisper_inf.diarizer.get_available_device(),
|
| 480 |
value=self.whisper_inf.diarizer.get_device())
|
|
|
|
|
|
|
|
|
|
|
|
|
| 481 |
with gr.Row():
|
| 482 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
| 483 |
with gr.Row():
|