M4xjunior committed on
Commit
5f1f288
·
verified ·
1 Parent(s): 375ecba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -9
app.py CHANGED
@@ -66,7 +66,7 @@ def load_f5tts():
66
  ckpt_path = hf_hub_download(repo_id=repo_id, filename=filename, use_auth_token=token)
67
 
68
  F5TTS_model_cfg = dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)
69
- return load_model(DiT, F5TTS_model_cfg, ckpt_path)
70
 
71
 
72
 
@@ -120,7 +120,7 @@ def generate_response(messages, model, tokenizer):
120
 
121
  @gpu_decorator
122
  def infer(
123
- ref_audio_orig, ref_text, gen_text, model, remove_silence, cross_fade_duration=0.15, speed=1, show_info=gr.Info
124
  ):
125
  ref_audio, ref_text = preprocess_ref_audio_text(ref_audio_orig, ref_text, show_info=show_info)
126
 
@@ -148,6 +148,7 @@ def infer(
148
  ema_model,
149
  vocoder,
150
  cross_fade_duration=cross_fade_duration,
 
151
  speed=speed,
152
  show_info=show_info,
153
  progress=gr.Progress(),
@@ -170,13 +171,7 @@ def infer(
170
 
171
 
172
  with gr.Blocks() as app_credits:
173
- gr.Markdown("""
174
- # Credits
175
-
176
- * [mrfakename](https://github.com/fakerybakery) for the original [online demo](https://huggingface.co/spaces/mrfakename/E2-F5-TTS)
177
- * [RootingInLoad](https://github.com/RootingInLoad) for initial chunk generation and podcast app exploration
178
- * [jpgallegoar](https://github.com/jpgallegoar) for multiple speech-type generation & voice chat
179
- """)
180
  with gr.Blocks() as app_tts:
181
  gr.Markdown("# Batched TTS")
182
  ref_audio_input = gr.Audio(label="Reference Audio", type="filepath")
@@ -209,6 +204,14 @@ with gr.Blocks() as app_tts:
209
  step=0.01,
210
  info="Set the duration of the cross-fade between audio clips.",
211
  )
 
 
 
 
 
 
 
 
212
 
213
  audio_output = gr.Audio(label="Synthesized Audio")
214
  spectrogram_output = gr.Image(label="Spectrogram")
@@ -230,6 +233,7 @@ with gr.Blocks() as app_tts:
230
  remove_silence,
231
  cross_fade_duration_slider,
232
  speed_slider,
 
233
  )
234
  return audio_out, spectrogram_path, gr.update(value=ref_text_out)
235
 
@@ -242,6 +246,7 @@ with gr.Blocks() as app_tts:
242
  remove_silence,
243
  cross_fade_duration_slider,
244
  speed_slider,
 
245
  ],
246
  outputs=[audio_output, spectrogram_output, ref_text_input],
247
  )
 
66
  ckpt_path = hf_hub_download(repo_id=repo_id, filename=filename, use_auth_token=token)
67
 
68
  F5TTS_model_cfg = dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)
69
+ return load_model(DiT, F5TTS_model_cfg, ckpt_path, use_ema=True)
70
 
71
 
72
 
 
120
 
121
  @gpu_decorator
122
  def infer(
123
+ ref_audio_orig, ref_text, gen_text, model, remove_silence, cross_fade_duration=0.15, speed=1, nfe=32, show_info=gr.Info
124
  ):
125
  ref_audio, ref_text = preprocess_ref_audio_text(ref_audio_orig, ref_text, show_info=show_info)
126
 
 
148
  ema_model,
149
  vocoder,
150
  cross_fade_duration=cross_fade_duration,
151
+ nfe_step=nfe,
152
  speed=speed,
153
  show_info=show_info,
154
  progress=gr.Progress(),
 
171
 
172
 
173
  with gr.Blocks() as app_credits:
174
+ gr.Markdown("F5-TTS")
 
 
 
 
 
 
175
  with gr.Blocks() as app_tts:
176
  gr.Markdown("# Batched TTS")
177
  ref_audio_input = gr.Audio(label="Reference Audio", type="filepath")
 
204
  step=0.01,
205
  info="Set the duration of the cross-fade between audio clips.",
206
  )
207
+ nfe_slider = gr.Slider(
208
+ label="NFE",
209
+ minimum=16,
210
+ maximum=64,
211
+ value=32,
212
+ step=1,
213
+ info="Ajuste NFE Step.",
214
+ )
215
 
216
  audio_output = gr.Audio(label="Synthesized Audio")
217
  spectrogram_output = gr.Image(label="Spectrogram")
 
233
  remove_silence,
234
  cross_fade_duration_slider,
235
  speed_slider,
236
+ nfe_slider,
237
  )
238
  return audio_out, spectrogram_path, gr.update(value=ref_text_out)
239
 
 
246
  remove_silence,
247
  cross_fade_duration_slider,
248
  speed_slider,
249
+ nfe_slider,
250
  ],
251
  outputs=[audio_output, spectrogram_output, ref_text_input],
252
  )