cnph001 committed on
Commit
4337b98
·
verified ·
1 Parent(s): 3e2b3a7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -91
app.py CHANGED
@@ -1,129 +1,145 @@
1
- import spaces
2
- import gradio as gr
3
- import edge_tts
4
  import asyncio
5
- import tempfile
6
  import os
7
- import re # Import the regular expression module
 
 
 
 
 
 
8
 
 
 
 
 
 
9
 
10
# Build the catalogue of voices offered by edge-tts
async def get_voices():
    """Return the available edge-tts voices keyed by a display label.

    Each key has the form "<ShortName> - <Locale> (<Gender>)" and maps to
    the voice's ShortName.
    """
    catalogue = {}
    for entry in await edge_tts.list_voices():
        label = f"{entry['ShortName']} - {entry['Locale']} ({entry['Gender']})"
        catalogue[label] = entry['ShortName']
    return catalogue
 
14
 
15
# Text-to-speech function for a single paragraph
async def paragraph_to_speech(text, voice, rate, pitch):
    """Synthesize one paragraph to an MP3 file with edge-tts.

    A two-character prefix at the very start of the paragraph selects a
    fixed voice and is removed before synthesis:

        1F -> en-US-EmmaNeural      1M -> en-US-BrianNeural
        2F -> en-US-JennyNeural     2M -> en-AU-WilliamNeural
        3F -> en-HK-YanNeural       1C -> en-GB-MaisieNeural (child)

    Without a prefix, the caller-selected voice is used.

    Args:
        text (str): Paragraph text, optionally starting with a voice prefix.
        voice (str): Display label "<ShortName> - <Locale> (<Gender>)"; only
            the part before " - " is passed to edge-tts. A falsy value falls
            back to the Andrew voice.
        rate (int): Speech rate adjustment in percent.
        pitch (int): Pitch adjustment in Hz.

    Returns:
        str | None: Path of the generated MP3 file, or None when there is
        nothing to speak (blank paragraph, or a prefix with no text).
    """
    voice1 = "en-US-AndrewNeural - en-US (Male)"  # good for reading; fallback
    prefix_voices = {
        "1F": "en-US-EmmaNeural",
        "2F": "en-US-JennyNeural",
        "3F": "en-HK-YanNeural",
        "1M": "en-US-BrianNeural",
        "2M": "en-AU-WilliamNeural",
        "1C": "en-GB-MaisieNeural",  # child voice
    }

    if not text.strip():
        return None

    prefix = text[:2]
    if prefix in prefix_voices:
        voice_short_name = prefix_voices[prefix]
        text2 = text[2:]  # drop the two-character voice prefix
        if not text2.strip():
            # Only a prefix was given; skip the synthesis request entirely.
            return None
    else:
        # Use the selected voice; the previous fallback name `default_voice`
        # was not defined at module level and raised NameError.
        voice_short_name = (voice or voice1).split(" - ")[0]
        text2 = text

    rate_str = f"{rate:+d}%"
    pitch_str = f"{pitch:+d}Hz"
    communicate = edge_tts.Communicate(text2, voice_short_name, rate=rate_str, pitch=pitch_str)

    # Reserve a unique file name, then close the handle before edge-tts
    # writes to that path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    try:
        await communicate.save(tmp_path)
    except Exception:
        # Do not leave an orphaned temp file behind when synthesis fails.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
    return tmp_path
 
 
 
 
55
 
56
# Main text-to-speech function that processes paragraphs
async def text_to_speech(text, voice, rate, pitch):
    """Convert text to speech paragraph by paragraph and merge the results.

    The text is split wherever two or more newline characters occur in a
    row (i.e. at blank lines). Each non-blank paragraph is synthesized by
    paragraph_to_speech and the resulting MP3 files are concatenated.

    Args:
        text (str): The full input text.
        voice (str): Display label of the voice used for unprefixed paragraphs.
        rate (int): Speech rate adjustment in percent.
        pitch (int): Pitch adjustment in Hz.

    Returns:
        tuple: (audio_path, warning). audio_path is the MP3 path or None;
        the second element is the result of gr.Warning(...) when input
        validation fails and None otherwise.
    """
    if not text.strip():
        return None, gr.Warning("Please enter text to convert.")
    if not voice:
        return None, gr.Warning("Please select a voice.")

    # Split by two or more newline characters, optionally preceded by carriage returns
    paragraphs = [p for p in re.split(r'\r?\n\r?\n+', text) if p.strip()]

    audio_files = []
    for paragraph in paragraphs:
        audio_path = await paragraph_to_speech(paragraph, voice, rate, pitch)
        if audio_path:
            audio_files.append(audio_path)

    if not audio_files:
        return None, None  # No audio generated

    if len(audio_files) == 1:
        return audio_files[0], None

    # Simple byte-level concatenation of the MP3 files. The output file is
    # created atomically with NamedTemporaryFile instead of the deprecated,
    # race-prone tempfile.mktemp.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as outfile:
        combined_audio_path = outfile.name
        for filename in audio_files:
            with open(filename, 'rb') as infile:
                outfile.write(infile.read())
            os.remove(filename)  # Clean up individual files
    return combined_audio_path, None
87
-
88
# Synchronous entry point used as the Gradio callback
@spaces.GPU
def tts_interface(text, voice, rate, pitch):
    """Run the async text_to_speech pipeline to completion.

    Returns the (audio, warning) pair produced by text_to_speech.
    """
    audio_path, notice = asyncio.run(text_to_speech(text, voice, rate, pitch))
    return audio_path, notice
93
-
94
- # Create Gradio application
95
- import gradio as gr
96
-
97
async def create_demo():
    """Build the Gradio interface for the text-to-speech app.

    Fetches the voice list via get_voices() to populate the dropdown, then
    wires tts_interface to a text box, a voice dropdown and rate/pitch
    sliders.

    Returns:
        gr.Interface: The configured (not yet launched) interface.
    """
    voices = await get_voices()
    default_voice = "en-US-AndrewNeural - en-US (Male)"  # 👈 Pick one of the available voices
    # Prefix legend corrected to match the voices actually used by the
    # paragraph handler (3F is YanNeural, 1M is BrianNeural); paragraphs are
    # split on a single blank line.
    description = """
    Default = male, other voices 1F:US_Emma, 2F:US_Jenny, 3F:HK_Yan, 1M:US_Brian, 2M:AU_William, 1C: Childvoice
    Enter your text, select a voice, and adjust the speech rate and pitch.
    The application will process your text paragraph by paragraph (separated by a blank line).
    """

    # Only preselect the default voice if the service actually offers it;
    # otherwise start with the empty choice.
    initial_voice = default_voice if default_voice in voices else ""

    demo = gr.Interface(
        fn=tts_interface,
        inputs=[
            gr.Textbox(label="Input Text", lines=5, placeholder="Separate paragraphs with a blank line."),
            gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value=initial_voice),
            gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1),
            gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1),
        ],
        outputs=[
            gr.Audio(label="Generated Audio", type="filepath"),
            gr.Markdown(label="Warning", visible=False),
        ],
        title="Voicecloning.be Text-to-Speech (Paragraph by Paragraph)",
        description=description,
        article="Process text paragraph by paragraph for smoother output.",
        analytics_enabled=False,
        # Gradio documents a string mode here; the boolean form is deprecated.
        allow_flagging="never",
    )
    return demo
125
-
126
# Script entry point: build the interface, then serve it
if __name__ == "__main__":
    # create_demo is a coroutine (it awaits the voice list), so it has to be
    # driven to completion before the interface can be launched.
    demo = asyncio.run(create_demo())
    demo.launch()
 
 
 
 
1
  import asyncio
 
2
  import os
3
+ import re
4
+ import tempfile
5
+ import edge_tts
6
+ import gradio as gr
7
+
8
# Default voice
default_voice = "en-US-AndrewNeural - en-US (Male)"

# Two-character prefixes that force a specific edge-tts voice (short names)
_PREFIX_VOICES = {
    "1F": "en-US-EmmaNeural",
    "2F": "en-US-JennyNeural",
    "3F": "en-HK-YanNeural",
    "1M": "en-US-BrianNeural",
    "2M": "en-AU-WilliamNeural",
    "1C": "en-GB-MaisieNeural",  # child voice
}


# Text-to-speech function for a single segment
async def process_speech_segment(text, voice, rate, pitch):
    """
    Processes a single segment of text (either a quote or regular text)
    and generates speech using edge-tts.

    A segment that starts with one of the prefixes 1F, 2F, 3F, 1M, 2M or 1C
    is spoken with the corresponding fixed voice; the prefix is removed and
    the remaining text is stripped of surrounding whitespace.

    Args:
        text (str): The text to be converted to speech.
        voice (str): Display label "<ShortName> - <Locale> (<Gender>)" of the
            voice to use when no prefix is present. A falsy value falls back
            to default_voice.
        rate (int): The speech rate adjustment in percent.
        pitch (int): The speech pitch adjustment in Hz.

    Returns:
        str: The path to the generated audio file, or None when there is
        nothing to speak or synthesis failed (the error is printed).
    """
    if not text.strip():
        return None

    prefix = text[:2]
    if prefix in _PREFIX_VOICES:
        voice_short_name = _PREFIX_VOICES[prefix]
        text2 = text[2:].strip()
        if not text2:
            # Prefix without any text: skip the synthesis request.
            return None
    else:
        # Resolve the caller's voice only when it is needed, so a missing
        # voice cannot crash a segment that carries its own prefix.
        voice_short_name = (voice or default_voice).split(" - ")[0]
        text2 = text

    rate_str = f"{rate:+d}%"
    pitch_str = f"{pitch:+d}Hz"
    tmp_path = None
    try:
        communicate = edge_tts.Communicate(text2, voice=voice_short_name, rate=rate_str, pitch=pitch_str)
        # Reserve a unique file name and close the handle before edge-tts
        # writes to that path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
            tmp_path = tmp_file.name
        await communicate.save(tmp_path)
        return tmp_path
    except Exception as e:
        # Best effort: a failed segment is logged and skipped by the caller.
        print(f"Error processing segment: {e}")  # Log the error
        if tmp_path is not None:
            # Do not leave an orphaned temp file behind.
            try:
                os.remove(tmp_path)
            except OSError:
                pass
        return None
71
 
72
def _split_quoted_segments(text):
    """Split text into consecutive segments at straight double quotes.

    Text between a pair of '"' characters becomes one segment (quotes
    removed); text outside quotes becomes its own segment. An unclosed quote
    keeps its '"' and runs to the end of the text.
    """
    segments = []
    pos = 0
    while pos < len(text):
        if text[pos] == '"':
            closing = text.find('"', pos + 1)
            if closing == -1:
                segments.append(text[pos:])  # Handle unclosed quote
                break
            segments.append(text[pos + 1:closing])
            pos = closing + 1
        else:
            next_quote = text.find('"', pos)
            if next_quote == -1:
                next_quote = len(text)
            segments.append(text[pos:next_quote])
            pos = next_quote
    return segments


def _remove_quietly(path):
    """Delete path if given, ignoring files that cannot be removed."""
    if not path:
        return
    try:
        os.remove(path)
    except OSError:
        pass


# Main text-to-speech function
async def text_to_speech(text, voice, rate, pitch):
    """
    Processes the input text, identifying quoted sections for different voices,
    and generates combined audio.

    Every segment (quoted or not) is passed to process_speech_segment, which
    picks the voice from an optional two-character prefix at the start of
    the segment. Segments that produce no audio are skipped.

    Args:
        text (str): The input text.
        voice (str): The default voice.
        rate (int): The speech rate.
        pitch (int): The speech pitch.

    Returns:
        tuple: (audio_path, message). audio_path is the path to the audio
        file, or None when nothing was generated. The second element is the
        result of gr.Warning(...) for invalid input, a gr.Error instance
        when combining fails, and None otherwise.
    """
    if not text.strip():
        return None, gr.Warning("Please enter text to convert.")
    if not voice:
        return None, gr.Warning("Please select a voice.")

    audio_files = []
    for segment_text in _split_quoted_segments(text):
        audio_path = await process_speech_segment(segment_text, voice, rate, pitch)
        if audio_path:
            audio_files.append(audio_path)

    if not audio_files:
        return None, None

    if len(audio_files) == 1:
        return audio_files[0], None

    # Combine audio files by byte-level concatenation. The output file is
    # created atomically with NamedTemporaryFile instead of the deprecated,
    # race-prone tempfile.mktemp.
    combined_audio_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as outfile:
            combined_audio_path = outfile.name
            for filename in audio_files:
                with open(filename, 'rb') as infile:
                    outfile.write(infile.read())
    except Exception as e:
        print(f"Error combining audio files: {e}")
        # Remove the partial output and every segment file so a failure
        # does not leak temp files.
        _remove_quietly(combined_audio_path)
        for filename in audio_files:
            _remove_quietly(filename)
        return None, gr.Error(f"Error combining audio files: {e}")

    for filename in audio_files:
        _remove_quietly(filename)  # Clean up individual files
    return combined_audio_path, None