cnph001 committed on
Commit
b184cb6
·
verified ·
1 Parent(s): ba3a67a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -42
app.py CHANGED
@@ -138,48 +138,36 @@ async def process_transcript_line(line, default_voice, rate, pitch):
138
  return start_time_ms, audio_segments
139
  return None, None
140
 
141
def _discard_files(paths):
    """Delete every file in *paths*, ignoring files that are already gone."""
    for path in paths:
        try:
            os.remove(path)
        except FileNotFoundError:
            pass


async def transcript_to_speech(transcript_text, voice, rate, pitch):
    """Render a timestamped transcript into a single MP3 file.

    Each line of ``transcript_text`` is handed to ``process_transcript_line``,
    which yields a start offset in milliseconds and a list of audio file paths.
    The audio of one line is concatenated, then every line is overlaid onto a
    silent track at its start offset.

    Args:
        transcript_text: The full transcript, one entry per line.
        voice: Voice passed through to ``process_transcript_line``.
        rate: Speech rate passed through to ``process_transcript_line``.
        pitch: Speech pitch passed through to ``process_transcript_line``.

    Returns:
        A ``(path, message)`` pair: ``(mp3_path, None)`` on success, or
        ``(None, message)`` when the input is empty, no voice is selected, or
        no line produced audio.
    """
    if not transcript_text.strip():
        return None, gr.Warning("Please enter transcript text.")
    if not voice:
        return None, gr.Warning("Please select a voice.")

    timed_audio_segments = []
    max_end_time_ms = 0

    for line in transcript_text.strip().split('\n'):
        start_time, audio_paths = await process_transcript_line(line, voice, rate, pitch)
        if not audio_paths:
            continue
        if start_time is None:
            # Clean up even if no timestamp
            _discard_files(audio_paths)
            continue

        combined_line_audio = AudioSegment.empty()
        try:
            for path in audio_paths:
                try:
                    combined_line_audio += AudioSegment.from_mp3(path)
                except FileNotFoundError:
                    print(f"Warning: Audio file not found: {path}")
        finally:
            # Remove the per-segment files even when decoding fails, so an
            # error on one segment does not leak the rest of the line's files.
            _discard_files(audio_paths)

        if combined_line_audio:
            timed_audio_segments.append({'start': start_time, 'audio': combined_line_audio})
            max_end_time_ms = max(max_end_time_ms, start_time + len(combined_line_audio))

    if not timed_audio_segments:
        return None, "No processable audio segments found."

    # The silent base track is as long as the latest-ending line.
    final_audio = AudioSegment.silent(duration=max_end_time_ms, frame_rate=24000)
    for segment in timed_audio_segments:
        final_audio = final_audio.overlay(segment['audio'], position=segment['start'])

    # mkstemp creates the file atomically; the deprecated mktemp only returns
    # a name, which another process could claim before the export opens it.
    fd, combined_audio_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    final_audio.export(combined_audio_path, format="mp3")
    return combined_audio_path, None
183
 
184
  @spaces.GPU
185
  def tts_interface(transcript, voice, rate, pitch):
 
138
  return start_time_ms, audio_segments
139
  return None, None
140
 
141
async def process_transcript_line(line, default_voice, rate, pitch):
    """Parse one timestamped transcript line and synthesize its text.

    A line is expected to start with an ``HH:MM:SS.mmm`` timestamp, followed
    by whitespace and the text to speak. The text is cut at every double-quote
    character, and each non-blank piece (quoted or not) is passed to
    ``generate_audio_with_voice_prefix`` together with ``default_voice``,
    ``rate`` and ``pitch``.

    Args:
        line: A single transcript line. Leading whitespace is ignored.
        default_voice: Voice forwarded to ``generate_audio_with_voice_prefix``.
        rate: Speech rate forwarded to ``generate_audio_with_voice_prefix``.
        pitch: Speech pitch forwarded to ``generate_audio_with_voice_prefix``.

    Returns:
        ``(start_time_ms, audio_segments)`` where ``start_time_ms`` is the
        timestamp in milliseconds and ``audio_segments`` lists the truthy
        results of the generator (presumably audio file paths; confirm
        against ``generate_audio_with_voice_prefix``). Returns
        ``(None, None)`` when the line does not start with a timestamp.
    """
    # Strip leading whitespace so an indented line is not silently dropped;
    # re.match only matches at the very start of the string.
    match = re.match(r'(\d{2}):(\d{2}):(\d{2})\.(\d{3})\s+(.*)', line.lstrip())
    if match is None:
        return None, None

    hours, minutes, seconds, milliseconds, text_parts = match.groups()
    start_time_ms = (
        int(hours) * 3600000 +
        int(minutes) * 60000 +
        int(seconds) * 1000 +
        int(milliseconds)
    )

    audio_segments = []
    # Quoted and unquoted pieces are synthesized the same way, so the quote
    # marks serve only as separators between pieces.
    for part in text_parts.split('"'):
        if not part.strip():
            continue
        audio_path = await generate_audio_with_voice_prefix(part, default_voice, rate, pitch)
        if audio_path:
            audio_segments.append(audio_path)

    return start_time_ms, audio_segments
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
  @spaces.GPU
173
  def tts_interface(transcript, voice, rate, pitch):