Update app.py
Browse files
app.py
CHANGED
@@ -138,48 +138,36 @@ async def process_transcript_line(line, default_voice, rate, pitch):
|
|
138 |
return start_time_ms, audio_segments
|
139 |
return None, None
|
140 |
|
141 |
-
async def
|
142 |
-
|
143 |
-
|
144 |
-
if
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
pass # Clean up even if no timestamp
|
172 |
-
|
173 |
-
if not timed_audio_segments:
|
174 |
-
return None, "No processable audio segments found."
|
175 |
-
|
176 |
-
final_audio = AudioSegment.silent(duration=max_end_time_ms, frame_rate=24000)
|
177 |
-
for segment in timed_audio_segments:
|
178 |
-
final_audio = final_audio.overlay(segment['audio'], position=segment['start'])
|
179 |
-
|
180 |
-
combined_audio_path = tempfile.mktemp(suffix=".mp3")
|
181 |
-
final_audio.export(combined_audio_path, format="mp3")
|
182 |
-
return combined_audio_path, None
|
183 |
|
184 |
@spaces.GPU
|
185 |
def tts_interface(transcript, voice, rate, pitch):
|
|
|
138 |
return start_time_ms, audio_segments
|
139 |
return None, None
|
140 |
|
141 |
+
async def process_transcript_line(line, default_voice, rate, pitch):
    """Parse one timestamped transcript line and synthesize its text.

    The line must begin with an ``HH:MM:SS.mmm`` timestamp followed by
    whitespace and the text to speak. The text is cut at every double-quote
    character and each non-blank piece -- quoted or not -- is handed to
    ``generate_audio_with_voice_prefix`` together with ``default_voice``,
    ``rate`` and ``pitch``.

    Args:
        line: One transcript line.
        default_voice: Voice forwarded to the generator for every piece.
        rate: Speech-rate setting forwarded to the generator unchanged.
        pitch: Pitch setting forwarded to the generator unchanged.

    Returns:
        ``(start_time_ms, audio_segments)``, where ``start_time_ms`` is the
        timestamp converted to milliseconds and ``audio_segments`` lists every
        truthy value the generator returned, in text order (possibly empty).
        ``(None, None)`` when the line does not start with a timestamp.
    """
    match = re.match(r'(\d{2}):(\d{2}):(\d{2})\.(\d{3})\s+(.*)', line)
    if not match:
        return None, None

    hours, minutes, seconds, milliseconds, text_parts = match.groups()
    start_time_ms = (
        int(hours) * 3600000
        + int(minutes) * 60000
        + int(seconds) * 1000
        + int(milliseconds)
    )

    audio_segments = []
    # Quotes only delimit pieces: text inside and outside them was always sent
    # through the same call, so no in-quote/out-of-quote state is tracked.
    for part in text_parts.split('"'):
        if not part.strip():
            continue
        audio_path = await generate_audio_with_voice_prefix(part, default_voice, rate, pitch)
        # Falsy results are dropped rather than recorded as segments.
        if audio_path:
            audio_segments.append(audio_path)

    return start_time_ms, audio_segments
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
|
172 |
@spaces.GPU
|
173 |
def tts_interface(transcript, voice, rate, pitch):
|