Update app.py
Browse files
app.py
CHANGED
@@ -202,10 +202,10 @@ async def transcript_to_speech(transcript_text, voice, rate, pitch):
|
|
202 |
timed_audio_segments = []
|
203 |
max_end_time_ms = 0
|
204 |
previous_end_time_ms = 0
|
205 |
-
|
206 |
|
207 |
-
|
208 |
-
start_time, audio_paths = await process_transcript_line(
|
209 |
if start_time is not None and audio_paths:
|
210 |
combined_line_audio = AudioSegment.empty()
|
211 |
for path in audio_paths:
|
@@ -216,17 +216,43 @@ async def transcript_to_speech(transcript_text, voice, rate, pitch):
|
|
216 |
except FileNotFoundError:
|
217 |
print(f"Warning: Audio file not found: {path}")
|
218 |
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
228 |
timed_audio_segments.append({'start': intended_start_time, 'audio': combined_line_audio})
|
229 |
-
previous_start_time_ms = start_time
|
230 |
previous_end_time_ms = max(previous_end_time_ms, intended_start_time + current_audio_duration)
|
231 |
max_end_time_ms = max(max_end_time_ms, previous_end_time_ms)
|
232 |
elif audio_paths:
|
@@ -235,6 +261,7 @@ async def transcript_to_speech(transcript_text, voice, rate, pitch):
|
|
235 |
os.remove(path)
|
236 |
except FileNotFoundError:
|
237 |
pass # Clean up even if no timestamp
|
|
|
238 |
|
239 |
if not timed_audio_segments:
|
240 |
return None, "No processable audio segments found."
|
|
|
202 |
timed_audio_segments = []
|
203 |
max_end_time_ms = 0
|
204 |
previous_end_time_ms = 0
|
205 |
+
i = 0
|
206 |
|
207 |
+
while i < len(lines):
|
208 |
+
start_time, audio_paths = await process_transcript_line(lines[i], voice, rate, pitch)
|
209 |
if start_time is not None and audio_paths:
|
210 |
combined_line_audio = AudioSegment.empty()
|
211 |
for path in audio_paths:
|
|
|
216 |
except FileNotFoundError:
|
217 |
print(f"Warning: Audio file not found: {path}")
|
218 |
|
219 |
+
current_audio_duration = len(combined_line_audio)
|
220 |
+
intended_start_time = start_time
|
221 |
+
|
222 |
+
# Check duration until the next timestamp
|
223 |
+
if i + 1 < len(lines):
|
224 |
+
next_start_time = int(lines[i+1].split(',')[0].replace(':', '')) * 1000
|
225 |
+
next_start_time_ms = (next_start_time // 1000000 * 3600000) + ((next_start_time % 1000000) // 10000 * 60000) + ((next_start_time % 1000000) % 10000 // 100) * 1000 + (next_start_time % 1000000) % 10000 % 100
|
226 |
+
duration_to_next = next_start_time_ms - start_time
|
227 |
+
|
228 |
+
if current_audio_duration > duration_to_next:
|
229 |
+
# Hold and append audio from subsequent lines
|
230 |
+
j = i + 1
|
231 |
+
while j < len(lines):
|
232 |
+
next_start_time, next_audio_paths = await process_transcript_line(lines[j], voice, rate, pitch)
|
233 |
+
if next_start_time is not None and next_audio_paths:
|
234 |
+
for next_path in next_audio_paths:
|
235 |
+
try:
|
236 |
+
next_audio = AudioSegment.from_mp3(next_path)
|
237 |
+
combined_line_audio += next_audio
|
238 |
+
os.remove(next_path)
|
239 |
+
except FileNotFoundError:
|
240 |
+
print(f"Warning: Audio file not found: {next_path}")
|
241 |
+
current_audio_duration = len(combined_line_audio)
|
242 |
+
|
243 |
+
# Check duration until the next timestamp
|
244 |
+
if j + 1 < len(lines):
|
245 |
+
next_start_time_2 = int(lines[j+1].split(',')[0].replace(':', '')) * 1000
|
246 |
+
next_start_time_ms_2 = (next_start_time_2 // 1000000 * 3600000) + ((next_start_time_2 % 1000000) // 10000 * 60000 ) + ((next_start_time_2 % 1000000) % 10000 // 100) * 1000 + (next_start_time_2 % 1000000) % 10000 % 100
|
247 |
+
duration_to_next_2 = next_start_time_ms_2 - start_time
|
248 |
+
if current_audio_duration <= duration_to_next_2:
|
249 |
+
break
|
250 |
+
j += 1
|
251 |
+
else:
|
252 |
+
break
|
253 |
+
i = j #update i to j
|
254 |
+
|
255 |
timed_audio_segments.append({'start': intended_start_time, 'audio': combined_line_audio})
|
|
|
256 |
previous_end_time_ms = max(previous_end_time_ms, intended_start_time + current_audio_duration)
|
257 |
max_end_time_ms = max(max_end_time_ms, previous_end_time_ms)
|
258 |
elif audio_paths:
|
|
|
261 |
os.remove(path)
|
262 |
except FileNotFoundError:
|
263 |
pass # Clean up even if no timestamp
|
264 |
+
i += 1
|
265 |
|
266 |
if not timed_audio_segments:
|
267 |
return None, "No processable audio segments found."
|