cnph001 committed on
Commit
3229678
·
verified ·
1 Parent(s): a6eabef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -13
app.py CHANGED
@@ -202,10 +202,10 @@ async def transcript_to_speech(transcript_text, voice, rate, pitch):
202
  timed_audio_segments = []
203
  max_end_time_ms = 0
204
  previous_end_time_ms = 0
205
- previous_start_time_ms = 0
206
 
207
- for i, line in enumerate(lines):
208
- start_time, audio_paths = await process_transcript_line(line, voice, rate, pitch)
209
  if start_time is not None and audio_paths:
210
  combined_line_audio = AudioSegment.empty()
211
  for path in audio_paths:
@@ -216,17 +216,43 @@ async def transcript_to_speech(transcript_text, voice, rate, pitch):
216
  except FileNotFoundError:
217
  print(f"Warning: Audio file not found: {path}")
218
 
219
- if combined_line_audio:
220
- current_audio_duration = len(combined_line_audio)
221
- intended_start_time = start_time
222
-
223
- if i > 0:
224
- time_difference = start_time - previous_start_time_ms
225
- if current_audio_duration > time_difference:
226
- intended_start_time = previous_end_time_ms
227
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  timed_audio_segments.append({'start': intended_start_time, 'audio': combined_line_audio})
229
- previous_start_time_ms = start_time
230
  previous_end_time_ms = max(previous_end_time_ms, intended_start_time + current_audio_duration)
231
  max_end_time_ms = max(max_end_time_ms, previous_end_time_ms)
232
  elif audio_paths:
@@ -235,6 +261,7 @@ async def transcript_to_speech(transcript_text, voice, rate, pitch):
235
  os.remove(path)
236
  except FileNotFoundError:
237
  pass # Clean up even if no timestamp
 
238
 
239
  if not timed_audio_segments:
240
  return None, "No processable audio segments found."
 
202
  timed_audio_segments = []
203
  max_end_time_ms = 0
204
  previous_end_time_ms = 0
205
+ i = 0
206
 
207
+ while i < len(lines):
208
+ start_time, audio_paths = await process_transcript_line(lines[i], voice, rate, pitch)
209
  if start_time is not None and audio_paths:
210
  combined_line_audio = AudioSegment.empty()
211
  for path in audio_paths:
 
216
  except FileNotFoundError:
217
  print(f"Warning: Audio file not found: {path}")
218
 
219
+ current_audio_duration = len(combined_line_audio)
220
+ intended_start_time = start_time
221
+
222
+ # Check duration until the next timestamp
223
+ if i + 1 < len(lines):
224
+ next_start_time = int(lines[i+1].split(',')[0].replace(':', '')) * 1000
225
+ next_start_time_ms = (next_start_time // 1000000 * 3600000) + ((next_start_time % 1000000) // 10000 * 60000) + ((next_start_time % 1000000) % 10000 // 100) * 1000 + (next_start_time % 1000000) % 10000 % 100
226
+ duration_to_next = next_start_time_ms - start_time
227
+
228
+ if current_audio_duration > duration_to_next:
229
+ # Hold and append audio from subsequent lines
230
+ j = i + 1
231
+ while j < len(lines):
232
+ next_start_time, next_audio_paths = await process_transcript_line(lines[j], voice, rate, pitch)
233
+ if next_start_time is not None and next_audio_paths:
234
+ for next_path in next_audio_paths:
235
+ try:
236
+ next_audio = AudioSegment.from_mp3(next_path)
237
+ combined_line_audio += next_audio
238
+ os.remove(next_path)
239
+ except FileNotFoundError:
240
+ print(f"Warning: Audio file not found: {next_path}")
241
+ current_audio_duration = len(combined_line_audio)
242
+
243
+ #check duration to the next timestamp.
244
+ if j + 1 < len(lines):
245
+ next_start_time_2 = int(lines[j+1].split(',')[0].replace(':', '')) * 1000
246
+ next_start_time_ms_2 = (next_start_time_2 // 1000000 * 3600000) + ((next_start_time_2 % 1000000) // 10000 * 60000 ) + ((next_start_time_2 % 1000000) % 10000 // 100) * 1000 + (next_start_time_2 % 1000000) % 10000 % 100
247
+ duration_to_next_2 = next_start_time_ms_2 - start_time
248
+ if current_audio_duration <= duration_to_next_2:
249
+ break
250
+ j += 1
251
+ else:
252
+ break
253
+ i = j #update i to j
254
+
255
  timed_audio_segments.append({'start': intended_start_time, 'audio': combined_line_audio})
 
256
  previous_end_time_ms = max(previous_end_time_ms, intended_start_time + current_audio_duration)
257
  max_end_time_ms = max(max_end_time_ms, previous_end_time_ms)
258
  elif audio_paths:
 
261
  os.remove(path)
262
  except FileNotFoundError:
263
  pass # Clean up even if no timestamp
264
+ i += 1
265
 
266
  if not timed_audio_segments:
267
  return None, "No processable audio segments found."