cnph001 committed on
Commit
218e261
·
verified ·
1 Parent(s): 0d2cfad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -7
app.py CHANGED
@@ -54,7 +54,7 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
54
  current_rate = rate
55
  current_pitch = pitch
56
  processed_text = text_segment.strip()
57
- print(f"Processing this text segment: {processed_text}") # Debug
58
  voice_map = {
59
  "1F": "en-GB-SoniaNeural",
60
  "2M": "en-GB-RyanNeural",
@@ -102,12 +102,23 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
102
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
103
  audio_path = tmp_file.name
104
  await communicate.save(audio_path)
105
- if overall_target_duration_ms is not None and os.path.exists(audio_path) and overall_target_duration_ms > 0:
 
106
  audio = AudioSegment.from_mp3(audio_path)
107
- audio_duration_ms = len(audio)
108
- # We don't do the stretching here anymore for individual segments
 
 
 
 
 
 
 
 
 
 
 
109
 
110
- return audio_path
111
  except Exception as e:
112
  print(f"Edge TTS error processing '{processed_text}': {e}")
113
  return None
@@ -124,7 +135,6 @@ async def process_transcript_line(line, next_line_start_time, default_voice, rat
124
  int(start_s) * 1000 +
125
  int(start_ms)
126
  )
127
-
128
  audio_segments = []
129
  split_parts = re.split(r'[“”"]', text_parts)
130
  process_next = False
@@ -140,7 +150,28 @@ async def process_transcript_line(line, next_line_start_time, default_voice, rat
140
  audio_path = await generate_audio_with_voice_prefix(part, default_voice, rate, pitch, overall_duration_ms, speed_adjustment_factor)
141
  if audio_path:
142
  audio_segments.append(audio_path)
143
- return start_time_ms, audio_segments, overall_duration_ms
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  return None, None, None
145
 
146
  async def transcript_to_speech(transcript_text, voice, rate, pitch, speed_adjustment_factor):
 
54
  current_rate = rate
55
  current_pitch = pitch
56
  processed_text = text_segment.strip()
57
+ print(f"Processing this text segment: '{processed_text}'") # Debug
58
  voice_map = {
59
  "1F": "en-GB-SoniaNeural",
60
  "2M": "en-GB-RyanNeural",
 
102
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
103
  audio_path = tmp_file.name
104
  await communicate.save(audio_path)
105
+
106
+ if os.path.exists(audio_path):
107
  audio = AudioSegment.from_mp3(audio_path)
108
+ # Trim leading and trailing silence
109
+ def detect_leading_silence(sound, silence_threshold=-50.0, chunk_size=10):
110
+ trim_ms = 0
111
+ assert chunk_size > 0 # to avoid infinite loop
112
+ while sound[trim_ms:trim_ms+chunk_size].dBFS < silence_threshold and trim_ms < len(sound):
113
+ trim_ms += chunk_size
114
+ return trim_ms
115
+
116
+ start_trim = detect_leading_silence(audio)
117
+ end_trim = detect_leading_silence(audio.reverse())
118
+ trimmed_audio = audio[start_trim:len(audio)-end_trim]
119
+ trimmed_audio.export(audio_path, format="mp3") # Overwrite with trimmed version
120
+ return audio_path
121
 
 
122
  except Exception as e:
123
  print(f"Edge TTS error processing '{processed_text}': {e}")
124
  return None
 
135
  int(start_s) * 1000 +
136
  int(start_ms)
137
  )
 
138
  audio_segments = []
139
  split_parts = re.split(r'[“”"]', text_parts)
140
  process_next = False
 
150
  audio_path = await generate_audio_with_voice_prefix(part, default_voice, rate, pitch, overall_duration_ms, speed_adjustment_factor)
151
  if audio_path:
152
  audio_segments.append(audio_path)
153
+
154
+ if audio_segments:
155
+ combined_audio = AudioSegment.empty()
156
+ for segment_path in audio_segments:
157
+ try:
158
+ segment = AudioSegment.from_mp3(segment_path)
159
+ combined_audio += segment
160
+ os.remove(segment_path) # Clean up individual segment files
161
+ except Exception as e:
162
+ print(f"Error loading or combining audio segment {segment_path}: {e}")
163
+ return None, None, None
164
+
165
+ combined_audio_path = f"combined_audio_{start_time_ms}.mp3"
166
+ try:
167
+ combined_audio.export(combined_audio_path, format="mp3")
168
+ return start_time_ms, [combined_audio_path], overall_duration_ms
169
+ except Exception as e:
170
+ print(f"Error exporting combined audio: {e}")
171
+ return None, None, None
172
+
173
+ return start_time_ms, [], overall_duration_ms # Return empty list if no audio generated
174
+
175
  return None, None, None
176
 
177
  async def transcript_to_speech(transcript_text, voice, rate, pitch, speed_adjustment_factor):