Update app.py
Browse files
app.py
CHANGED
@@ -54,7 +54,7 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
|
|
54 |
current_rate = rate
|
55 |
current_pitch = pitch
|
56 |
processed_text = text_segment.strip()
|
57 |
-
print(f"Processing this text segment: {processed_text}") # Debug
|
58 |
voice_map = {
|
59 |
"1F": "en-GB-SoniaNeural",
|
60 |
"2M": "en-GB-RyanNeural",
|
@@ -102,12 +102,23 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
|
|
102 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
|
103 |
audio_path = tmp_file.name
|
104 |
await communicate.save(audio_path)
|
105 |
-
|
|
|
106 |
audio = AudioSegment.from_mp3(audio_path)
|
107 |
-
|
108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
|
110 |
-
return audio_path
|
111 |
except Exception as e:
|
112 |
print(f"Edge TTS error processing '{processed_text}': {e}")
|
113 |
return None
|
@@ -124,7 +135,6 @@ async def process_transcript_line(line, next_line_start_time, default_voice, rat
|
|
124 |
int(start_s) * 1000 +
|
125 |
int(start_ms)
|
126 |
)
|
127 |
-
|
128 |
audio_segments = []
|
129 |
split_parts = re.split(r'[“”"]', text_parts)
|
130 |
process_next = False
|
@@ -140,7 +150,28 @@ async def process_transcript_line(line, next_line_start_time, default_voice, rat
|
|
140 |
audio_path = await generate_audio_with_voice_prefix(part, default_voice, rate, pitch, overall_duration_ms, speed_adjustment_factor)
|
141 |
if audio_path:
|
142 |
audio_segments.append(audio_path)
|
143 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
144 |
return None, None, None
|
145 |
|
146 |
async def transcript_to_speech(transcript_text, voice, rate, pitch, speed_adjustment_factor):
|
|
|
54 |
current_rate = rate
|
55 |
current_pitch = pitch
|
56 |
processed_text = text_segment.strip()
|
57 |
+
print(f"Processing this text segment: '{processed_text}'") # Debug
|
58 |
voice_map = {
|
59 |
"1F": "en-GB-SoniaNeural",
|
60 |
"2M": "en-GB-RyanNeural",
|
|
|
102 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
|
103 |
audio_path = tmp_file.name
|
104 |
await communicate.save(audio_path)
|
105 |
+
|
106 |
+
if os.path.exists(audio_path):
|
107 |
audio = AudioSegment.from_mp3(audio_path)
|
108 |
+
# Trim leading and trailing silence
|
109 |
+
def detect_leading_silence(sound, silence_threshold=-50.0, chunk_size=10):
    """Return how far into *sound* the leading silence extends.

    The audio is scanned from the start in windows of ``chunk_size``
    index units; scanning stops at the first window whose ``dBFS`` is not
    below ``silence_threshold``.

    Args:
        sound: Object supporting ``len()``, slice indexing and a ``dBFS``
            attribute on the resulting slices. The surrounding code passes
            the segment loaded with ``AudioSegment.from_mp3`` and its
            ``reverse()``.
            NOTE(review): pydub indexes segments in milliseconds, so the
            result is presumably a millisecond offset - confirm.
        silence_threshold: Loudness (dBFS) below which a window counts as
            silent.
        chunk_size: Window size used for each step; must be positive.

    Returns:
        Offset of the first non-silent window, never larger than
        ``len(sound)``.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    # An explicit check instead of ``assert``: asserts are stripped under
    # ``python -O`` and a non-positive step would then loop forever.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")

    total_ms = len(sound)
    trim_ms = 0
    # Test the bound first so we never slice (and measure) past the end.
    while (
        trim_ms < total_ms
        and sound[trim_ms:trim_ms + chunk_size].dBFS < silence_threshold
    ):
        trim_ms += chunk_size

    # For fully silent audio the last step can overshoot the length; clamp
    # so callers always get a valid offset into ``sound``.
    return min(trim_ms, total_ms)
|
115 |
+
|
116 |
+
start_trim = detect_leading_silence(audio)
|
117 |
+
end_trim = detect_leading_silence(audio.reverse())
|
118 |
+
trimmed_audio = audio[start_trim:len(audio)-end_trim]
|
119 |
+
trimmed_audio.export(audio_path, format="mp3") # Overwrite with trimmed version
|
120 |
+
return audio_path
|
121 |
|
|
|
122 |
except Exception as e:
|
123 |
print(f"Edge TTS error processing '{processed_text}': {e}")
|
124 |
return None
|
|
|
135 |
int(start_s) * 1000 +
|
136 |
int(start_ms)
|
137 |
)
|
|
|
138 |
audio_segments = []
|
139 |
split_parts = re.split(r'[“”"]', text_parts)
|
140 |
process_next = False
|
|
|
150 |
audio_path = await generate_audio_with_voice_prefix(part, default_voice, rate, pitch, overall_duration_ms, speed_adjustment_factor)
|
151 |
if audio_path:
|
152 |
audio_segments.append(audio_path)
|
153 |
+
|
154 |
+
if audio_segments:
|
155 |
+
combined_audio = AudioSegment.empty()
|
156 |
+
for segment_path in audio_segments:
|
157 |
+
try:
|
158 |
+
segment = AudioSegment.from_mp3(segment_path)
|
159 |
+
combined_audio += segment
|
160 |
+
os.remove(segment_path) # Clean up individual segment files
|
161 |
+
except Exception as e:
|
162 |
+
print(f"Error loading or combining audio segment {segment_path}: {e}")
|
163 |
+
return None, None, None
|
164 |
+
|
165 |
+
combined_audio_path = f"combined_audio_{start_time_ms}.mp3"
|
166 |
+
try:
|
167 |
+
combined_audio.export(combined_audio_path, format="mp3")
|
168 |
+
return start_time_ms, [combined_audio_path], overall_duration_ms
|
169 |
+
except Exception as e:
|
170 |
+
print(f"Error exporting combined audio: {e}")
|
171 |
+
return None, None, None
|
172 |
+
|
173 |
+
return start_time_ms, [], overall_duration_ms # Return empty list if no audio generated
|
174 |
+
|
175 |
return None, None, None
|
176 |
|
177 |
async def transcript_to_speech(transcript_text, voice, rate, pitch, speed_adjustment_factor):
|