openpecha
/

speecht5-tts-01

Model card Files Files and versions Community

TenzinGayche committed on Oct 23, 2023

Commit

0db7ad9

·

1 Parent(s): 5f5648d

Update handler.py

Files changed (1) hide show

handler.py +27 -0

handler.py CHANGED Viewed

@@ -12,6 +12,24 @@ import base64
 import re
 import requests
 import os
 converter = pyewts.pyewts()
 def download_file(url, destination):
     response = requests.get(url)
@@ -84,6 +102,15 @@ class EndpointHandler():
         speaker_embedding = torch.tensor(speaker_embedding)
         speech = self.model.generate_speech(input_ids.to('cuda'), speaker_embedding.to('cuda'), vocoder=self.vocoder.to('cuda'))
         speech = nr.reduce_noise(y=speech.to('cpu'), sr=16000)
         # Create a unique temporary WAV file
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav_file:
             temp_wav_path = temp_wav_file.name

 import re
 import requests
 import os
+from pydub import AudioSegment
def increase_volume_without_distortion(audio_data, sample_rate, target_dBFS):
    """Normalize mono audio to an RMS level of ``target_dBFS`` as int16 PCM.

    The level convention is decibels relative to 16-bit full scale
    (0 dBFS == RMS of 32768), so ``target_dBFS`` is normally negative.

    Args:
        audio_data: Array-like of samples. Floating-point input is treated
            as already being in the nominal [-1.0, 1.0] range; signed
            integer input is scaled by the full-scale value of its dtype.
        sample_rate: Sampling rate in Hz. Accepted for interface
            compatibility; a uniform gain does not depend on it.
        target_dBFS: Desired RMS level of the result, in dBFS.

    Returns:
        A 1-D ``np.int16`` array with the same number of samples as the
        input. Samples that would exceed the int16 range after the gain
        is applied are saturated rather than wrapped.

    Raises:
        TypeError: If ``audio_data`` is neither floating-point nor a
            signed integer type.
    """
    samples = np.asarray(audio_data).ravel()

    # Bring every supported dtype onto a common [-1.0, 1.0) scale first.
    # Reinterpreting raw float bytes as integer PCM would yield garbage
    # levels and garbage samples, so float and integer input are handled
    # separately.
    if np.issubdtype(samples.dtype, np.floating):
        unit_scale = samples.astype(np.float64)
    elif np.issubdtype(samples.dtype, np.signedinteger):
        full_scale = float(2 ** (8 * samples.dtype.itemsize - 1))
        unit_scale = samples.astype(np.float64) / full_scale
    else:
        raise TypeError(
            f"Unsupported audio dtype {samples.dtype}; "
            "expected floating-point or signed integer samples"
        )

    if unit_scale.size == 0:
        return np.zeros(0, dtype=np.int16)

    rms = float(np.sqrt(np.mean(np.square(unit_scale))))
    if rms == 0.0:
        # Pure silence has no defined level (-inf dBFS); leave it untouched
        # instead of computing an infinite gain.
        gain = 1.0
    else:
        gain = 10.0 ** (target_dBFS / 20.0) / rms

    # Saturate before the cast so out-of-range peaks clip at full scale
    # instead of wrapping around to the opposite sign.
    pcm = np.rint(unit_scale * gain * 32768.0)
    return np.clip(pcm, -32768, 32767).astype(np.int16)
 converter = pyewts.pyewts()
 def download_file(url, destination):
     response = requests.get(url)
         speaker_embedding = torch.tensor(speaker_embedding)
         speech = self.model.generate_speech(input_ids.to('cuda'), speaker_embedding.to('cuda'), vocoder=self.vocoder.to('cuda'))
         speech = nr.reduce_noise(y=speech.to('cpu'), sr=16000)
+        if isinstance(speech, torch.Tensor):
+            speech = speech.numpy()
+        # Increase volume without distortion
+        target_dBFS = -20.0  # Adjust the value according to your requirement
+        speech = increase_volume_without_distortion(speech, 16000, target_dBFS)
         # Create a unique temporary WAV file
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav_file:
             temp_wav_path = temp_wav_file.name