TenzinGayche
committed on
Commit
•
0db7ad9
1
Parent(s):
5f5648d
Update handler.py
Browse files- handler.py +27 -0
handler.py
CHANGED
@@ -12,6 +12,24 @@ import base64
|
|
12 |
import re
|
13 |
import requests
|
14 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
converter = pyewts.pyewts()
|
16 |
def download_file(url, destination):
|
17 |
response = requests.get(url)
|
@@ -84,6 +102,15 @@ class EndpointHandler():
|
|
84 |
speaker_embedding = torch.tensor(speaker_embedding)
|
85 |
speech = self.model.generate_speech(input_ids.to('cuda'), speaker_embedding.to('cuda'), vocoder=self.vocoder.to('cuda'))
|
86 |
speech = nr.reduce_noise(y=speech.to('cpu'), sr=16000)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
# Create a unique temporary WAV file
|
88 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav_file:
|
89 |
temp_wav_path = temp_wav_file.name
|
|
|
12 |
import re
|
13 |
import requests
|
14 |
import os
|
15 |
+
from pydub import AudioSegment
|
16 |
+
def increase_volume_without_distortion(audio_data, sample_rate, target_dBFS):
    """Normalize mono audio to an RMS level of ``target_dBFS`` as int16 PCM.

    The gain is computed on a floating-point copy of the signal and the
    result is quantised to 16-bit PCM exactly once. Feeding the raw bytes of
    a float array to a PCM container would reinterpret the float bit
    patterns as integers and produce noise, so float input is handled
    numerically here instead.

    Args:
        audio_data: Array-like of samples. Floating-point input is treated as
            a waveform whose full scale is +/-1.0; integer input is treated
            as PCM whose full scale is given by its dtype. Input with more
            than one dimension is flattened (the signal is handled as mono).
        sample_rate: Sampling rate in Hz. Accepted for interface
            compatibility; the level computation does not depend on it.
        target_dBFS: Desired RMS level in decibels relative to full scale
            (0 dBFS is full scale, so useful values are negative).

    Returns:
        A 1-D ``numpy.ndarray`` of dtype ``int16``. Samples that would exceed
        the 16-bit range after the gain is applied are saturated rather than
        wrapped. Empty or all-zero input yields an equally long array of
        zeros, because no finite gain can raise silence to the target level.

    Raises:
        TypeError: If ``audio_data`` is neither floating-point nor integer.
    """
    samples = np.asarray(audio_data).ravel()

    # Bring every supported dtype onto a common [-1.0, 1.0) float scale so
    # the dBFS arithmetic below is independent of the input sample format.
    if np.issubdtype(samples.dtype, np.floating):
        waveform = samples.astype(np.float64)
    elif np.issubdtype(samples.dtype, np.integer):
        full_scale = float(np.iinfo(samples.dtype).max) + 1.0
        waveform = samples.astype(np.float64) / full_scale
    else:
        raise TypeError(
            f"audio_data must be a float or integer array, got {samples.dtype}"
        )

    if waveform.size == 0:
        return np.zeros(0, dtype=np.int16)

    rms = float(np.sqrt(np.mean(np.square(waveform))))
    if rms == 0.0:
        # Silence sits at -inf dBFS; the required gain would be infinite.
        return np.zeros(waveform.shape, dtype=np.int16)

    # Linear gain that moves the current RMS level onto the target level:
    # 10 ** ((target_dBFS - 20 * log10(rms)) / 20) == 10 ** (target_dBFS / 20) / rms
    gain = 10.0 ** (target_dBFS / 20.0) / rms

    # Quantise to 16-bit PCM, saturating so loud peaks cannot wrap around.
    pcm = np.rint(waveform * gain * 32768.0)
    return np.clip(pcm, -32768, 32767).astype(np.int16)
|
33 |
converter = pyewts.pyewts()
|
34 |
def download_file(url, destination):
|
35 |
response = requests.get(url)
|
|
|
102 |
speaker_embedding = torch.tensor(speaker_embedding)
|
103 |
speech = self.model.generate_speech(input_ids.to('cuda'), speaker_embedding.to('cuda'), vocoder=self.vocoder.to('cuda'))
|
104 |
speech = nr.reduce_noise(y=speech.to('cpu'), sr=16000)
|
105 |
+
if isinstance(speech, torch.Tensor):
|
106 |
+
|
107 |
+
|
108 |
+
speech = speech.numpy()
|
109 |
+
|
110 |
+
# Increase volume without distortion
|
111 |
+
target_dBFS = -20.0 # Adjust the value according to your requirement
|
112 |
+
speech = increase_volume_without_distortion(speech, 16000, target_dBFS)
|
113 |
+
|
114 |
# Create a unique temporary WAV file
|
115 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav_file:
|
116 |
temp_wav_path = temp_wav_file.name
|