TenzinGayche
committed on
Commit
•
e3092d1
1
Parent(s):
4d2be3f
Update handler.py
Browse files - handler.py +8 -3
handler.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from typing import Dict, Any
|
2 |
import librosa
|
3 |
import numpy as np
|
4 |
import torch
|
@@ -7,6 +7,7 @@ import noisereduce as nr
|
|
7 |
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
|
8 |
from num2tib.core import convert
|
9 |
from num2tib.core import convert2text
|
|
|
10 |
import re
|
11 |
import requests
|
12 |
converter = pyewts.pyewts()
|
@@ -60,7 +61,7 @@ class EndpointHandler():
|
|
60 |
self.vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
|
61 |
|
62 |
|
63 |
-
def __call__(self, data: Dict[str, Any]) ->
|
64 |
"""_summary_
|
65 |
|
66 |
Args:
|
@@ -86,4 +87,8 @@ class EndpointHandler():
|
|
86 |
speaker_embedding = torch.tensor(speaker_embedding)
|
87 |
speech = self.model.generate_speech(input_ids.to('cuda'), speaker_embedding.to('cuda'), vocoder=self.vocoder.to('cuda'))
|
88 |
speech = nr.reduce_noise(y=speech.to('cpu'), sr=16000)
|
89 |
-
return
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Dict, Any,Union
|
2 |
import librosa
|
3 |
import numpy as np
|
4 |
import torch
|
|
|
7 |
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
|
8 |
from num2tib.core import convert
|
9 |
from num2tib.core import convert2text
|
10 |
+
import base64
|
11 |
import re
|
12 |
import requests
|
13 |
converter = pyewts.pyewts()
|
|
|
61 |
self.vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
|
62 |
|
63 |
|
64 |
+
def __call__(self, data: Dict[str, Any]) -> Dict[str, Union[int, str]]:
|
65 |
"""_summary_
|
66 |
|
67 |
Args:
|
|
|
87 |
speaker_embedding = torch.tensor(speaker_embedding)
|
88 |
speech = self.model.generate_speech(input_ids.to('cuda'), speaker_embedding.to('cuda'), vocoder=self.vocoder.to('cuda'))
|
89 |
speech = nr.reduce_noise(y=speech.to('cpu'), sr=16000)
|
90 |
+
return {
|
91 |
+
"sample_rate": 16000,
|
92 |
+
"audio": base64.b64encode(speech.numpy()).decode("utf-8"),
|
93 |
+
|
94 |
+
}
|