TenzinGayche committed on
Commit
e3092d1
1 Parent(s): 4d2be3f

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +8 -3
handler.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import Dict, Any
2
  import librosa
3
  import numpy as np
4
  import torch
@@ -7,6 +7,7 @@ import noisereduce as nr
7
  from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
8
  from num2tib.core import convert
9
  from num2tib.core import convert2text
 
10
  import re
11
  import requests
12
  converter = pyewts.pyewts()
@@ -60,7 +61,7 @@ class EndpointHandler():
60
  self.vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
61
 
62
 
63
- def __call__(self, data: Dict[str, Any]) -> bytes:
64
  """_summary_
65
 
66
  Args:
@@ -86,4 +87,8 @@ class EndpointHandler():
86
  speaker_embedding = torch.tensor(speaker_embedding)
87
  speech = self.model.generate_speech(input_ids.to('cuda'), speaker_embedding.to('cuda'), vocoder=self.vocoder.to('cuda'))
88
  speech = nr.reduce_noise(y=speech.to('cpu'), sr=16000)
89
- return speech.tobytes()
 
 
 
 
 
1
+ from typing import Dict, Any,Union
2
  import librosa
3
  import numpy as np
4
  import torch
 
7
  from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
8
  from num2tib.core import convert
9
  from num2tib.core import convert2text
10
+ import base64
11
  import re
12
  import requests
13
  converter = pyewts.pyewts()
 
61
  self.vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
62
 
63
 
64
+ def __call__(self, data: Dict[str, Any]) -> Dict[str, Union[int, str]]:
65
  """_summary_
66
 
67
  Args:
 
87
  speaker_embedding = torch.tensor(speaker_embedding)
88
  speech = self.model.generate_speech(input_ids.to('cuda'), speaker_embedding.to('cuda'), vocoder=self.vocoder.to('cuda'))
89
  speech = nr.reduce_noise(y=speech.to('cpu'), sr=16000)
90
+ return {
91
+ "sample_rate": 16000,
92
+ "audio": base64.b64encode(speech.numpy()).decode("utf-8"),
93
+
94
+ }