黄腾 aopstudio Kevin Hu committed on
Commit
ac8ea20
·
1 Parent(s): 30a7616

fix tts interface error (#2197)

Browse files

### What problem does this PR solve?

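Fix the TTS interface error: the conversation endpoint now calls `tts_mdl.tts(text)`, and the TTS model method is renamed from `transcription` to `tts`.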

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Co-authored-by: Zhedong Cen <cenzhedong2@126.com>
Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>

api/apps/conversation_app.py CHANGED
@@ -196,12 +196,12 @@ def tts():
196
  tts_mdl = LLMBundle(tenants[0]["tenant_id"], LLMType.TTS, tts_id)
197
  def stream_audio():
198
  try:
199
- for chunk in tts_mdl(text):
200
  yield chunk
201
  except Exception as e:
202
- yield "data:" + json.dumps({"retcode": 500, "retmsg": str(e),
203
  "data": {"answer": "**ERROR**: "+str(e)}},
204
- ensure_ascii=False).encode('utf-8')
205
 
206
  resp = Response(stream_audio(), mimetype="audio/mpeg")
207
  resp.headers.add_header("Cache-Control", "no-cache")
 
196
  tts_mdl = LLMBundle(tenants[0]["tenant_id"], LLMType.TTS, tts_id)
197
  def stream_audio():
198
  try:
199
+ for chunk in tts_mdl.tts(text):
200
  yield chunk
201
  except Exception as e:
202
+ yield ("data:" + json.dumps({"retcode": 500, "retmsg": str(e),
203
  "data": {"answer": "**ERROR**: "+str(e)}},
204
+ ensure_ascii=False)).encode('utf-8')
205
 
206
  resp = Response(stream_audio(), mimetype="audio/mpeg")
207
  resp.headers.add_header("Cache-Control", "no-cache")
api/db/services/llm_service.py CHANGED
@@ -194,7 +194,7 @@ class LLMBundle(object):
194
  for lm in LLMService.query(llm_name=llm_name):
195
  self.max_length = lm.max_tokens
196
  break
197
-
198
  def encode(self, texts: list, batch_size=32):
199
  emd, used_tokens = self.mdl.encode(texts, batch_size)
200
  if not TenantLLMService.increase_usage(
@@ -235,6 +235,17 @@ class LLMBundle(object):
235
  "Can't update token usage for {}/SEQUENCE2TXT".format(self.tenant_id))
236
  return txt
237
 
 
 
 
 
 
 
 
 
 
 
 
238
  def chat(self, system, history, gen_conf):
239
  txt, used_tokens = self.mdl.chat(system, history, gen_conf)
240
  if not TenantLLMService.increase_usage(
 
194
  for lm in LLMService.query(llm_name=llm_name):
195
  self.max_length = lm.max_tokens
196
  break
197
+
198
  def encode(self, texts: list, batch_size=32):
199
  emd, used_tokens = self.mdl.encode(texts, batch_size)
200
  if not TenantLLMService.increase_usage(
 
235
  "Can't update token usage for {}/SEQUENCE2TXT".format(self.tenant_id))
236
  return txt
237
 
238
def tts(self, text):
    """Stream the chunks produced by the wrapped model's ``tts`` call.

    Every non-integer chunk from ``self.mdl.tts(text)`` is yielded unchanged.
    The first integer chunk is not yielded: it is passed to
    ``TenantLLMService.increase_usage`` as the usage amount, and the
    generator then stops.

    Args:
        text: Input forwarded as-is to ``self.mdl.tts``.

    Yields:
        Each non-integer chunk, in the order the model produced it.
    """
    for piece in self.mdl.tts(text):
        if not isinstance(piece, int):
            yield piece
            continue
        # An integer chunk carries the usage figure rather than audio data
        # (presumably a token count, going by the log message below).
        recorded = TenantLLMService.increase_usage(
            self.tenant_id, self.llm_type, piece, self.llm_name)
        if not recorded:
            database_logger.error(
                "Can't update token usage for {}/TTS".format(self.tenant_id))
        # Nothing after the usage marker is forwarded to the caller.
        return
247
+
248
+
249
  def chat(self, system, history, gen_conf):
250
  txt, used_tokens = self.mdl.chat(system, history, gen_conf)
251
  if not TenantLLMService.increase_usage(
rag/llm/tts_model.py CHANGED
@@ -21,7 +21,7 @@ import ormsgpack
21
  from pydantic import BaseModel, conint
22
  from rag.utils import num_tokens_from_string
23
  import json
24
-
25
 
26
  class ServeReferenceAudio(BaseModel):
27
  audio: bytes
@@ -50,9 +50,11 @@ class Base(ABC):
50
  def __init__(self, key, model_name, base_url):
51
  pass
52
 
53
- def transcription(self, audio):
54
  pass
55
-
 
 
56
 
57
  class FishAudioTTS(Base):
58
  def __init__(self, key, model_name, base_url="https://api.fish.audio/v1/tts"):
@@ -66,10 +68,11 @@ class FishAudioTTS(Base):
66
  self.ref_id = key.get("fish_audio_refid")
67
  self.base_url = base_url
68
 
69
- def transcription(self, text):
70
  from http import HTTPStatus
71
 
72
- request = request = ServeTTSRequest(text=text, reference_id=self.ref_id)
 
73
 
74
  with httpx.Client() as client:
75
  try:
 
21
  from pydantic import BaseModel, conint
22
  from rag.utils import num_tokens_from_string
23
  import json
24
+ import re
25
 
26
  class ServeReferenceAudio(BaseModel):
27
  audio: bytes
 
50
  def __init__(self, key, model_name, base_url):
51
  pass
52
 
53
def tts(self, audio):
    """Placeholder with no behaviour; always returns ``None``.

    ``FishAudioTTS`` defines its own ``tts`` with the real implementation.
    """
    return None
55
+
56
@staticmethod
def normalize_text(text):
    """Return ``text`` with markup markers stripped out.

    Removes every ``**``, every ``##<digits>$$`` token, and every remaining
    ``#`` character. All other characters, including surrounding whitespace,
    are left untouched.

    Declared as a static method because the signature takes no instance
    argument while ``FishAudioTTS.tts`` invokes it as
    ``self.normalize_text(text)``. As a plain method that call would bind the
    instance to ``text`` and raise ``TypeError`` for the extra argument.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string.
    """
    # NOTE(review): ``##<digits>$$`` looks like an inline reference marker
    # produced elsewhere in the project -- confirm against the caller.
    return re.sub(r'(\*\*|##\d+\$\$|#)', '', text)
58
 
59
  class FishAudioTTS(Base):
60
  def __init__(self, key, model_name, base_url="https://api.fish.audio/v1/tts"):
 
68
  self.ref_id = key.get("fish_audio_refid")
69
  self.base_url = base_url
70
 
71
+ def tts(self, text):
72
  from http import HTTPStatus
73
 
74
+ text = self.normalize_text(text)
75
+ request = ServeTTSRequest(text=text, reference_id=self.ref_id)
76
 
77
  with httpx.Client() as client:
78
  try: