fix tts interface error (#2197)
Browse files
### What problem does this PR solve?
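Fix the TTS interface error: the `tts()` handler in `conversation_app.py` now streams from a new `LLMBundle.tts()` method instead of calling the bundle directly, and encodes the complete error message (not just its JSON part) to UTF-8.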
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
---------
Co-authored-by: Zhedong Cen <[email protected]>
Co-authored-by: Kevin Hu <[email protected]>
- api/apps/conversation_app.py +3 -3
- api/db/services/llm_service.py +12 -1
- rag/llm/tts_model.py +8 -5
api/apps/conversation_app.py
CHANGED
|
@@ -196,12 +196,12 @@ def tts():
|
|
| 196 |
tts_mdl = LLMBundle(tenants[0]["tenant_id"], LLMType.TTS, tts_id)
|
| 197 |
def stream_audio():
|
| 198 |
try:
|
| 199 |
-
for chunk in tts_mdl(text):
|
| 200 |
yield chunk
|
| 201 |
except Exception as e:
|
| 202 |
-
yield "data:" + json.dumps({"retcode": 500, "retmsg": str(e),
|
| 203 |
"data": {"answer": "**ERROR**: "+str(e)}},
|
| 204 |
-
ensure_ascii=False).encode('utf-8')
|
| 205 |
|
| 206 |
resp = Response(stream_audio(), mimetype="audio/mpeg")
|
| 207 |
resp.headers.add_header("Cache-Control", "no-cache")
|
|
|
|
| 196 |
tts_mdl = LLMBundle(tenants[0]["tenant_id"], LLMType.TTS, tts_id)
|
| 197 |
def stream_audio():
    """Generate the body of the audio response.

    Relays every chunk produced by ``tts_mdl.tts(text)``. If anything raises
    while streaming, a single UTF-8 encoded ``data:``-prefixed JSON error
    record (retcode 500) is emitted instead of propagating the exception.
    """
    try:
        for audio_chunk in tts_mdl.tts(text):
            yield audio_chunk
    except Exception as e:
        # Build the whole message as text first, then encode it once, so the
        # "data:" prefix and the JSON payload end up in the same bytes object.
        error_record = {"retcode": 500, "retmsg": str(e),
                        "data": {"answer": "**ERROR**: "+str(e)}}
        message = "data:" + json.dumps(error_record, ensure_ascii=False)
        yield message.encode('utf-8')
|
| 205 |
|
| 206 |
resp = Response(stream_audio(), mimetype="audio/mpeg")
|
| 207 |
resp.headers.add_header("Cache-Control", "no-cache")
|
api/db/services/llm_service.py
CHANGED
|
@@ -194,7 +194,7 @@ class LLMBundle(object):
|
|
| 194 |
for lm in LLMService.query(llm_name=llm_name):
|
| 195 |
self.max_length = lm.max_tokens
|
| 196 |
break
|
| 197 |
-
|
| 198 |
def encode(self, texts: list, batch_size=32):
|
| 199 |
emd, used_tokens = self.mdl.encode(texts, batch_size)
|
| 200 |
if not TenantLLMService.increase_usage(
|
|
@@ -235,6 +235,17 @@ class LLMBundle(object):
|
|
| 235 |
"Can't update token usage for {}/SEQUENCE2TXT".format(self.tenant_id))
|
| 236 |
return txt
|
| 237 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
def chat(self, system, history, gen_conf):
|
| 239 |
txt, used_tokens = self.mdl.chat(system, history, gen_conf)
|
| 240 |
if not TenantLLMService.increase_usage(
|
|
|
|
| 194 |
for lm in LLMService.query(llm_name=llm_name):
|
| 195 |
self.max_length = lm.max_tokens
|
| 196 |
break
|
| 197 |
+
|
| 198 |
def encode(self, texts: list, batch_size=32):
|
| 199 |
emd, used_tokens = self.mdl.encode(texts, batch_size)
|
| 200 |
if not TenantLLMService.increase_usage(
|
|
|
|
| 235 |
"Can't update token usage for {}/SEQUENCE2TXT".format(self.tenant_id))
|
| 236 |
return txt
|
| 237 |
|
| 238 |
+
def tts(self, text):
    """Stream synthesized audio for *text* from the wrapped TTS model.

    Iterates ``self.mdl.tts(text)`` and yields every non-integer chunk
    unchanged. An integer chunk is not audio: it is handed to
    ``TenantLLMService.increase_usage`` as the usage figure for this
    tenant/model, a failure to record it is logged, and the stream ends.
    """
    for piece in self.mdl.tts(text):
        if not isinstance(piece, int):
            yield piece
            continue

        # Integer chunk: usage figure reported by the model; record it and stop.
        usage_recorded = TenantLLMService.increase_usage(
            self.tenant_id, self.llm_type, piece, self.llm_name)
        if not usage_recorded:
            database_logger.error(
                "Can't update token usage for {}/TTS".format(self.tenant_id))
        return
|
| 247 |
+
|
| 248 |
+
|
| 249 |
def chat(self, system, history, gen_conf):
|
| 250 |
txt, used_tokens = self.mdl.chat(system, history, gen_conf)
|
| 251 |
if not TenantLLMService.increase_usage(
|
rag/llm/tts_model.py
CHANGED
|
@@ -21,7 +21,7 @@ import ormsgpack
|
|
| 21 |
from pydantic import BaseModel, conint
|
| 22 |
from rag.utils import num_tokens_from_string
|
| 23 |
import json
|
| 24 |
-
|
| 25 |
|
| 26 |
class ServeReferenceAudio(BaseModel):
|
| 27 |
audio: bytes
|
|
@@ -50,9 +50,11 @@ class Base(ABC):
|
|
| 50 |
def __init__(self, key, model_name, base_url):
|
| 51 |
pass
|
| 52 |
|
| 53 |
-
def
|
| 54 |
pass
|
| 55 |
-
|
|
|
|
|
|
|
| 56 |
|
| 57 |
class FishAudioTTS(Base):
|
| 58 |
def __init__(self, key, model_name, base_url="https://api.fish.audio/v1/tts"):
|
|
@@ -66,10 +68,11 @@ class FishAudioTTS(Base):
|
|
| 66 |
self.ref_id = key.get("fish_audio_refid")
|
| 67 |
self.base_url = base_url
|
| 68 |
|
| 69 |
-
def
|
| 70 |
from http import HTTPStatus
|
| 71 |
|
| 72 |
-
|
|
|
|
| 73 |
|
| 74 |
with httpx.Client() as client:
|
| 75 |
try:
|
|
|
|
| 21 |
from pydantic import BaseModel, conint
|
| 22 |
from rag.utils import num_tokens_from_string
|
| 23 |
import json
|
| 24 |
+
import re
|
| 25 |
|
| 26 |
class ServeReferenceAudio(BaseModel):
|
| 27 |
audio: bytes
|
|
|
|
| 50 |
def __init__(self, key, model_name, base_url):
|
| 51 |
pass
|
| 52 |
|
| 53 |
+
def tts(self, audio):
|
| 54 |
pass
|
| 55 |
+
|
| 56 |
+
@staticmethod
def normalize_text(text):
    """Strip markup from *text* before it is sent to the TTS backend.

    Removes every ``**``, every ``##<digits>$$`` marker and any remaining
    ``#`` character; all other characters are returned unchanged.

    Declared as a static method because the function takes no ``self`` yet
    is invoked as ``self.normalize_text(text)``. As a plain method that call
    would pass the instance as an extra positional argument and raise
    ``TypeError``.
    """
    return re.sub(r'(\*\*|##\d+\$\$|#)', '', text)
|
| 58 |
|
| 59 |
class FishAudioTTS(Base):
|
| 60 |
def __init__(self, key, model_name, base_url="https://api.fish.audio/v1/tts"):
|
|
|
|
| 68 |
self.ref_id = key.get("fish_audio_refid")
|
| 69 |
self.base_url = base_url
|
| 70 |
|
| 71 |
+
def tts(self, text):
|
| 72 |
from http import HTTPStatus
|
| 73 |
|
| 74 |
+
text = self.normalize_text(text)
|
| 75 |
+
request = ServeTTSRequest(text=text, reference_id=self.ref_id)
|
| 76 |
|
| 77 |
with httpx.Client() as client:
|
| 78 |
try:
|