H committed · Commit 0c9dbca · 1 parent: 85d9ac3

Add ParsertType Audio (#1637)

### What problem does this PR solve?

#1514

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

Files changed:
- api/apps/api_app.py +3 -1
- api/apps/dataset_api.py +5 -1
- api/apps/document_app.py +4 -0
- api/db/__init__.py +2 -1
- api/db/init_data.py +3 -1
- api/db/services/llm_service.py +17 -1
- api/settings.py +1 -1
- rag/app/audio.py +42 -0
- rag/app/picture.py +1 -1
- rag/svr/task_executor.py +2 -1
#### api/apps/api_app.py (CHANGED)

```diff
@@ -335,6 +335,8 @@ def upload():
             doc["parser_id"] = request.form.get("parser_id").strip()
         if doc["type"] == FileType.VISUAL:
             doc["parser_id"] = ParserType.PICTURE.value
+        if doc["type"] == FileType.AURAL:
+            doc["parser_id"] = ParserType.AUDIO.value
         if re.search(r"\.(ppt|pptx|pages)$", filename):
             doc["parser_id"] = ParserType.PRESENTATION.value
 
@@ -581,4 +583,4 @@ def completion_faq():
         return response
 
     except Exception as e:
-        return server_error_response(e)
+        return server_error_response(e)
```
#### api/apps/dataset_api.py (CHANGED)

```diff
@@ -39,7 +39,7 @@ from api.utils import get_uuid
 from api.utils.api_utils import construct_json_result, construct_error_response
 from api.utils.api_utils import construct_result, validate_request
 from api.utils.file_utils import filename_type, thumbnail
-from rag.app import book, laws, manual, naive, one, paper, presentation, qa, resume, table, picture
+from rag.app import book, laws, manual, naive, one, paper, presentation, qa, resume, table, picture, audio
 from rag.nlp import search
 from rag.utils.es_conn import ELASTICSEARCH
 from rag.utils.minio_conn import MINIO
@@ -377,6 +377,8 @@ def upload_documents(dataset_id):
         }
         if doc["type"] == FileType.VISUAL:
             doc["parser_id"] = ParserType.PICTURE.value
+        if doc["type"] == FileType.AURAL:
+            doc["parser_id"] = ParserType.AUDIO.value
         if re.search(r"\.(ppt|pptx|pages)$", filename):
             doc["parser_id"] = ParserType.PRESENTATION.value
         DocumentService.insert(doc)
@@ -648,6 +650,8 @@ def doc_parse(binary, doc_name, parser_name, tenant_id, doc_id):
             resume.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
         case "table":
             table.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
+        case "audio":
+            audio.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
         case _:
             return False
 
```
#### api/apps/document_app.py (CHANGED)

```diff
@@ -105,6 +105,8 @@ def upload():
         }
         if doc["type"] == FileType.VISUAL:
             doc["parser_id"] = ParserType.PICTURE.value
+        if doc["type"] == FileType.AURAL:
+            doc["parser_id"] = ParserType.AUDIO.value
         if re.search(r"\.(ppt|pptx|pages)$", filename):
             doc["parser_id"] = ParserType.PRESENTATION.value
         DocumentService.insert(doc)
@@ -171,6 +173,8 @@ def web_crawl():
         }
         if doc["type"] == FileType.VISUAL:
             doc["parser_id"] = ParserType.PICTURE.value
+        if doc["type"] == FileType.AURAL:
+            doc["parser_id"] = ParserType.AUDIO.value
         if re.search(r"\.(ppt|pptx|pages)$", filename):
             doc["parser_id"] = ParserType.PRESENTATION.value
         DocumentService.insert(doc)
```
#### api/db/__init__.py (CHANGED)

```diff
@@ -84,6 +84,7 @@ class ParserType(StrEnum):
     NAIVE = "naive"
     PICTURE = "picture"
     ONE = "one"
+    AUDIO = "audio"
 
 
 class FileSource(StrEnum):
@@ -96,4 +97,4 @@ class CanvasType(StrEnum):
     ChatBot = "chatbot"
     DocBot = "docbot"
 
-KNOWLEDGEBASE_FOLDER_NAME=".knowledgebase"
+KNOWLEDGEBASE_FOLDER_NAME=".knowledgebase"
```
#### api/db/init_data.py (CHANGED)

```diff
@@ -121,6 +121,8 @@ def init_llm_factory():
     LLMFactoriesService.filter_delete([LLMFactoriesService.model.name == "QAnything"])
     LLMService.filter_delete([LLMService.model.fid == "QAnything"])
     TenantLLMService.filter_update([TenantLLMService.model.llm_factory == "QAnything"], {"llm_factory": "Youdao"})
+    TenantService.filter_update([1 == 1], {
+        "parser_ids": "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio"})
     ## insert openai two embedding models to the current openai user.
     print("Start to insert 2 OpenAI embedding models...")
     tenant_ids = set([row["tenant_id"] for row in TenantLLMService.get_openai_models()])
@@ -143,7 +145,7 @@ def init_llm_factory():
     """
     drop table llm;
     drop table llm_factories;
-    update tenant set parser_ids='naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One';
+    update tenant set parser_ids='naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio';
     alter table knowledgebase modify avatar longtext;
     alter table user modify avatar longtext;
     alter table dialog modify icon longtext;
```
#### api/db/services/llm_service.py (CHANGED)

```diff
@@ -15,7 +15,7 @@
 #
 from api.db.services.user_service import TenantService
 from api.settings import database_logger
-from rag.llm import EmbeddingModel, CvModel, ChatModel, RerankModel
+from rag.llm import EmbeddingModel, CvModel, ChatModel, RerankModel, Seq2txtModel
 from api.db import LLMType
 from api.db.db_models import DB, UserTenant
 from api.db.db_models import LLMFactories, LLM, TenantLLM
@@ -120,6 +120,14 @@ class TenantLLMService(CommonService):
             return ChatModel[model_config["llm_factory"]](
                 model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])
 
+        if llm_type == LLMType.SPEECH2TEXT:
+            if model_config["llm_factory"] not in Seq2txtModel:
+                return
+            return Seq2txtModel[model_config["llm_factory"]](
+                model_config["api_key"], model_config["llm_name"], lang,
+                base_url=model_config["api_base"]
+            )
+
     @classmethod
     @DB.connection_context()
     def increase_usage(cls, tenant_id, llm_type, used_tokens, llm_name=None):
@@ -207,6 +215,14 @@ class LLMBundle(object):
                 "Can't update token usage for {}/IMAGE2TEXT".format(self.tenant_id))
         return txt
 
+    def transcription(self, audio):
+        txt, used_tokens = self.mdl.transcription(audio)
+        if not TenantLLMService.increase_usage(
+                self.tenant_id, self.llm_type, used_tokens):
+            database_logger.error(
+                "Can't update token usage for {}/SEQUENCE2TXT".format(self.tenant_id))
+        return txt
+
     def chat(self, system, history, gen_conf):
         txt, used_tokens = self.mdl.chat(system, history, gen_conf)
         if not TenantLLMService.increase_usage(
```
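Taken together, the `Seq2txtModel` branch in `TenantLLMService.model_instance` and the new `LLMBundle.transcription` method let callers use a tenant's speech-to-text model the same way as the chat and image-to-text bundles. A minimal sketch of that call path, assuming the tenant has a Sequence2Txt model configured; the tenant id and audio file are placeholders:

```python
from api.db import LLMType
from api.db.services.llm_service import LLMBundle

tenant_id = "<tenant-uuid>"                    # placeholder
audio_bytes = open("sample.wav", "rb").read()  # placeholder audio payload

# LLMBundle resolves the tenant's configured Sequence2Txt model through
# TenantLLMService.model_instance(..., LLMType.SPEECH2TEXT, ...).
seq2txt = LLMBundle(tenant_id, LLMType.SPEECH2TEXT, lang="English")

# transcription() returns the transcript text and books the used tokens
# against the tenant via TenantLLMService.increase_usage, as added above.
transcript = seq2txt.transcription(audio_bytes)
print(transcript[:80])
```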
#### api/settings.py (CHANGED)

```diff
@@ -131,7 +131,7 @@ IMAGE2TEXT_MDL = default_llm[LLM_FACTORY]["image2text_model"]
 API_KEY = LLM.get("api_key", "")
 PARSERS = LLM.get(
     "parsers",
-    "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One")
+    "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio")
 
 # distribution
 DEPENDENT_DISTRIBUTION = get_base_config("dependent_distribution", False)
```
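For reference, `PARSERS` here and the tenant `parser_ids` column updated in `api/db/init_data.py` share the same format: a comma-separated list of `<parser_id>:<display name>` pairs, to which this PR appends `audio:Audio`. The parsing below only illustrates that format and is not code from the project:

```python
# Illustrative only: how the PARSERS / parser_ids string decomposes.
parsers = ("naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,"
           "paper:Paper,book:Book,laws:Laws,presentation:Presentation,"
           "picture:Picture,one:One,audio:Audio")

# Split on the first ":" only, separating the ParserType id from its label.
parser_map = dict(item.split(":", 1) for item in parsers.split(","))
assert parser_map["audio"] == "Audio"
```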
#### rag/app/audio.py (ADDED)

```diff
@@ -0,0 +1,42 @@
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+import io
+import re
+import numpy as np
+
+from api.db import LLMType
+from rag.nlp import rag_tokenizer
+from api.db.services.llm_service import LLMBundle
+from rag.nlp import tokenize
+
+
+def chunk(filename, binary, tenant_id, lang, callback=None, **kwargs):
+    doc = {
+        "docnm_kwd": filename,
+        "title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", filename))
+    }
+    doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"])
+
+    # is it English
+    eng = lang.lower() == "english"  # is_english(sections)
+    try:
+        callback(0.1, "USE Sequence2Txt LLM to transcription the audio")
+        seq2txt_mdl = LLMBundle(tenant_id, LLMType.SPEECH2TEXT, lang=lang)
+        ans = seq2txt_mdl.transcription(binary)
+        callback(0.8, "Sequence2Txt LLM respond: %s ..." % ans[:32])
+        tokenize(doc, ans, eng)
+        return [doc]
+    except Exception as e:
+        callback(prog=-1, msg=str(e))
+
+    return []
```
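The new chunker can also be exercised on its own. A minimal sketch, assuming a tenant with a speech-to-text model configured; the file name and the `print_progress` callback are hypothetical:

```python
from rag.app import audio


def print_progress(prog=None, msg=""):
    # chunk() reports progress as (fraction, message), or prog=-1 with an
    # error message if transcription fails.
    print(prog, msg)


with open("meeting_recording.mp3", "rb") as f:   # hypothetical file
    docs = audio.chunk(
        "meeting_recording.mp3",
        binary=f.read(),
        tenant_id="<tenant-uuid>",               # placeholder
        lang="English",
        callback=print_progress,
    )
# On success, docs is a one-element list whose entry carries the tokenized
# transcript alongside the title tokens built from the file name.
```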
#### rag/app/picture.py (CHANGED)

```diff
@@ -42,7 +42,7 @@ def chunk(filename, binary, tenant_id, lang, callback=None, **kwargs):
     callback(0.4, "Use CV LLM to describe the picture.")
     cv_mdl = LLMBundle(tenant_id, LLMType.IMAGE2TEXT, lang=lang)
     ans = cv_mdl.describe(binary)
-    callback(0.8, "CV LLM
+    callback(0.8, "CV LLM respond: %s ..." % ans[:32])
     txt += "\n" + ans
     tokenize(doc, txt, eng)
     return [doc]
```
#### rag/svr/task_executor.py (CHANGED)

```diff
@@ -45,7 +45,7 @@ from rag.nlp import search, rag_tokenizer
 from io import BytesIO
 import pandas as pd
 
-from rag.app import laws, paper, presentation, manual, qa, table, book, resume, picture, naive, one
+from rag.app import laws, paper, presentation, manual, qa, table, book, resume, picture, naive, one, audio
 
 from api.db import LLMType, ParserType
 from api.db.services.document_service import DocumentService
@@ -68,6 +68,7 @@ FACTORY = {
     ParserType.RESUME.value: resume,
     ParserType.PICTURE.value: picture,
     ParserType.ONE.value: one,
+    ParserType.AUDIO.value: audio
 }
 
 
```
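Registering `ParserType.AUDIO.value` in `FACTORY` is what lets the task executor dispatch audio documents to the new module by their `parser_id`. A simplified sketch of that lookup; the fallback to the general-purpose parser and the trimmed argument list are assumptions of this sketch, and the real executor passes more parameters (page ranges, `parser_config`, and so on):

```python
from api.db import ParserType
from rag.app import audio, naive, picture

# A reduced version of the executor's parser registry.
FACTORY = {
    ParserType.PICTURE.value: picture,
    ParserType.AUDIO.value: audio,
}


def build_chunks(parser_id, name, binary, tenant_id, lang, callback):
    # Assumed fallback: use the general-purpose "naive" parser when no
    # dedicated chunker is registered for this parser_id.
    chunker = FACTORY.get(parser_id, naive)
    return chunker.chunk(name, binary=binary, tenant_id=tenant_id,
                         lang=lang, callback=callback)


# e.g. a document tagged parser_id == "audio" now reaches rag.app.audio.chunk:
# build_chunks("audio", "call.wav", wav_bytes, "<tenant-uuid>", "English",
#              lambda *args, **kwargs: None)
```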