KevinHuSh
committed on
Commit
·
9bf75d4
1
Parent(s):
6be3dd5
add dialog api (#33)
Browse files
- api/apps/__init__.py +11 -11
- api/apps/chunk_app.py +113 -26
- api/apps/dialog_app.py +163 -0
- api/apps/document_app.py +26 -22
- api/apps/kb_app.py +10 -10
- api/apps/llm_app.py +11 -11
- api/apps/user_app.py +10 -10
- api/db/__init__.py +9 -2
- api/db/db_models.py +15 -12
- api/db/db_services.py +4 -4
- api/db/db_utils.py +5 -11
- api/db/init_data.py +5 -5
- api/db/operatioins.py +2 -2
- api/db/reload_config_base.py +1 -1
- api/db/runtime_config.py +2 -2
- api/db/services/__init__.py +1 -1
- api/db/services/common_service.py +3 -3
- api/db/services/dialog_service.py +3 -13
- api/db/services/document_service.py +7 -7
- api/db/services/kb_service.py +6 -9
- api/db/services/knowledgebase_service.py +3 -9
- api/db/services/llm_service.py +6 -9
- api/db/services/user_service.py +7 -7
- api/errors/error_services.py +1 -1
- api/errors/general_error.py +1 -1
- api/hook/__init__.py +2 -2
- api/hook/api/client_authentication.py +5 -5
- api/hook/api/permission.py +5 -5
- api/hook/api/site_authentication.py +5 -5
- api/hook/common/parameters.py +1 -1
- api/ragflow_server.py +9 -9
- api/settings.py +5 -5
- api/utils/__init__.py +1 -1
- api/utils/api_utils.py +6 -6
- api/utils/file_utils.py +2 -2
- api/utils/log_utils.py +2 -2
- api/utils/t_crypt.py +1 -1
- api/versions.py +2 -2
- rag/llm/__init__.py +1 -1
- rag/llm/chat_model.py +1 -1
- rag/llm/cv_model.py +1 -1
- rag/llm/embedding_model.py +5 -1
- rag/nlp/huqie.py +1 -1
- rag/nlp/query.py +1 -1
- rag/nlp/search.py +18 -18
- rag/nlp/synonym.py +1 -1
- rag/nlp/term_weight.py +1 -1
- rag/settings.py +4 -4
- rag/svr/parse_user_docs.py +7 -8
- rag/utils/es_conn.py +0 -1
api/apps/__init__.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -21,17 +21,17 @@ from flask import Blueprint, Flask, request
|
|
| 21 |
from werkzeug.wrappers.request import Request
|
| 22 |
from flask_cors import CORS
|
| 23 |
|
| 24 |
-
from
|
| 25 |
-
from
|
| 26 |
-
from
|
| 27 |
|
| 28 |
from flask_session import Session
|
| 29 |
from flask_login import LoginManager
|
| 30 |
-
from
|
| 31 |
-
from
|
| 32 |
-
from
|
| 33 |
-
from
|
| 34 |
-
from
|
| 35 |
from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
|
| 36 |
|
| 37 |
__all__ = ['app']
|
|
@@ -68,7 +68,7 @@ def search_pages_path(pages_dir):
|
|
| 68 |
|
| 69 |
def register_page(page_path):
|
| 70 |
page_name = page_path.stem.rstrip('_app')
|
| 71 |
-
module_name = '.'.join(page_path.parts[page_path.parts.index('
|
| 72 |
|
| 73 |
spec = spec_from_file_location(module_name, page_path)
|
| 74 |
page = module_from_spec(spec)
|
|
@@ -86,7 +86,7 @@ def register_page(page_path):
|
|
| 86 |
|
| 87 |
pages_dir = [
|
| 88 |
Path(__file__).parent,
|
| 89 |
-
Path(__file__).parent.parent / '
|
| 90 |
]
|
| 91 |
|
| 92 |
client_urls_prefix = [
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 21 |
from werkzeug.wrappers.request import Request
|
| 22 |
from flask_cors import CORS
|
| 23 |
|
| 24 |
+
from api.db import StatusEnum
|
| 25 |
+
from api.db.services import UserService
|
| 26 |
+
from api.utils import CustomJSONEncoder
|
| 27 |
|
| 28 |
from flask_session import Session
|
| 29 |
from flask_login import LoginManager
|
| 30 |
+
from api.settings import RetCode, SECRET_KEY, stat_logger
|
| 31 |
+
from api.hook import HookManager
|
| 32 |
+
from api.hook.common.parameters import AuthenticationParameters, ClientAuthenticationParameters
|
| 33 |
+
from api.settings import API_VERSION, CLIENT_AUTHENTICATION, SITE_AUTHENTICATION, access_logger
|
| 34 |
+
from api.utils.api_utils import get_json_result, server_error_response
|
| 35 |
from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
|
| 36 |
|
| 37 |
__all__ = ['app']
|
|
|
|
| 68 |
|
| 69 |
def register_page(page_path):
|
| 70 |
page_name = page_path.stem.rstrip('_app')
|
| 71 |
+
module_name = '.'.join(page_path.parts[page_path.parts.index('api'):-1] + (page_name, ))
|
| 72 |
|
| 73 |
spec = spec_from_file_location(module_name, page_path)
|
| 74 |
page = module_from_spec(spec)
|
|
|
|
| 86 |
|
| 87 |
pages_dir = [
|
| 88 |
Path(__file__).parent,
|
| 89 |
+
Path(__file__).parent.parent / 'api' / 'apps',
|
| 90 |
]
|
| 91 |
|
| 92 |
client_urls_prefix = [
|
api/apps/chunk_app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -13,31 +13,26 @@
|
|
| 13 |
# See the License for the specific language governing permissions and
|
| 14 |
# limitations under the License.
|
| 15 |
#
|
| 16 |
-
import base64
|
| 17 |
import hashlib
|
| 18 |
-
import pathlib
|
| 19 |
import re
|
| 20 |
|
| 21 |
-
|
| 22 |
from flask import request
|
| 23 |
from flask_login import login_required, current_user
|
| 24 |
|
| 25 |
from rag.nlp import search, huqie
|
| 26 |
from rag.utils import ELASTICSEARCH, rmSpace
|
| 27 |
-
from
|
| 28 |
-
from
|
| 29 |
-
from
|
| 30 |
-
from
|
| 31 |
-
from
|
| 32 |
-
from
|
| 33 |
-
from
|
| 34 |
-
from
|
| 35 |
-
from
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
from web_server.utils.file_utils import filename_type
|
| 39 |
-
|
| 40 |
-
retrival = search.Dealer(ELASTICSEARCH, None)
|
| 41 |
|
| 42 |
@manager.route('/list', methods=['POST'])
|
| 43 |
@login_required
|
|
@@ -45,16 +40,29 @@ retrival = search.Dealer(ELASTICSEARCH, None)
|
|
| 45 |
def list():
|
| 46 |
req = request.json
|
| 47 |
doc_id = req["doc_id"]
|
| 48 |
-
page = req.get("page", 1)
|
| 49 |
-
size = req.get("size", 30)
|
| 50 |
question = req.get("keywords", "")
|
| 51 |
try:
|
| 52 |
-
|
| 53 |
-
if not
|
| 54 |
-
|
| 55 |
-
res = retrival.search({
|
| 56 |
"doc_ids": [doc_id], "page": page, "size": size, "question": question
|
| 57 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
return get_json_result(data=res)
|
| 59 |
except Exception as e:
|
| 60 |
if str(e).find("not_found") > 0:
|
|
@@ -102,6 +110,7 @@ def set():
|
|
| 102 |
d["content_sm_ltks"] = huqie.qieqie(d["content_ltks"])
|
| 103 |
d["important_kwd"] = req["important_kwd"]
|
| 104 |
d["important_tks"] = huqie.qie(" ".join(req["important_kwd"]))
|
|
|
|
| 105 |
|
| 106 |
try:
|
| 107 |
tenant_id = DocumentService.get_tenant_id(req["doc_id"])
|
|
@@ -116,10 +125,27 @@ def set():
|
|
| 116 |
return server_error_response(e)
|
| 117 |
|
| 118 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
@manager.route('/create', methods=['POST'])
|
| 120 |
@login_required
|
| 121 |
@validate_request("doc_id", "content_ltks", "important_kwd")
|
| 122 |
-
def
|
| 123 |
req = request.json
|
| 124 |
md5 = hashlib.md5()
|
| 125 |
md5.update((req["content_ltks"] + req["doc_id"]).encode("utf-8"))
|
|
@@ -148,3 +174,64 @@ def set():
|
|
| 148 |
return get_json_result(data={"chunk_id": chunck_id})
|
| 149 |
except Exception as e:
|
| 150 |
return server_error_response(e)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 13 |
# See the License for the specific language governing permissions and
|
| 14 |
# limitations under the License.
|
| 15 |
#
|
|
|
|
| 16 |
import hashlib
|
|
|
|
| 17 |
import re
|
| 18 |
|
| 19 |
+
import numpy as np
|
| 20 |
from flask import request
|
| 21 |
from flask_login import login_required, current_user
|
| 22 |
|
| 23 |
from rag.nlp import search, huqie
|
| 24 |
from rag.utils import ELASTICSEARCH, rmSpace
|
| 25 |
+
from api.db import LLMType
|
| 26 |
+
from api.db.services import duplicate_name
|
| 27 |
+
from api.db.services.kb_service import KnowledgebaseService
|
| 28 |
+
from api.db.services.llm_service import TenantLLMService
|
| 29 |
+
from api.db.services.user_service import UserTenantService
|
| 30 |
+
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
|
| 31 |
+
from api.db.services.document_service import DocumentService
|
| 32 |
+
from api.settings import RetCode
|
| 33 |
+
from api.utils.api_utils import get_json_result
|
| 34 |
+
|
| 35 |
+
retrival = search.Dealer(ELASTICSEARCH)
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
@manager.route('/list', methods=['POST'])
|
| 38 |
@login_required
|
|
|
|
| 40 |
def list():
|
| 41 |
req = request.json
|
| 42 |
doc_id = req["doc_id"]
|
| 43 |
+
page = int(req.get("page", 1))
|
| 44 |
+
size = int(req.get("size", 30))
|
| 45 |
question = req.get("keywords", "")
|
| 46 |
try:
|
| 47 |
+
tenant_id = DocumentService.get_tenant_id(req["doc_id"])
|
| 48 |
+
if not tenant_id: return get_data_error_result(retmsg="Tenant not found!")
|
| 49 |
+
query = {
|
|
|
|
| 50 |
"doc_ids": [doc_id], "page": page, "size": size, "question": question
|
| 51 |
+
}
|
| 52 |
+
if "available_int" in req: query["available_int"] = int(req["available_int"])
|
| 53 |
+
sres = retrival.search(query, search.index_name(tenant_id))
|
| 54 |
+
res = {"total": sres.total, "chunks": []}
|
| 55 |
+
for id in sres.ids:
|
| 56 |
+
d = {
|
| 57 |
+
"chunk_id": id,
|
| 58 |
+
"content_ltks": rmSpace(sres.highlight[id]) if question else sres.field[id]["content_ltks"],
|
| 59 |
+
"doc_id": sres.field[id]["doc_id"],
|
| 60 |
+
"docnm_kwd": sres.field[id]["docnm_kwd"],
|
| 61 |
+
"important_kwd": sres.field[id].get("important_kwd", []),
|
| 62 |
+
"img_id": sres.field[id].get("img_id", ""),
|
| 63 |
+
"available_int": sres.field[id].get("available_int", 1),
|
| 64 |
+
}
|
| 65 |
+
res["chunks"].append(d)
|
| 66 |
return get_json_result(data=res)
|
| 67 |
except Exception as e:
|
| 68 |
if str(e).find("not_found") > 0:
|
|
|
|
| 110 |
d["content_sm_ltks"] = huqie.qieqie(d["content_ltks"])
|
| 111 |
d["important_kwd"] = req["important_kwd"]
|
| 112 |
d["important_tks"] = huqie.qie(" ".join(req["important_kwd"]))
|
| 113 |
+
if "available_int" in req: d["available_int"] = req["available_int"]
|
| 114 |
|
| 115 |
try:
|
| 116 |
tenant_id = DocumentService.get_tenant_id(req["doc_id"])
|
|
|
|
| 125 |
return server_error_response(e)
|
| 126 |
|
| 127 |
|
| 128 |
+
@manager.route('/switch', methods=['POST'])
|
| 129 |
+
@login_required
|
| 130 |
+
@validate_request("chunk_ids", "available_int", "doc_id")
|
| 131 |
+
def switch():
|
| 132 |
+
req = request.json
|
| 133 |
+
try:
|
| 134 |
+
tenant_id = DocumentService.get_tenant_id(req["doc_id"])
|
| 135 |
+
if not tenant_id: return get_data_error_result(retmsg="Tenant not found!")
|
| 136 |
+
if not ELASTICSEARCH.upsert([{"id": i, "available_int": int(req["available_int"])} for i in req["chunk_ids"]],
|
| 137 |
+
search.index_name(tenant_id)):
|
| 138 |
+
return get_data_error_result(retmsg="Index updating failure")
|
| 139 |
+
return get_json_result(data=True)
|
| 140 |
+
except Exception as e:
|
| 141 |
+
return server_error_response(e)
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
|
| 145 |
@manager.route('/create', methods=['POST'])
|
| 146 |
@login_required
|
| 147 |
@validate_request("doc_id", "content_ltks", "important_kwd")
|
| 148 |
+
def create():
|
| 149 |
req = request.json
|
| 150 |
md5 = hashlib.md5()
|
| 151 |
md5.update((req["content_ltks"] + req["doc_id"]).encode("utf-8"))
|
|
|
|
| 174 |
return get_json_result(data={"chunk_id": chunck_id})
|
| 175 |
except Exception as e:
|
| 176 |
return server_error_response(e)
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
@manager.route('/retrieval_test', methods=['POST'])
|
| 180 |
+
@login_required
|
| 181 |
+
@validate_request("kb_id", "question")
|
| 182 |
+
def retrieval_test():
|
| 183 |
+
req = request.json
|
| 184 |
+
page = int(req.get("page", 1))
|
| 185 |
+
size = int(req.get("size", 30))
|
| 186 |
+
question = req["question"]
|
| 187 |
+
kb_id = req["kb_id"]
|
| 188 |
+
doc_ids = req.get("doc_ids", [])
|
| 189 |
+
similarity_threshold = float(req.get("similarity_threshold", 0.4))
|
| 190 |
+
vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
|
| 191 |
+
top = int(req.get("top", 1024))
|
| 192 |
+
try:
|
| 193 |
+
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
| 194 |
+
if not e:
|
| 195 |
+
return get_data_error_result(retmsg="Knowledgebase not found!")
|
| 196 |
+
|
| 197 |
+
embd_mdl = TenantLLMService.model_instance(kb.tenant_id, LLMType.EMBEDDING.value)
|
| 198 |
+
sres = retrival.search({"kb_ids": [kb_id], "doc_ids": doc_ids, "size": top,
|
| 199 |
+
"question": question, "vector": True,
|
| 200 |
+
"similarity": similarity_threshold},
|
| 201 |
+
search.index_name(kb.tenant_id),
|
| 202 |
+
embd_mdl)
|
| 203 |
+
|
| 204 |
+
sim, tsim, vsim = retrival.rerank(sres, question, 1-vector_similarity_weight, vector_similarity_weight)
|
| 205 |
+
idx = np.argsort(sim*-1)
|
| 206 |
+
ranks = {"total": 0, "chunks": [], "doc_aggs": {}}
|
| 207 |
+
start_idx = (page-1)*size
|
| 208 |
+
for i in idx:
|
| 209 |
+
ranks["total"] += 1
|
| 210 |
+
if sim[i] < similarity_threshold: break
|
| 211 |
+
start_idx -= 1
|
| 212 |
+
if start_idx >= 0:continue
|
| 213 |
+
if len(ranks["chunks"]) == size:continue
|
| 214 |
+
id = sres.ids[i]
|
| 215 |
+
dnm = sres.field[id]["docnm_kwd"]
|
| 216 |
+
d = {
|
| 217 |
+
"chunk_id": id,
|
| 218 |
+
"content_ltks": sres.field[id]["content_ltks"],
|
| 219 |
+
"doc_id": sres.field[id]["doc_id"],
|
| 220 |
+
"docnm_kwd": dnm,
|
| 221 |
+
"kb_id": sres.field[id]["kb_id"],
|
| 222 |
+
"important_kwd": sres.field[id].get("important_kwd", []),
|
| 223 |
+
"img_id": sres.field[id].get("img_id", ""),
|
| 224 |
+
"similarity": sim[i],
|
| 225 |
+
"vector_similarity": vsim[i],
|
| 226 |
+
"term_similarity": tsim[i]
|
| 227 |
+
}
|
| 228 |
+
ranks["chunks"].append(d)
|
| 229 |
+
if dnm not in ranks["doc_aggs"]:ranks["doc_aggs"][dnm] = 0
|
| 230 |
+
ranks["doc_aggs"][dnm] += 1
|
| 231 |
+
|
| 232 |
+
return get_json_result(data=ranks)
|
| 233 |
+
except Exception as e:
|
| 234 |
+
if str(e).find("not_found") > 0:
|
| 235 |
+
return get_json_result(data=False, retmsg=f'Index not found!',
|
| 236 |
+
retcode=RetCode.DATA_ERROR)
|
| 237 |
+
return server_error_response(e)
|
api/apps/dialog_app.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
+
#
|
| 4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
+
# you may not use this file except in compliance with the License.
|
| 6 |
+
# You may obtain a copy of the License at
|
| 7 |
+
#
|
| 8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 9 |
+
#
|
| 10 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 13 |
+
# See the License for the specific language governing permissions and
|
| 14 |
+
# limitations under the License.
|
| 15 |
+
#
|
| 16 |
+
import hashlib
|
| 17 |
+
import re
|
| 18 |
+
|
| 19 |
+
import numpy as np
|
| 20 |
+
from flask import request
|
| 21 |
+
from flask_login import login_required, current_user
|
| 22 |
+
|
| 23 |
+
from api.db.services.dialog_service import DialogService
|
| 24 |
+
from rag.nlp import search, huqie
|
| 25 |
+
from rag.utils import ELASTICSEARCH, rmSpace
|
| 26 |
+
from api.db import LLMType, StatusEnum
|
| 27 |
+
from api.db.services import duplicate_name
|
| 28 |
+
from api.db.services.kb_service import KnowledgebaseService
|
| 29 |
+
from api.db.services.llm_service import TenantLLMService
|
| 30 |
+
from api.db.services.user_service import UserTenantService, TenantService
|
| 31 |
+
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
|
| 32 |
+
from api.utils import get_uuid
|
| 33 |
+
from api.db.services.document_service import DocumentService
|
| 34 |
+
from api.settings import RetCode, stat_logger
|
| 35 |
+
from api.utils.api_utils import get_json_result
|
| 36 |
+
from rag.utils.minio_conn import MINIO
|
| 37 |
+
from api.utils.file_utils import filename_type
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
@manager.route('/set', methods=['POST'])
|
| 41 |
+
@login_required
|
| 42 |
+
def set():
|
| 43 |
+
req = request.json
|
| 44 |
+
dialog_id = req.get("dialog_id")
|
| 45 |
+
name = req.get("name", "New Dialog")
|
| 46 |
+
description = req.get("description", "A helpful Dialog")
|
| 47 |
+
language = req.get("language", "Chinese")
|
| 48 |
+
llm_setting_type = req.get("llm_setting_type", "Precise")
|
| 49 |
+
llm_setting = req.get("llm_setting", {
|
| 50 |
+
"Creative": {
|
| 51 |
+
"temperature": 0.9,
|
| 52 |
+
"top_p": 0.9,
|
| 53 |
+
"frequency_penalty": 0.2,
|
| 54 |
+
"presence_penalty": 0.4,
|
| 55 |
+
"max_tokens": 512
|
| 56 |
+
},
|
| 57 |
+
"Precise": {
|
| 58 |
+
"temperature": 0.1,
|
| 59 |
+
"top_p": 0.3,
|
| 60 |
+
"frequency_penalty": 0.7,
|
| 61 |
+
"presence_penalty": 0.4,
|
| 62 |
+
"max_tokens": 215
|
| 63 |
+
},
|
| 64 |
+
"Evenly": {
|
| 65 |
+
"temperature": 0.5,
|
| 66 |
+
"top_p": 0.5,
|
| 67 |
+
"frequency_penalty": 0.7,
|
| 68 |
+
"presence_penalty": 0.4,
|
| 69 |
+
"max_tokens": 215
|
| 70 |
+
},
|
| 71 |
+
"Custom": {
|
| 72 |
+
"temperature": 0.2,
|
| 73 |
+
"top_p": 0.3,
|
| 74 |
+
"frequency_penalty": 0.6,
|
| 75 |
+
"presence_penalty": 0.3,
|
| 76 |
+
"max_tokens": 215
|
| 77 |
+
},
|
| 78 |
+
})
|
| 79 |
+
prompt_config = req.get("prompt_config", {
|
| 80 |
+
"system": """你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。
|
| 81 |
+
以下是知识库:
|
| 82 |
+
{knowledge}
|
| 83 |
+
以上是知识库。""",
|
| 84 |
+
"prologue": "您好,我是您的助手小樱,长得可爱又善良,can I help you?",
|
| 85 |
+
"parameters": [
|
| 86 |
+
{"key": "knowledge", "optional": False}
|
| 87 |
+
],
|
| 88 |
+
"empty_response": "Sorry! 知识库中未找到相关内容!"
|
| 89 |
+
})
|
| 90 |
+
|
| 91 |
+
if len(prompt_config["parameters"]) < 1:
|
| 92 |
+
return get_data_error_result(retmsg="'knowledge' should be in parameters")
|
| 93 |
+
|
| 94 |
+
for p in prompt_config["parameters"]:
|
| 95 |
+
if prompt_config["system"].find("{%s}"%p["key"]) < 0:
|
| 96 |
+
return get_data_error_result(retmsg="Parameter '{}' is not used".format(p["key"]))
|
| 97 |
+
|
| 98 |
+
try:
|
| 99 |
+
e, tenant = TenantService.get_by_id(current_user.id)
|
| 100 |
+
if not e:return get_data_error_result(retmsg="Tenant not found!")
|
| 101 |
+
llm_id = req.get("llm_id", tenant.llm_id)
|
| 102 |
+
if not dialog_id:
|
| 103 |
+
dia = {
|
| 104 |
+
"id": get_uuid(),
|
| 105 |
+
"tenant_id": current_user.id,
|
| 106 |
+
"name": name,
|
| 107 |
+
"description": description,
|
| 108 |
+
"language": language,
|
| 109 |
+
"llm_id": llm_id,
|
| 110 |
+
"llm_setting_type": llm_setting_type,
|
| 111 |
+
"llm_setting": llm_setting,
|
| 112 |
+
"prompt_config": prompt_config
|
| 113 |
+
}
|
| 114 |
+
if not DialogService.save(**dia): return get_data_error_result(retmsg="Fail to new a dialog!")
|
| 115 |
+
e, dia = DialogService.get_by_id(dia["id"])
|
| 116 |
+
if not e: return get_data_error_result(retmsg="Fail to new a dialog!")
|
| 117 |
+
return get_json_result(data=dia.to_json())
|
| 118 |
+
else:
|
| 119 |
+
del req["dialog_id"]
|
| 120 |
+
if "kb_names" in req: del req["kb_names"]
|
| 121 |
+
if not DialogService.update_by_id(dialog_id, req):
|
| 122 |
+
return get_data_error_result(retmsg="Dialog not found!")
|
| 123 |
+
e, dia = DialogService.get_by_id(dialog_id)
|
| 124 |
+
if not e: return get_data_error_result(retmsg="Fail to update a dialog!")
|
| 125 |
+
dia = dia.to_dict()
|
| 126 |
+
dia["kb_ids"], dia["kb_names"] = get_kb_names(dia["kb_ids"])
|
| 127 |
+
return get_json_result(data=dia)
|
| 128 |
+
except Exception as e:
|
| 129 |
+
return server_error_response(e)
|
| 130 |
+
|
| 131 |
+
@manager.route('/get', methods=['GET'])
|
| 132 |
+
@login_required
|
| 133 |
+
def get():
|
| 134 |
+
dialog_id = request.args["dialog_id"]
|
| 135 |
+
try:
|
| 136 |
+
e,dia = DialogService.get_by_id(dialog_id)
|
| 137 |
+
if not e: return get_data_error_result(retmsg="Dialog not found!")
|
| 138 |
+
dia = dia.to_dict()
|
| 139 |
+
dia["kb_ids"], dia["kb_names"] = get_kb_names(dia["kb_ids"])
|
| 140 |
+
return get_json_result(data=dia)
|
| 141 |
+
except Exception as e:
|
| 142 |
+
return server_error_response(e)
|
| 143 |
+
|
| 144 |
+
def get_kb_names(kb_ids):
|
| 145 |
+
ids, nms = [], []
|
| 146 |
+
for kid in kb_ids:
|
| 147 |
+
e, kb = KnowledgebaseService.get_by_id(kid)
|
| 148 |
+
if not e or kb.status != StatusEnum.VALID.value:continue
|
| 149 |
+
ids.append(kid)
|
| 150 |
+
nms.append(kb.name)
|
| 151 |
+
return ids, nms
|
| 152 |
+
|
| 153 |
+
@manager.route('/list', methods=['GET'])
|
| 154 |
+
@login_required
|
| 155 |
+
def list():
|
| 156 |
+
try:
|
| 157 |
+
diags = DialogService.query(tenant_id=current_user.id, status=StatusEnum.VALID.value)
|
| 158 |
+
diags = [d.to_dict() for d in diags]
|
| 159 |
+
for d in diags:
|
| 160 |
+
d["kb_ids"], d["kb_names"] = get_kb_names(d["kb_ids"])
|
| 161 |
+
return get_json_result(data=diags)
|
| 162 |
+
except Exception as e:
|
| 163 |
+
return server_error_response(e)
|
api/apps/document_app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -16,22 +16,23 @@
|
|
| 16 |
import base64
|
| 17 |
import pathlib
|
| 18 |
|
|
|
|
| 19 |
from elasticsearch_dsl import Q
|
| 20 |
from flask import request
|
| 21 |
from flask_login import login_required, current_user
|
| 22 |
|
| 23 |
from rag.nlp import search
|
| 24 |
from rag.utils import ELASTICSEARCH
|
| 25 |
-
from
|
| 26 |
-
from
|
| 27 |
-
from
|
| 28 |
-
from
|
| 29 |
-
from
|
| 30 |
-
from
|
| 31 |
-
from
|
| 32 |
-
from
|
| 33 |
from rag.utils.minio_conn import MINIO
|
| 34 |
-
from
|
| 35 |
|
| 36 |
|
| 37 |
@manager.route('/upload', methods=['POST'])
|
|
@@ -163,21 +164,13 @@ def change_status():
|
|
| 163 |
|
| 164 |
if str(req["status"]) == "0":
|
| 165 |
ELASTICSEARCH.updateScriptByQuery(Q("term", doc_id=req["doc_id"]),
|
| 166 |
-
scripts=""
|
| 167 |
-
if(ctx._source.kb_id.contains('%s'))
|
| 168 |
-
ctx._source.kb_id.remove(
|
| 169 |
-
ctx._source.kb_id.indexOf('%s')
|
| 170 |
-
);
|
| 171 |
-
""" % (doc.kb_id, doc.kb_id),
|
| 172 |
idxnm=search.index_name(
|
| 173 |
kb.tenant_id)
|
| 174 |
)
|
| 175 |
else:
|
| 176 |
ELASTICSEARCH.updateScriptByQuery(Q("term", doc_id=req["doc_id"]),
|
| 177 |
-
scripts=""
|
| 178 |
-
if(!ctx._source.kb_id.contains('%s'))
|
| 179 |
-
ctx._source.kb_id.add('%s');
|
| 180 |
-
""" % (doc.kb_id, doc.kb_id),
|
| 181 |
idxnm=search.index_name(
|
| 182 |
kb.tenant_id)
|
| 183 |
)
|
|
@@ -195,8 +188,7 @@ def rm():
|
|
| 195 |
e, doc = DocumentService.get_by_id(req["doc_id"])
|
| 196 |
if not e:
|
| 197 |
return get_data_error_result(retmsg="Document not found!")
|
| 198 |
-
|
| 199 |
-
return get_json_result(data=False, retmsg='Remove from ES failure"', retcode=RetCode.SERVER_ERROR)
|
| 200 |
|
| 201 |
DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num*-1, doc.chunk_num*-1, 0)
|
| 202 |
if not DocumentService.delete_by_id(req["doc_id"]):
|
|
@@ -277,3 +269,15 @@ def change_parser():
|
|
| 277 |
except Exception as e:
|
| 278 |
return server_error_response(e)
|
| 279 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 16 |
import base64
|
| 17 |
import pathlib
|
| 18 |
|
| 19 |
+
import flask
|
| 20 |
from elasticsearch_dsl import Q
|
| 21 |
from flask import request
|
| 22 |
from flask_login import login_required, current_user
|
| 23 |
|
| 24 |
from rag.nlp import search
|
| 25 |
from rag.utils import ELASTICSEARCH
|
| 26 |
+
from api.db.services import duplicate_name
|
| 27 |
+
from api.db.services.kb_service import KnowledgebaseService
|
| 28 |
+
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
|
| 29 |
+
from api.utils import get_uuid
|
| 30 |
+
from api.db import FileType
|
| 31 |
+
from api.db.services.document_service import DocumentService
|
| 32 |
+
from api.settings import RetCode
|
| 33 |
+
from api.utils.api_utils import get_json_result
|
| 34 |
from rag.utils.minio_conn import MINIO
|
| 35 |
+
from api.utils.file_utils import filename_type
|
| 36 |
|
| 37 |
|
| 38 |
@manager.route('/upload', methods=['POST'])
|
|
|
|
| 164 |
|
| 165 |
if str(req["status"]) == "0":
|
| 166 |
ELASTICSEARCH.updateScriptByQuery(Q("term", doc_id=req["doc_id"]),
|
| 167 |
+
scripts="ctx._source.available_int=0;",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
idxnm=search.index_name(
|
| 169 |
kb.tenant_id)
|
| 170 |
)
|
| 171 |
else:
|
| 172 |
ELASTICSEARCH.updateScriptByQuery(Q("term", doc_id=req["doc_id"]),
|
| 173 |
+
scripts="ctx._source.available_int=1;",
|
|
|
|
|
|
|
|
|
|
| 174 |
idxnm=search.index_name(
|
| 175 |
kb.tenant_id)
|
| 176 |
)
|
|
|
|
| 188 |
e, doc = DocumentService.get_by_id(req["doc_id"])
|
| 189 |
if not e:
|
| 190 |
return get_data_error_result(retmsg="Document not found!")
|
| 191 |
+
ELASTICSEARCH.deleteByQuery(Q("match", doc_id=doc.id), idxnm=search.index_name(doc.kb_id))
|
|
|
|
| 192 |
|
| 193 |
DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num*-1, doc.chunk_num*-1, 0)
|
| 194 |
if not DocumentService.delete_by_id(req["doc_id"]):
|
|
|
|
| 269 |
except Exception as e:
|
| 270 |
return server_error_response(e)
|
| 271 |
|
| 272 |
+
|
| 273 |
+
@manager.route('/image/<image_id>', methods=['GET'])
|
| 274 |
+
@login_required
|
| 275 |
+
def get_image(image_id):
|
| 276 |
+
try:
|
| 277 |
+
bkt, nm = image_id.split("-")
|
| 278 |
+
response = flask.make_response(MINIO.get(bkt, nm))
|
| 279 |
+
response.headers.set('Content-Type', 'image/JPEG')
|
| 280 |
+
return response
|
| 281 |
+
except Exception as e:
|
| 282 |
+
return server_error_response(e)
|
| 283 |
+
|
api/apps/kb_app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -16,15 +16,15 @@
|
|
| 16 |
from flask import request
|
| 17 |
from flask_login import login_required, current_user
|
| 18 |
|
| 19 |
-
from
|
| 20 |
-
from
|
| 21 |
-
from
|
| 22 |
-
from
|
| 23 |
-
from
|
| 24 |
-
from
|
| 25 |
-
from
|
| 26 |
-
from
|
| 27 |
-
from
|
| 28 |
|
| 29 |
|
| 30 |
@manager.route('/create', methods=['post'])
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 16 |
from flask import request
|
| 17 |
from flask_login import login_required, current_user
|
| 18 |
|
| 19 |
+
from api.db.services import duplicate_name
|
| 20 |
+
from api.db.services.user_service import TenantService, UserTenantService
|
| 21 |
+
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
|
| 22 |
+
from api.utils import get_uuid, get_format_time
|
| 23 |
+
from api.db import StatusEnum, UserTenantRole
|
| 24 |
+
from api.db.services.kb_service import KnowledgebaseService
|
| 25 |
+
from api.db.db_models import Knowledgebase
|
| 26 |
+
from api.settings import stat_logger, RetCode
|
| 27 |
+
from api.utils.api_utils import get_json_result
|
| 28 |
|
| 29 |
|
| 30 |
@manager.route('/create', methods=['post'])
|
api/apps/llm_app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -16,16 +16,16 @@
|
|
| 16 |
from flask import request
|
| 17 |
from flask_login import login_required, current_user
|
| 18 |
|
| 19 |
-
from
|
| 20 |
-
from
|
| 21 |
-
from
|
| 22 |
-
from
|
| 23 |
-
from
|
| 24 |
-
from
|
| 25 |
-
from
|
| 26 |
-
from
|
| 27 |
-
from
|
| 28 |
-
from
|
| 29 |
|
| 30 |
|
| 31 |
@manager.route('/factories', methods=['GET'])
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 16 |
from flask import request
|
| 17 |
from flask_login import login_required, current_user
|
| 18 |
|
| 19 |
+
from api.db.services import duplicate_name
|
| 20 |
+
from api.db.services.llm_service import LLMFactoriesService, TenantLLMService, LLMService
|
| 21 |
+
from api.db.services.user_service import TenantService, UserTenantService
|
| 22 |
+
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
|
| 23 |
+
from api.utils import get_uuid, get_format_time
|
| 24 |
+
from api.db import StatusEnum, UserTenantRole
|
| 25 |
+
from api.db.services.kb_service import KnowledgebaseService
|
| 26 |
+
from api.db.db_models import Knowledgebase, TenantLLM
|
| 27 |
+
from api.settings import stat_logger, RetCode
|
| 28 |
+
from api.utils.api_utils import get_json_result
|
| 29 |
|
| 30 |
|
| 31 |
@manager.route('/factories', methods=['GET'])
|
api/apps/user_app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -17,15 +17,15 @@ from flask import request, session, redirect, url_for
|
|
| 17 |
from werkzeug.security import generate_password_hash, check_password_hash
|
| 18 |
from flask_login import login_required, current_user, login_user, logout_user
|
| 19 |
|
| 20 |
-
from
|
| 21 |
-
from
|
| 22 |
-
from
|
| 23 |
-
from
|
| 24 |
-
from
|
| 25 |
-
from
|
| 26 |
-
from
|
| 27 |
-
from
|
| 28 |
-
from
|
| 29 |
|
| 30 |
|
| 31 |
@manager.route('/login', methods=['POST', 'GET'])
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 17 |
from werkzeug.security import generate_password_hash, check_password_hash
|
| 18 |
from flask_login import login_required, current_user, login_user, logout_user
|
| 19 |
|
| 20 |
+
from api.db.db_models import TenantLLM
|
| 21 |
+
from api.db.services.llm_service import TenantLLMService
|
| 22 |
+
from api.utils.api_utils import server_error_response, validate_request
|
| 23 |
+
from api.utils import get_uuid, get_format_time, decrypt, download_img
|
| 24 |
+
from api.db import UserTenantRole, LLMType
|
| 25 |
+
from api.settings import RetCode, GITHUB_OAUTH, CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS
|
| 26 |
+
from api.db.services.user_service import UserService, TenantService, UserTenantService
|
| 27 |
+
from api.settings import stat_logger
|
| 28 |
+
from api.utils.api_utils import get_json_result, cors_reponse
|
| 29 |
|
| 30 |
|
| 31 |
@manager.route('/login', methods=['POST', 'GET'])
|
api/db/__init__.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -51,4 +51,11 @@ class LLMType(StrEnum):
|
|
| 51 |
CHAT = 'chat'
|
| 52 |
EMBEDDING = 'embedding'
|
| 53 |
SPEECH2TEXT = 'speech2text'
|
| 54 |
-
IMAGE2TEXT = 'image2text'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 51 |
CHAT = 'chat'
|
| 52 |
EMBEDDING = 'embedding'
|
| 53 |
SPEECH2TEXT = 'speech2text'
|
| 54 |
+
IMAGE2TEXT = 'image2text'
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
class ChatStyle(StrEnum):
|
| 58 |
+
CREATIVE = 'Creative'
|
| 59 |
+
PRECISE = 'Precise'
|
| 60 |
+
EVENLY = 'Evenly'
|
| 61 |
+
CUSTOM = 'Custom'
|
api/db/db_models.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -29,10 +29,10 @@ from peewee import (
|
|
| 29 |
)
|
| 30 |
from playhouse.pool import PooledMySQLDatabase
|
| 31 |
|
| 32 |
-
from
|
| 33 |
-
from
|
| 34 |
-
from
|
| 35 |
-
from
|
| 36 |
|
| 37 |
LOGGER = getLogger()
|
| 38 |
|
|
@@ -467,6 +467,8 @@ class Knowledgebase(DataBaseModel):
|
|
| 467 |
doc_num = IntegerField(default=0)
|
| 468 |
token_num = IntegerField(default=0)
|
| 469 |
chunk_num = IntegerField(default=0)
|
|
|
|
|
|
|
| 470 |
|
| 471 |
parser_id = CharField(max_length=32, null=False, help_text="default parser ID")
|
| 472 |
status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted,1: validate)", default="1")
|
|
@@ -516,19 +518,20 @@ class Dialog(DataBaseModel):
|
|
| 516 |
prompt_type = CharField(max_length=16, null=False, default="simple", help_text="simple|advanced")
|
| 517 |
prompt_config = JSONField(null=False, default={"system": "", "prologue": "您好,我是您的助手小樱,长得可爱又善良,can I help you?",
|
| 518 |
"parameters": [], "empty_response": "Sorry! 知识库中未找到相关内容!"})
|
|
|
|
| 519 |
status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted,1: validate)", default="1")
|
| 520 |
|
| 521 |
class Meta:
|
| 522 |
db_table = "dialog"
|
| 523 |
|
| 524 |
|
| 525 |
-
class DialogKb(DataBaseModel):
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
| 532 |
|
| 533 |
|
| 534 |
class Conversation(DataBaseModel):
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 29 |
)
|
| 30 |
from playhouse.pool import PooledMySQLDatabase
|
| 31 |
|
| 32 |
+
from api.db import SerializedType
|
| 33 |
+
from api.settings import DATABASE, stat_logger, SECRET_KEY
|
| 34 |
+
from api.utils.log_utils import getLogger
|
| 35 |
+
from api import utils
|
| 36 |
|
| 37 |
LOGGER = getLogger()
|
| 38 |
|
|
|
|
| 467 |
doc_num = IntegerField(default=0)
|
| 468 |
token_num = IntegerField(default=0)
|
| 469 |
chunk_num = IntegerField(default=0)
|
| 470 |
+
similarity_threshold = FloatField(default=0.4)
|
| 471 |
+
vector_similarity_weight = FloatField(default=0.3)
|
| 472 |
|
| 473 |
parser_id = CharField(max_length=32, null=False, help_text="default parser ID")
|
| 474 |
status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted,1: validate)", default="1")
|
|
|
|
| 518 |
prompt_type = CharField(max_length=16, null=False, default="simple", help_text="simple|advanced")
|
| 519 |
prompt_config = JSONField(null=False, default={"system": "", "prologue": "您好,我是您的助手小樱,长得可爱又善良,can I help you?",
|
| 520 |
"parameters": [], "empty_response": "Sorry! 知识库中未找到相关内容!"})
|
| 521 |
+
kb_ids = JSONField(null=False, default=[])
|
| 522 |
status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted,1: validate)", default="1")
|
| 523 |
|
| 524 |
class Meta:
|
| 525 |
db_table = "dialog"
|
| 526 |
|
| 527 |
|
| 528 |
+
# class DialogKb(DataBaseModel):
|
| 529 |
+
# dialog_id = CharField(max_length=32, null=False, index=True)
|
| 530 |
+
# kb_id = CharField(max_length=32, null=False)
|
| 531 |
+
#
|
| 532 |
+
# class Meta:
|
| 533 |
+
# db_table = "dialog_kb"
|
| 534 |
+
# primary_key = CompositeKey('dialog_id', 'kb_id')
|
| 535 |
|
| 536 |
|
| 537 |
class Conversation(DataBaseModel):
|
api/db/db_services.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2021 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -19,10 +19,10 @@ import time
|
|
| 19 |
from functools import wraps
|
| 20 |
from shortuuid import ShortUUID
|
| 21 |
|
| 22 |
-
from
|
| 23 |
|
| 24 |
-
from
|
| 25 |
-
from
|
| 26 |
GRPC_PORT, HOST, HTTP_PORT,
|
| 27 |
RANDOM_INSTANCE_ID, stat_logger,
|
| 28 |
)
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2021 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 19 |
from functools import wraps
|
| 20 |
from shortuuid import ShortUUID
|
| 21 |
|
| 22 |
+
from api.versions import get_rag_version
|
| 23 |
|
| 24 |
+
from api.errors.error_services import *
|
| 25 |
+
from api.settings import (
|
| 26 |
GRPC_PORT, HOST, HTTP_PORT,
|
| 27 |
RANDOM_INSTANCE_ID, stat_logger,
|
| 28 |
)
|
api/db/db_utils.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -17,11 +17,11 @@ import operator
|
|
| 17 |
from functools import reduce
|
| 18 |
from typing import Dict, Type, Union
|
| 19 |
|
| 20 |
-
from
|
| 21 |
|
| 22 |
-
from
|
| 23 |
-
from
|
| 24 |
-
from
|
| 25 |
from enum import Enum
|
| 26 |
|
| 27 |
|
|
@@ -123,9 +123,3 @@ def query_db(model: Type[DataBaseModel], limit: int = 0, offset: int = 0,
|
|
| 123 |
data = data.offset(offset)
|
| 124 |
|
| 125 |
return list(data), count
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
class StatusEnum(Enum):
|
| 129 |
-
# 样本可用状态
|
| 130 |
-
VALID = "1"
|
| 131 |
-
IN_VALID = "0"
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 17 |
from functools import reduce
|
| 18 |
from typing import Dict, Type, Union
|
| 19 |
|
| 20 |
+
from api.utils import current_timestamp, timestamp_to_date
|
| 21 |
|
| 22 |
+
from api.db.db_models import DB, DataBaseModel
|
| 23 |
+
from api.db.runtime_config import RuntimeConfig
|
| 24 |
+
from api.utils.log_utils import getLogger
|
| 25 |
from enum import Enum
|
| 26 |
|
| 27 |
|
|
|
|
| 123 |
data = data.offset(offset)
|
| 124 |
|
| 125 |
return list(data), count
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
api/db/init_data.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -16,10 +16,10 @@
|
|
| 16 |
import time
|
| 17 |
import uuid
|
| 18 |
|
| 19 |
-
from
|
| 20 |
-
from
|
| 21 |
-
from
|
| 22 |
-
from
|
| 23 |
|
| 24 |
|
| 25 |
def init_superuser():
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 16 |
import time
|
| 17 |
import uuid
|
| 18 |
|
| 19 |
+
from api.db import LLMType
|
| 20 |
+
from api.db.db_models import init_database_tables as init_web_db
|
| 21 |
+
from api.db.services import UserService
|
| 22 |
+
from api.db.services.llm_service import LLMFactoriesService, LLMService
|
| 23 |
|
| 24 |
|
| 25 |
def init_superuser():
|
api/db/operatioins.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -17,5 +17,5 @@
|
|
| 17 |
import operator
|
| 18 |
import time
|
| 19 |
import typing
|
| 20 |
-
from
|
| 21 |
import peewee
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 17 |
import operator
|
| 18 |
import time
|
| 19 |
import typing
|
| 20 |
+
from api.utils.log_utils import sql_logger
|
| 21 |
import peewee
|
api/db/reload_config_base.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
api/db/runtime_config.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -13,7 +13,7 @@
|
|
| 13 |
# See the License for the specific language governing permissions and
|
| 14 |
# limitations under the License.
|
| 15 |
#
|
| 16 |
-
from
|
| 17 |
from .reload_config_base import ReloadConfigBase
|
| 18 |
|
| 19 |
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 13 |
# See the License for the specific language governing permissions and
|
| 14 |
# limitations under the License.
|
| 15 |
#
|
| 16 |
+
from api.versions import get_versions
|
| 17 |
from .reload_config_base import ReloadConfigBase
|
| 18 |
|
| 19 |
|
api/db/services/__init__.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
api/db/services/common_service.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -17,8 +17,8 @@ from datetime import datetime
|
|
| 17 |
|
| 18 |
import peewee
|
| 19 |
|
| 20 |
-
from
|
| 21 |
-
from
|
| 22 |
|
| 23 |
|
| 24 |
class CommonService:
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 17 |
|
| 18 |
import peewee
|
| 19 |
|
| 20 |
+
from api.db.db_models import DB
|
| 21 |
+
from api.utils import datetime_format
|
| 22 |
|
| 23 |
|
| 24 |
class CommonService:
|
api/db/services/dialog_service.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -13,14 +13,8 @@
|
|
| 13 |
# See the License for the specific language governing permissions and
|
| 14 |
# limitations under the License.
|
| 15 |
#
|
| 16 |
-
import
|
| 17 |
-
from
|
| 18 |
-
|
| 19 |
-
from web_server.db.db_models import DB, UserTenant
|
| 20 |
-
from web_server.db.db_models import Dialog, Conversation, DialogKb
|
| 21 |
-
from web_server.db.services.common_service import CommonService
|
| 22 |
-
from web_server.utils import get_uuid, get_format_time
|
| 23 |
-
from web_server.db.db_utils import StatusEnum
|
| 24 |
|
| 25 |
|
| 26 |
class DialogService(CommonService):
|
|
@@ -29,7 +23,3 @@ class DialogService(CommonService):
|
|
| 29 |
|
| 30 |
class ConversationService(CommonService):
|
| 31 |
model = Conversation
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
class DialogKbService(CommonService):
|
| 35 |
-
model = DialogKb
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 13 |
# See the License for the specific language governing permissions and
|
| 14 |
# limitations under the License.
|
| 15 |
#
|
| 16 |
+
from api.db.db_models import Dialog, Conversation
|
| 17 |
+
from api.db.services.common_service import CommonService
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
class DialogService(CommonService):
|
|
|
|
| 23 |
|
| 24 |
class ConversationService(CommonService):
|
| 25 |
model = Conversation
|
|
|
|
|
|
|
|
|
|
|
|
api/db/services/document_service.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -15,12 +15,12 @@
|
|
| 15 |
#
|
| 16 |
from peewee import Expression
|
| 17 |
|
| 18 |
-
from
|
| 19 |
-
from
|
| 20 |
-
from
|
| 21 |
-
from
|
| 22 |
-
from
|
| 23 |
-
from
|
| 24 |
|
| 25 |
|
| 26 |
class DocumentService(CommonService):
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 15 |
#
|
| 16 |
from peewee import Expression
|
| 17 |
|
| 18 |
+
from api.db import TenantPermission, FileType
|
| 19 |
+
from api.db.db_models import DB, Knowledgebase, Tenant
|
| 20 |
+
from api.db.db_models import Document
|
| 21 |
+
from api.db.services.common_service import CommonService
|
| 22 |
+
from api.db.services.kb_service import KnowledgebaseService
|
| 23 |
+
from api.db import StatusEnum
|
| 24 |
|
| 25 |
|
| 26 |
class DocumentService(CommonService):
|
api/db/services/kb_service.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -13,15 +13,12 @@
|
|
| 13 |
# See the License for the specific language governing permissions and
|
| 14 |
# limitations under the License.
|
| 15 |
#
|
| 16 |
-
import peewee
|
| 17 |
-
from werkzeug.security import generate_password_hash, check_password_hash
|
| 18 |
|
| 19 |
-
from
|
| 20 |
-
from
|
| 21 |
-
from
|
| 22 |
-
from
|
| 23 |
-
from
|
| 24 |
-
from web_server.db.db_utils import StatusEnum
|
| 25 |
|
| 26 |
|
| 27 |
class KnowledgebaseService(CommonService):
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 13 |
# See the License for the specific language governing permissions and
|
| 14 |
# limitations under the License.
|
| 15 |
#
|
|
|
|
|
|
|
| 16 |
|
| 17 |
+
from api.db import TenantPermission
|
| 18 |
+
from api.db.db_models import DB, Tenant
|
| 19 |
+
from api.db.db_models import Knowledgebase
|
| 20 |
+
from api.db.services.common_service import CommonService
|
| 21 |
+
from api.db import StatusEnum
|
|
|
|
| 22 |
|
| 23 |
|
| 24 |
class KnowledgebaseService(CommonService):
|
api/db/services/knowledgebase_service.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -13,14 +13,8 @@
|
|
| 13 |
# See the License for the specific language governing permissions and
|
| 14 |
# limitations under the License.
|
| 15 |
#
|
| 16 |
-
import
|
| 17 |
-
from
|
| 18 |
-
|
| 19 |
-
from web_server.db.db_models import DB, UserTenant
|
| 20 |
-
from web_server.db.db_models import Knowledgebase, Document
|
| 21 |
-
from web_server.db.services.common_service import CommonService
|
| 22 |
-
from web_server.utils import get_uuid, get_format_time
|
| 23 |
-
from web_server.db.db_utils import StatusEnum
|
| 24 |
|
| 25 |
|
| 26 |
class KnowledgebaseService(CommonService):
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 13 |
# See the License for the specific language governing permissions and
|
| 14 |
# limitations under the License.
|
| 15 |
#
|
| 16 |
+
from api.db.db_models import Knowledgebase, Document
|
| 17 |
+
from api.db.services.common_service import CommonService
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
class KnowledgebaseService(CommonService):
|
api/db/services/llm_service.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -13,15 +13,12 @@
|
|
| 13 |
# See the License for the specific language governing permissions and
|
| 14 |
# limitations under the License.
|
| 15 |
#
|
| 16 |
-
import peewee
|
| 17 |
-
from werkzeug.security import generate_password_hash, check_password_hash
|
| 18 |
-
|
| 19 |
from rag.llm import EmbeddingModel, CvModel
|
| 20 |
-
from
|
| 21 |
-
from
|
| 22 |
-
from
|
| 23 |
-
from
|
| 24 |
-
from
|
| 25 |
|
| 26 |
|
| 27 |
class LLMFactoriesService(CommonService):
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 13 |
# See the License for the specific language governing permissions and
|
| 14 |
# limitations under the License.
|
| 15 |
#
|
|
|
|
|
|
|
|
|
|
| 16 |
from rag.llm import EmbeddingModel, CvModel
|
| 17 |
+
from api.db import LLMType
|
| 18 |
+
from api.db.db_models import DB, UserTenant
|
| 19 |
+
from api.db.db_models import LLMFactories, LLM, TenantLLM
|
| 20 |
+
from api.db.services.common_service import CommonService
|
| 21 |
+
from api.db import StatusEnum
|
| 22 |
|
| 23 |
|
| 24 |
class LLMFactoriesService(CommonService):
|
api/db/services/user_service.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -16,12 +16,12 @@
|
|
| 16 |
import peewee
|
| 17 |
from werkzeug.security import generate_password_hash, check_password_hash
|
| 18 |
|
| 19 |
-
from
|
| 20 |
-
from
|
| 21 |
-
from
|
| 22 |
-
from
|
| 23 |
-
from
|
| 24 |
-
from
|
| 25 |
|
| 26 |
|
| 27 |
class UserService(CommonService):
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 16 |
import peewee
|
| 17 |
from werkzeug.security import generate_password_hash, check_password_hash
|
| 18 |
|
| 19 |
+
from api.db import UserTenantRole
|
| 20 |
+
from api.db.db_models import DB, UserTenant
|
| 21 |
+
from api.db.db_models import User, Tenant
|
| 22 |
+
from api.db.services.common_service import CommonService
|
| 23 |
+
from api.utils import get_uuid, get_format_time
|
| 24 |
+
from api.db import StatusEnum
|
| 25 |
|
| 26 |
|
| 27 |
class UserService(CommonService):
|
api/errors/error_services.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
from
|
| 2 |
|
| 3 |
__all__ = ['ServicesError', 'ServiceNotSupported', 'ZooKeeperNotConfigured',
|
| 4 |
'MissingZooKeeperUsernameOrPassword', 'ZooKeeperBackendError']
|
|
|
|
| 1 |
+
from api.errors import RagFlowError
|
| 2 |
|
| 3 |
__all__ = ['ServicesError', 'ServiceNotSupported', 'ZooKeeperNotConfigured',
|
| 4 |
'MissingZooKeeperUsernameOrPassword', 'ZooKeeperBackendError']
|
api/errors/general_error.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
api/hook/__init__.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
import importlib
|
| 2 |
|
| 3 |
-
from
|
| 4 |
SignatureReturn, AuthenticationReturn, PermissionReturn, ClientAuthenticationReturn, ClientAuthenticationParameters
|
| 5 |
-
from
|
| 6 |
|
| 7 |
|
| 8 |
class HookManager:
|
|
|
|
| 1 |
import importlib
|
| 2 |
|
| 3 |
+
from api.hook.common.parameters import SignatureParameters, AuthenticationParameters, \
|
| 4 |
SignatureReturn, AuthenticationReturn, PermissionReturn, ClientAuthenticationReturn, ClientAuthenticationParameters
|
| 5 |
+
from api.settings import HOOK_MODULE, stat_logger,RetCode
|
| 6 |
|
| 7 |
|
| 8 |
class HookManager:
|
api/hook/api/client_authentication.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
import requests
|
| 2 |
|
| 3 |
-
from
|
| 4 |
-
from
|
| 5 |
-
from
|
| 6 |
-
from
|
| 7 |
-
from
|
| 8 |
|
| 9 |
|
| 10 |
@HookManager.register_client_authentication_hook
|
|
|
|
| 1 |
import requests
|
| 2 |
|
| 3 |
+
from api.db.service_registry import ServiceRegistry
|
| 4 |
+
from api.settings import RegistryServiceName
|
| 5 |
+
from api.hook import HookManager
|
| 6 |
+
from api.hook.common.parameters import ClientAuthenticationParameters, ClientAuthenticationReturn
|
| 7 |
+
from api.settings import HOOK_SERVER_NAME
|
| 8 |
|
| 9 |
|
| 10 |
@HookManager.register_client_authentication_hook
|
api/hook/api/permission.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
import requests
|
| 2 |
|
| 3 |
-
from
|
| 4 |
-
from
|
| 5 |
-
from
|
| 6 |
-
from
|
| 7 |
-
from
|
| 8 |
|
| 9 |
|
| 10 |
@HookManager.register_permission_check_hook
|
|
|
|
| 1 |
import requests
|
| 2 |
|
| 3 |
+
from api.db.service_registry import ServiceRegistry
|
| 4 |
+
from api.settings import RegistryServiceName
|
| 5 |
+
from api.hook import HookManager
|
| 6 |
+
from api.hook.common.parameters import PermissionCheckParameters, PermissionReturn
|
| 7 |
+
from api.settings import HOOK_SERVER_NAME
|
| 8 |
|
| 9 |
|
| 10 |
@HookManager.register_permission_check_hook
|
api/hook/api/site_authentication.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
| 1 |
import requests
|
| 2 |
|
| 3 |
-
from
|
| 4 |
-
from
|
| 5 |
-
from
|
| 6 |
-
from
|
| 7 |
SignatureReturn
|
| 8 |
-
from
|
| 9 |
|
| 10 |
|
| 11 |
@HookManager.register_site_signature_hook
|
|
|
|
| 1 |
import requests
|
| 2 |
|
| 3 |
+
from api.db.service_registry import ServiceRegistry
|
| 4 |
+
from api.settings import RegistryServiceName
|
| 5 |
+
from api.hook import HookManager
|
| 6 |
+
from api.hook.common.parameters import SignatureParameters, AuthenticationParameters, AuthenticationReturn,\
|
| 7 |
SignatureReturn
|
| 8 |
+
from api.settings import HOOK_SERVER_NAME, PARTY_ID
|
| 9 |
|
| 10 |
|
| 11 |
@HookManager.register_site_signature_hook
|
api/hook/common/parameters.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
from
|
| 2 |
|
| 3 |
|
| 4 |
class ParametersBase:
|
|
|
|
| 1 |
+
from api.settings import RetCode
|
| 2 |
|
| 3 |
|
| 4 |
class ParametersBase:
|
api/ragflow_server.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -23,17 +23,17 @@ import traceback
|
|
| 23 |
|
| 24 |
from werkzeug.serving import run_simple
|
| 25 |
|
| 26 |
-
from
|
| 27 |
-
from
|
| 28 |
-
from
|
| 29 |
-
from
|
| 30 |
HOST, HTTP_PORT, access_logger, database_logger, stat_logger,
|
| 31 |
)
|
| 32 |
-
from
|
| 33 |
|
| 34 |
-
from
|
| 35 |
-
from
|
| 36 |
-
from
|
| 37 |
|
| 38 |
if __name__ == '__main__':
|
| 39 |
stat_logger.info(
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 23 |
|
| 24 |
from werkzeug.serving import run_simple
|
| 25 |
|
| 26 |
+
from api.apps import app
|
| 27 |
+
from api.db.runtime_config import RuntimeConfig
|
| 28 |
+
from api.hook import HookManager
|
| 29 |
+
from api.settings import (
|
| 30 |
HOST, HTTP_PORT, access_logger, database_logger, stat_logger,
|
| 31 |
)
|
| 32 |
+
from api import utils
|
| 33 |
|
| 34 |
+
from api.db.db_models import init_database_tables as init_web_db
|
| 35 |
+
from api.db.init_data import init_web_data
|
| 36 |
+
from api.versions import get_versions
|
| 37 |
|
| 38 |
if __name__ == '__main__':
|
| 39 |
stat_logger.info(
|
api/settings.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -17,9 +17,9 @@ import os
|
|
| 17 |
|
| 18 |
from enum import IntEnum, Enum
|
| 19 |
|
| 20 |
-
from
|
| 21 |
-
from
|
| 22 |
-
from
|
| 23 |
|
| 24 |
|
| 25 |
# Server
|
|
@@ -71,7 +71,7 @@ PROXY_PROTOCOL = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("protocol")
|
|
| 71 |
DATABASE = decrypt_database_config()
|
| 72 |
|
| 73 |
# Logger
|
| 74 |
-
LoggerFactory.set_directory(os.path.join(get_project_base_directory(), "logs", "
|
| 75 |
# {CRITICAL: 50, FATAL:50, ERROR:40, WARNING:30, WARN:30, INFO:20, DEBUG:10, NOTSET:0}
|
| 76 |
LoggerFactory.LEVEL = 10
|
| 77 |
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 17 |
|
| 18 |
from enum import IntEnum, Enum
|
| 19 |
|
| 20 |
+
from api.utils import get_base_config,decrypt_database_config
|
| 21 |
+
from api.utils.file_utils import get_project_base_directory
|
| 22 |
+
from api.utils.log_utils import LoggerFactory, getLogger
|
| 23 |
|
| 24 |
|
| 25 |
# Server
|
|
|
|
| 71 |
DATABASE = decrypt_database_config()
|
| 72 |
|
| 73 |
# Logger
|
| 74 |
+
LoggerFactory.set_directory(os.path.join(get_project_base_directory(), "logs", "api"))
|
| 75 |
# {CRITICAL: 50, FATAL:50, ERROR:40, WARNING:30, WARN:30, INFO:20, DEBUG:10, NOTSET:0}
|
| 76 |
LoggerFactory.LEVEL = 10
|
| 77 |
|
api/utils/__init__.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
api/utils/api_utils.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -24,16 +24,16 @@ from flask import (
|
|
| 24 |
)
|
| 25 |
from werkzeug.http import HTTP_STATUS_CODES
|
| 26 |
|
| 27 |
-
from
|
| 28 |
-
from
|
| 29 |
-
from
|
| 30 |
-
from
|
| 31 |
REQUEST_MAX_WAIT_SEC, REQUEST_WAIT_SEC,
|
| 32 |
stat_logger,CLIENT_AUTHENTICATION, HTTP_APP_KEY, SECRET_KEY
|
| 33 |
)
|
| 34 |
import requests
|
| 35 |
import functools
|
| 36 |
-
from
|
| 37 |
from uuid import uuid1
|
| 38 |
from base64 import b64encode
|
| 39 |
from hmac import HMAC
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 24 |
)
|
| 25 |
from werkzeug.http import HTTP_STATUS_CODES
|
| 26 |
|
| 27 |
+
from api.utils import json_dumps
|
| 28 |
+
from api.versions import get_rag_version
|
| 29 |
+
from api.settings import RetCode
|
| 30 |
+
from api.settings import (
|
| 31 |
REQUEST_MAX_WAIT_SEC, REQUEST_WAIT_SEC,
|
| 32 |
stat_logger,CLIENT_AUTHENTICATION, HTTP_APP_KEY, SECRET_KEY
|
| 33 |
)
|
| 34 |
import requests
|
| 35 |
import functools
|
| 36 |
+
from api.utils import CustomJSONEncoder
|
| 37 |
from uuid import uuid1
|
| 38 |
from base64 import b64encode
|
| 39 |
from hmac import HMAC
|
api/utils/file_utils.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -21,7 +21,7 @@ import re
|
|
| 21 |
from cachetools import LRUCache, cached
|
| 22 |
from ruamel.yaml import YAML
|
| 23 |
|
| 24 |
-
from
|
| 25 |
|
| 26 |
PROJECT_BASE = os.getenv("RAG_PROJECT_BASE") or os.getenv("RAG_DEPLOY_BASE")
|
| 27 |
RAG_BASE = os.getenv("RAG_BASE")
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 21 |
from cachetools import LRUCache, cached
|
| 22 |
from ruamel.yaml import YAML
|
| 23 |
|
| 24 |
+
from api.db import FileType
|
| 25 |
|
| 26 |
PROJECT_BASE = os.getenv("RAG_PROJECT_BASE") or os.getenv("RAG_DEPLOY_BASE")
|
| 27 |
RAG_BASE = os.getenv("RAG_BASE")
|
api/utils/log_utils.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -21,7 +21,7 @@ import inspect
|
|
| 21 |
from logging.handlers import TimedRotatingFileHandler
|
| 22 |
from threading import RLock
|
| 23 |
|
| 24 |
-
from
|
| 25 |
|
| 26 |
class LoggerFactory(object):
|
| 27 |
TYPE = "FILE"
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 21 |
from logging.handlers import TimedRotatingFileHandler
|
| 22 |
from threading import RLock
|
| 23 |
|
| 24 |
+
from api.utils import file_utils
|
| 25 |
|
| 26 |
class LoggerFactory(object):
|
| 27 |
TYPE = "FILE"
|
api/utils/t_crypt.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import base64, os, sys
|
| 2 |
from Cryptodome.PublicKey import RSA
|
| 3 |
from Cryptodome.Cipher import PKCS1_v1_5 as Cipher_pkcs1_v1_5
|
| 4 |
-
from
|
| 5 |
|
| 6 |
def crypt(line):
|
| 7 |
file_path = os.path.join(file_utils.get_project_base_directory(), "conf", "public.pem")
|
|
|
|
| 1 |
import base64, os, sys
|
| 2 |
from Cryptodome.PublicKey import RSA
|
| 3 |
from Cryptodome.Cipher import PKCS1_v1_5 as Cipher_pkcs1_v1_5
|
| 4 |
+
from api.utils import decrypt, file_utils
|
| 5 |
|
| 6 |
def crypt(line):
|
| 7 |
file_path = os.path.join(file_utils.get_project_base_directory(), "conf", "public.pem")
|
api/versions.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -18,7 +18,7 @@ import os
|
|
| 18 |
import dotenv
|
| 19 |
import typing
|
| 20 |
|
| 21 |
-
from
|
| 22 |
|
| 23 |
|
| 24 |
def get_versions() -> typing.Mapping[str, typing.Any]:
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 18 |
import dotenv
|
| 19 |
import typing
|
| 20 |
|
| 21 |
+
from api.utils.file_utils import get_project_base_directory
|
| 22 |
|
| 23 |
|
| 24 |
def get_versions() -> typing.Mapping[str, typing.Any]:
|
rag/llm/__init__.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
rag/llm/chat_model.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
rag/llm/cv_model.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
rag/llm/embedding_model.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -60,6 +60,10 @@ class HuEmbedding(Base):
|
|
| 60 |
res.extend(self.model.encode(texts[i:i + batch_size]).tolist())
|
| 61 |
return np.array(res), token_count
|
| 62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
class OpenAIEmbed(Base):
|
| 65 |
def __init__(self, key, model_name="text-embedding-ada-002"):
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 60 |
res.extend(self.model.encode(texts[i:i + batch_size]).tolist())
|
| 61 |
return np.array(res), token_count
|
| 62 |
|
| 63 |
+
def encode_queries(self, text: str):
|
| 64 |
+
token_count = num_tokens_from_string(text)
|
| 65 |
+
return self.model.encode_queries([text]).tolist()[0], token_count
|
| 66 |
+
|
| 67 |
|
| 68 |
class OpenAIEmbed(Base):
|
| 69 |
def __init__(self, key, model_name="text-embedding-ada-002"):
|
rag/nlp/huqie.py
CHANGED
|
@@ -9,7 +9,7 @@ import string
|
|
| 9 |
import sys
|
| 10 |
from hanziconv import HanziConv
|
| 11 |
|
| 12 |
-
from
|
| 13 |
|
| 14 |
|
| 15 |
class Huqie:
|
|
|
|
| 9 |
import sys
|
| 10 |
from hanziconv import HanziConv
|
| 11 |
|
| 12 |
+
from api.utils.file_utils import get_project_base_directory
|
| 13 |
|
| 14 |
|
| 15 |
class Huqie:
|
rag/nlp/query.py
CHANGED
|
@@ -147,7 +147,7 @@ class EsQueryer:
|
|
| 147 |
atks = toDict(atks)
|
| 148 |
btkss = [toDict(tks) for tks in btkss]
|
| 149 |
tksim = [self.similarity(atks, btks) for btks in btkss]
|
| 150 |
-
return np.array(sims[0]) * vtweight + np.array(tksim) * tkweight
|
| 151 |
|
| 152 |
def similarity(self, qtwt, dtwt):
|
| 153 |
if isinstance(dtwt, type("")):
|
|
|
|
| 147 |
atks = toDict(atks)
|
| 148 |
btkss = [toDict(tks) for tks in btkss]
|
| 149 |
tksim = [self.similarity(atks, btks) for btks in btkss]
|
| 150 |
+
return np.array(sims[0]) * vtweight + np.array(tksim) * tkweight, sims[0], tksim
|
| 151 |
|
| 152 |
def similarity(self, qtwt, dtwt):
|
| 153 |
if isinstance(dtwt, type("")):
|
rag/nlp/search.py
CHANGED
|
@@ -15,7 +15,7 @@ def index_name(uid): return f"ragflow_{uid}"
|
|
| 15 |
|
| 16 |
|
| 17 |
class Dealer:
|
| 18 |
-
def __init__(self, es
|
| 19 |
self.qryr = query.EsQueryer(es)
|
| 20 |
self.qryr.flds = [
|
| 21 |
"title_tks^10",
|
|
@@ -23,7 +23,6 @@ class Dealer:
|
|
| 23 |
"content_ltks^2",
|
| 24 |
"content_sm_ltks"]
|
| 25 |
self.es = es
|
| 26 |
-
self.emb_mdl = emb_mdl
|
| 27 |
|
| 28 |
@dataclass
|
| 29 |
class SearchResult:
|
|
@@ -36,23 +35,26 @@ class Dealer:
|
|
| 36 |
keywords: Optional[List[str]] = None
|
| 37 |
group_docs: List[List] = None
|
| 38 |
|
| 39 |
-
def _vector(self, txt, sim=0.8, topk=10):
|
| 40 |
-
qv =
|
| 41 |
return {
|
| 42 |
"field": "q_%d_vec"%len(qv),
|
| 43 |
"k": topk,
|
| 44 |
"similarity": sim,
|
| 45 |
-
"num_candidates":
|
| 46 |
"query_vector": qv
|
| 47 |
}
|
| 48 |
|
| 49 |
-
def search(self, req, idxnm,
|
| 50 |
qst = req.get("question", "")
|
| 51 |
bqry, keywords = self.qryr.question(qst)
|
| 52 |
if req.get("kb_ids"):
|
| 53 |
bqry.filter.append(Q("terms", kb_id=req["kb_ids"]))
|
| 54 |
if req.get("doc_ids"):
|
| 55 |
bqry.filter.append(Q("terms", doc_id=req["doc_ids"]))
|
|
|
|
|
|
|
|
|
|
| 56 |
bqry.boost = 0.05
|
| 57 |
|
| 58 |
s = Search()
|
|
@@ -60,7 +62,7 @@ class Dealer:
|
|
| 60 |
ps = int(req.get("size", 1000))
|
| 61 |
src = req.get("fields", ["docnm_kwd", "content_ltks", "kb_id","img_id",
|
| 62 |
"image_id", "doc_id", "q_512_vec", "q_768_vec",
|
| 63 |
-
"q_1024_vec", "q_1536_vec"])
|
| 64 |
|
| 65 |
s = s.query(bqry)[pg * ps:(pg + 1) * ps]
|
| 66 |
s = s.highlight("content_ltks")
|
|
@@ -80,7 +82,8 @@ class Dealer:
|
|
| 80 |
s = s.to_dict()
|
| 81 |
q_vec = []
|
| 82 |
if req.get("vector"):
|
| 83 |
-
|
|
|
|
| 84 |
s["knn"]["filter"] = bqry.to_dict()
|
| 85 |
if "highlight" in s: del s["highlight"]
|
| 86 |
q_vec = s["knn"]["query_vector"]
|
|
@@ -168,7 +171,7 @@ class Dealer:
|
|
| 168 |
def trans2floats(txt):
|
| 169 |
return [float(t) for t in txt.split("\t")]
|
| 170 |
|
| 171 |
-
def insert_citations(self, ans, top_idx, sres,
|
| 172 |
vfield="q_vec", cfield="content_ltks"):
|
| 173 |
|
| 174 |
ins_embd = [Dealer.trans2floats(
|
|
@@ -179,15 +182,14 @@ class Dealer:
|
|
| 179 |
res = ""
|
| 180 |
|
| 181 |
def citeit():
|
| 182 |
-
nonlocal s, e, ans, res
|
| 183 |
if not ins_embd:
|
| 184 |
return
|
| 185 |
-
embd =
|
| 186 |
sim = self.qryr.hybrid_similarity(embd,
|
| 187 |
ins_embd,
|
| 188 |
huqie.qie(ans[s:e]).split(" "),
|
| 189 |
ins_tw)
|
| 190 |
-
print(ans[s: e], sim)
|
| 191 |
mx = np.max(sim) * 0.99
|
| 192 |
if mx < 0.55:
|
| 193 |
return
|
|
@@ -225,20 +227,18 @@ class Dealer:
|
|
| 225 |
|
| 226 |
return res
|
| 227 |
|
| 228 |
-
def rerank(self, sres, query, tkweight=0.3, vtweight=0.7,
|
| 229 |
-
vfield="q_vec", cfield="content_ltks"):
|
| 230 |
ins_embd = [
|
| 231 |
Dealer.trans2floats(
|
| 232 |
-
sres.field[i]["
|
| 233 |
if not ins_embd:
|
| 234 |
return []
|
| 235 |
ins_tw = [sres.field[i][cfield].split(" ") for i in sres.ids]
|
| 236 |
-
|
| 237 |
-
sim = self.qryr.hybrid_similarity(sres.query_vector,
|
| 238 |
ins_embd,
|
| 239 |
huqie.qie(query).split(" "),
|
| 240 |
ins_tw, tkweight, vtweight)
|
| 241 |
-
return sim
|
| 242 |
|
| 243 |
|
| 244 |
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
class Dealer:
|
| 18 |
+
def __init__(self, es):
|
| 19 |
self.qryr = query.EsQueryer(es)
|
| 20 |
self.qryr.flds = [
|
| 21 |
"title_tks^10",
|
|
|
|
| 23 |
"content_ltks^2",
|
| 24 |
"content_sm_ltks"]
|
| 25 |
self.es = es
|
|
|
|
| 26 |
|
| 27 |
@dataclass
|
| 28 |
class SearchResult:
|
|
|
|
| 35 |
keywords: Optional[List[str]] = None
|
| 36 |
group_docs: List[List] = None
|
| 37 |
|
| 38 |
+
def _vector(self, txt, emb_mdl, sim=0.8, topk=10):
|
| 39 |
+
qv, c = emb_mdl.encode_queries(txt)
|
| 40 |
return {
|
| 41 |
"field": "q_%d_vec"%len(qv),
|
| 42 |
"k": topk,
|
| 43 |
"similarity": sim,
|
| 44 |
+
"num_candidates": topk*2,
|
| 45 |
"query_vector": qv
|
| 46 |
}
|
| 47 |
|
| 48 |
+
def search(self, req, idxnm, emb_mdl=None):
|
| 49 |
qst = req.get("question", "")
|
| 50 |
bqry, keywords = self.qryr.question(qst)
|
| 51 |
if req.get("kb_ids"):
|
| 52 |
bqry.filter.append(Q("terms", kb_id=req["kb_ids"]))
|
| 53 |
if req.get("doc_ids"):
|
| 54 |
bqry.filter.append(Q("terms", doc_id=req["doc_ids"]))
|
| 55 |
+
if "available_int" in req:
|
| 56 |
+
if req["available_int"] == 0: bqry.filter.append(Q("range", available_int={"lt": 1}))
|
| 57 |
+
else: bqry.filter.append(Q("bool", must_not=Q("range", available_int={"lt": 1})))
|
| 58 |
bqry.boost = 0.05
|
| 59 |
|
| 60 |
s = Search()
|
|
|
|
| 62 |
ps = int(req.get("size", 1000))
|
| 63 |
src = req.get("fields", ["docnm_kwd", "content_ltks", "kb_id","img_id",
|
| 64 |
"image_id", "doc_id", "q_512_vec", "q_768_vec",
|
| 65 |
+
"q_1024_vec", "q_1536_vec", "available_int"])
|
| 66 |
|
| 67 |
s = s.query(bqry)[pg * ps:(pg + 1) * ps]
|
| 68 |
s = s.highlight("content_ltks")
|
|
|
|
| 82 |
s = s.to_dict()
|
| 83 |
q_vec = []
|
| 84 |
if req.get("vector"):
|
| 85 |
+
assert emb_mdl, "No embedding model selected"
|
| 86 |
+
s["knn"] = self._vector(qst, emb_mdl, req.get("similarity", 0.4), ps)
|
| 87 |
s["knn"]["filter"] = bqry.to_dict()
|
| 88 |
if "highlight" in s: del s["highlight"]
|
| 89 |
q_vec = s["knn"]["query_vector"]
|
|
|
|
| 171 |
def trans2floats(txt):
|
| 172 |
return [float(t) for t in txt.split("\t")]
|
| 173 |
|
| 174 |
+
def insert_citations(self, ans, top_idx, sres, emb_mdl,
|
| 175 |
vfield="q_vec", cfield="content_ltks"):
|
| 176 |
|
| 177 |
ins_embd = [Dealer.trans2floats(
|
|
|
|
| 182 |
res = ""
|
| 183 |
|
| 184 |
def citeit():
|
| 185 |
+
nonlocal s, e, ans, res, emb_mdl
|
| 186 |
if not ins_embd:
|
| 187 |
return
|
| 188 |
+
embd = emb_mdl.encode(ans[s: e])
|
| 189 |
sim = self.qryr.hybrid_similarity(embd,
|
| 190 |
ins_embd,
|
| 191 |
huqie.qie(ans[s:e]).split(" "),
|
| 192 |
ins_tw)
|
|
|
|
| 193 |
mx = np.max(sim) * 0.99
|
| 194 |
if mx < 0.55:
|
| 195 |
return
|
|
|
|
| 227 |
|
| 228 |
return res
|
| 229 |
|
| 230 |
+
def rerank(self, sres, query, tkweight=0.3, vtweight=0.7, cfield="content_ltks"):
|
|
|
|
| 231 |
ins_embd = [
|
| 232 |
Dealer.trans2floats(
|
| 233 |
+
sres.field[i]["q_%d_vec"%len(sres.query_vector)]) for i in sres.ids]
|
| 234 |
if not ins_embd:
|
| 235 |
return []
|
| 236 |
ins_tw = [sres.field[i][cfield].split(" ") for i in sres.ids]
|
| 237 |
+
sim, tksim, vtsim = self.qryr.hybrid_similarity(sres.query_vector,
|
|
|
|
| 238 |
ins_embd,
|
| 239 |
huqie.qie(query).split(" "),
|
| 240 |
ins_tw, tkweight, vtweight)
|
| 241 |
+
return sim, tksim, vtsim
|
| 242 |
|
| 243 |
|
| 244 |
|
rag/nlp/synonym.py
CHANGED
|
@@ -4,7 +4,7 @@ import time
|
|
| 4 |
import logging
|
| 5 |
import re
|
| 6 |
|
| 7 |
-
from
|
| 8 |
|
| 9 |
|
| 10 |
class Dealer:
|
|
|
|
| 4 |
import logging
|
| 5 |
import re
|
| 6 |
|
| 7 |
+
from api.utils.file_utils import get_project_base_directory
|
| 8 |
|
| 9 |
|
| 10 |
class Dealer:
|
rag/nlp/term_weight.py
CHANGED
|
@@ -5,7 +5,7 @@ import re
|
|
| 5 |
import os
|
| 6 |
import numpy as np
|
| 7 |
from rag.nlp import huqie
|
| 8 |
-
from
|
| 9 |
|
| 10 |
|
| 11 |
class Dealer:
|
|
|
|
| 5 |
import os
|
| 6 |
import numpy as np
|
| 7 |
from rag.nlp import huqie
|
| 8 |
+
from api.utils.file_utils import get_project_base_directory
|
| 9 |
|
| 10 |
|
| 11 |
class Dealer:
|
rag/settings.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -14,9 +14,9 @@
|
|
| 14 |
# limitations under the License.
|
| 15 |
#
|
| 16 |
import os
|
| 17 |
-
from
|
| 18 |
-
from
|
| 19 |
-
from
|
| 20 |
|
| 21 |
|
| 22 |
# Server
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 14 |
# limitations under the License.
|
| 15 |
#
|
| 16 |
import os
|
| 17 |
+
from api.utils import get_base_config,decrypt_database_config
|
| 18 |
+
from api.utils.file_utils import get_project_base_directory
|
| 19 |
+
from api.utils.log_utils import LoggerFactory, getLogger
|
| 20 |
|
| 21 |
|
| 22 |
# Server
|
rag/svr/parse_user_docs.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#
|
| 2 |
-
# Copyright 2019 The
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
@@ -47,12 +47,12 @@ from rag.nlp.huchunk import (
|
|
| 47 |
PptChunker,
|
| 48 |
TextChunker
|
| 49 |
)
|
| 50 |
-
from
|
| 51 |
-
from
|
| 52 |
-
from
|
| 53 |
-
from
|
| 54 |
-
from
|
| 55 |
-
from
|
| 56 |
|
| 57 |
BATCH_SIZE = 64
|
| 58 |
|
|
@@ -257,7 +257,6 @@ def main(comm, mod):
|
|
| 257 |
cron_logger.error(str(e))
|
| 258 |
continue
|
| 259 |
|
| 260 |
-
|
| 261 |
set_progress(r["id"], random.randint(70, 95) / 100.,
|
| 262 |
"Finished embedding! Start to build index!")
|
| 263 |
init_kb(r)
|
|
|
|
| 1 |
#
|
| 2 |
+
# Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
|
| 3 |
#
|
| 4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 47 |
PptChunker,
|
| 48 |
TextChunker
|
| 49 |
)
|
| 50 |
+
from api.db import LLMType
|
| 51 |
+
from api.db.services.document_service import DocumentService
|
| 52 |
+
from api.db.services.llm_service import TenantLLMService
|
| 53 |
+
from api.settings import database_logger
|
| 54 |
+
from api.utils import get_format_time
|
| 55 |
+
from api.utils.file_utils import get_project_base_directory
|
| 56 |
|
| 57 |
BATCH_SIZE = 64
|
| 58 |
|
|
|
|
| 257 |
cron_logger.error(str(e))
|
| 258 |
continue
|
| 259 |
|
|
|
|
| 260 |
set_progress(r["id"], random.randint(70, 95) / 100.,
|
| 261 |
"Finished embedding! Start to build index!")
|
| 262 |
init_kb(r)
|
rag/utils/es_conn.py
CHANGED
|
@@ -66,7 +66,6 @@ class HuEs:
|
|
| 66 |
body=d,
|
| 67 |
id=id,
|
| 68 |
refresh=False,
|
| 69 |
-
doc_type="_doc",
|
| 70 |
retry_on_conflict=100)
|
| 71 |
es_logger.info("Successfully upsert: %s" % id)
|
| 72 |
T = True
|
|
|
|
| 66 |
body=d,
|
| 67 |
id=id,
|
| 68 |
refresh=False,
|
|
|
|
| 69 |
retry_on_conflict=100)
|
| 70 |
es_logger.info("Successfully upsert: %s" % id)
|
| 71 |
T = True
|