KevinHuSh
committed on
Commit
·
ba9251b
1
Parent(s):
15052fd
Make files in knowledgebases visible in the file manager (#714)
Browse files
### What problem does this PR solve?
Make files that belong to knowledgebases visible in the file manager.
#162
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- api/apps/document_app.py +29 -14
- api/apps/file_app.py +8 -5
- api/db/__init__.py +8 -0
- api/db/db_models.py +21 -5
- api/db/services/document_service.py +16 -0
- api/db/services/file2document_service.py +14 -12
- api/db/services/file_service.py +88 -24
- docker/entrypoint.sh +2 -2
- rag/svr/task_executor.py +1 -0
- requirements_dev.txt +0 -2
api/apps/document_app.py
CHANGED
@@ -23,7 +23,7 @@ from elasticsearch_dsl import Q
|
|
23 |
from flask import request
|
24 |
from flask_login import login_required, current_user
|
25 |
|
26 |
-
from api.db.db_models import Task
|
27 |
from api.db.services.file2document_service import File2DocumentService
|
28 |
from api.db.services.file_service import FileService
|
29 |
from api.db.services.task_service import TaskService, queue_tasks
|
@@ -33,7 +33,7 @@ from api.db.services import duplicate_name
|
|
33 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
34 |
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
|
35 |
from api.utils import get_uuid
|
36 |
-
from api.db import FileType, TaskStatus, ParserType
|
37 |
from api.db.services.document_service import DocumentService
|
38 |
from api.settings import RetCode
|
39 |
from api.utils.api_utils import get_json_result
|
@@ -59,12 +59,19 @@ def upload():
|
|
59 |
return get_json_result(
|
60 |
data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
|
61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
err = []
|
63 |
for file in file_objs:
|
64 |
try:
|
65 |
-
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
66 |
-
if not e:
|
67 |
-
raise LookupError("Can't find this knowledgebase!")
|
68 |
MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
|
69 |
if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(kb.tenant_id) >= MAX_FILE_NUM_PER_USER:
|
70 |
raise RuntimeError("Exceed the maximum file number of a free user!")
|
@@ -99,6 +106,8 @@ def upload():
|
|
99 |
if re.search(r"\.(ppt|pptx|pages)$", filename):
|
100 |
doc["parser_id"] = ParserType.PRESENTATION.value
|
101 |
DocumentService.insert(doc)
|
|
|
|
|
102 |
except Exception as e:
|
103 |
err.append(file.filename + ": " + str(e))
|
104 |
if err:
|
@@ -228,11 +237,13 @@ def rm():
|
|
228 |
req = request.json
|
229 |
doc_ids = req["doc_id"]
|
230 |
if isinstance(doc_ids, str): doc_ids = [doc_ids]
|
|
|
|
|
|
|
231 |
errors = ""
|
232 |
for doc_id in doc_ids:
|
233 |
try:
|
234 |
e, doc = DocumentService.get_by_id(doc_id)
|
235 |
-
|
236 |
if not e:
|
237 |
return get_data_error_result(retmsg="Document not found!")
|
238 |
tenant_id = DocumentService.get_tenant_id(doc_id)
|
@@ -241,21 +252,25 @@ def rm():
|
|
241 |
|
242 |
ELASTICSEARCH.deleteByQuery(
|
243 |
Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
|
244 |
-
|
245 |
-
|
|
|
|
|
246 |
if not DocumentService.delete(doc):
|
247 |
return get_data_error_result(
|
248 |
retmsg="Database error (Document removal)!")
|
249 |
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
except Exception as e:
|
256 |
errors += str(e)
|
257 |
|
258 |
-
if errors:
|
|
|
|
|
259 |
return get_json_result(data=True)
|
260 |
|
261 |
|
|
|
23 |
from flask import request
|
24 |
from flask_login import login_required, current_user
|
25 |
|
26 |
+
from api.db.db_models import Task, File
|
27 |
from api.db.services.file2document_service import File2DocumentService
|
28 |
from api.db.services.file_service import FileService
|
29 |
from api.db.services.task_service import TaskService, queue_tasks
|
|
|
33 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
34 |
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
|
35 |
from api.utils import get_uuid
|
36 |
+
from api.db import FileType, TaskStatus, ParserType, FileSource
|
37 |
from api.db.services.document_service import DocumentService
|
38 |
from api.settings import RetCode
|
39 |
from api.utils.api_utils import get_json_result
|
|
|
59 |
return get_json_result(
|
60 |
data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
|
61 |
|
62 |
+
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
63 |
+
if not e:
|
64 |
+
raise LookupError("Can't find this knowledgebase!")
|
65 |
+
|
66 |
+
root_folder = FileService.get_root_folder(current_user.id)
|
67 |
+
pf_id = root_folder["id"]
|
68 |
+
FileService.init_knowledgebase_docs(pf_id, current_user.id)
|
69 |
+
kb_root_folder = FileService.get_kb_folder(current_user.id)
|
70 |
+
kb_folder = FileService.new_a_file_from_kb(kb.tenant_id, kb.name, kb_root_folder["id"])
|
71 |
+
|
72 |
err = []
|
73 |
for file in file_objs:
|
74 |
try:
|
|
|
|
|
|
|
75 |
MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
|
76 |
if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(kb.tenant_id) >= MAX_FILE_NUM_PER_USER:
|
77 |
raise RuntimeError("Exceed the maximum file number of a free user!")
|
|
|
106 |
if re.search(r"\.(ppt|pptx|pages)$", filename):
|
107 |
doc["parser_id"] = ParserType.PRESENTATION.value
|
108 |
DocumentService.insert(doc)
|
109 |
+
|
110 |
+
FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id)
|
111 |
except Exception as e:
|
112 |
err.append(file.filename + ": " + str(e))
|
113 |
if err:
|
|
|
237 |
req = request.json
|
238 |
doc_ids = req["doc_id"]
|
239 |
if isinstance(doc_ids, str): doc_ids = [doc_ids]
|
240 |
+
root_folder = FileService.get_root_folder(current_user.id)
|
241 |
+
pf_id = root_folder["id"]
|
242 |
+
FileService.init_knowledgebase_docs(pf_id, current_user.id)
|
243 |
errors = ""
|
244 |
for doc_id in doc_ids:
|
245 |
try:
|
246 |
e, doc = DocumentService.get_by_id(doc_id)
|
|
|
247 |
if not e:
|
248 |
return get_data_error_result(retmsg="Document not found!")
|
249 |
tenant_id = DocumentService.get_tenant_id(doc_id)
|
|
|
252 |
|
253 |
ELASTICSEARCH.deleteByQuery(
|
254 |
Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
|
255 |
+
|
256 |
+
DocumentService.clear_chunk_num(doc_id)
|
257 |
+
b, n = File2DocumentService.get_minio_address(doc_id=doc_id)
|
258 |
+
|
259 |
if not DocumentService.delete(doc):
|
260 |
return get_data_error_result(
|
261 |
retmsg="Database error (Document removal)!")
|
262 |
|
263 |
+
f2d = File2DocumentService.get_by_document_id(doc_id)
|
264 |
+
FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
|
265 |
+
File2DocumentService.delete_by_document_id(doc_id)
|
266 |
+
|
267 |
+
MINIO.rm(b, n)
|
268 |
except Exception as e:
|
269 |
errors += str(e)
|
270 |
|
271 |
+
if errors:
|
272 |
+
return get_json_result(data=False, retmsg=errors, retcode=RetCode.SERVER_ERROR)
|
273 |
+
|
274 |
return get_json_result(data=True)
|
275 |
|
276 |
|
api/apps/file_app.py
CHANGED
@@ -26,7 +26,7 @@ from api.db.services.document_service import DocumentService
|
|
26 |
from api.db.services.file2document_service import File2DocumentService
|
27 |
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
|
28 |
from api.utils import get_uuid
|
29 |
-
from api.db import FileType
|
30 |
from api.db.services import duplicate_name
|
31 |
from api.db.services.file_service import FileService
|
32 |
from api.settings import RetCode
|
@@ -45,7 +45,7 @@ def upload():
|
|
45 |
|
46 |
if not pf_id:
|
47 |
root_folder = FileService.get_root_folder(current_user.id)
|
48 |
-
pf_id = root_folder
|
49 |
|
50 |
if 'file' not in request.files:
|
51 |
return get_json_result(
|
@@ -132,7 +132,7 @@ def create():
|
|
132 |
input_file_type = request.json.get("type")
|
133 |
if not pf_id:
|
134 |
root_folder = FileService.get_root_folder(current_user.id)
|
135 |
-
pf_id = root_folder
|
136 |
|
137 |
try:
|
138 |
if not FileService.is_parent_folder_exist(pf_id):
|
@@ -176,7 +176,8 @@ def list():
|
|
176 |
desc = request.args.get("desc", True)
|
177 |
if not pf_id:
|
178 |
root_folder = FileService.get_root_folder(current_user.id)
|
179 |
-
pf_id = root_folder
|
|
|
180 |
try:
|
181 |
e, file = FileService.get_by_id(pf_id)
|
182 |
if not e:
|
@@ -199,7 +200,7 @@ def list():
|
|
199 |
def get_root_folder():
|
200 |
try:
|
201 |
root_folder = FileService.get_root_folder(current_user.id)
|
202 |
-
return get_json_result(data={"root_folder": root_folder
|
203 |
except Exception as e:
|
204 |
return server_error_response(e)
|
205 |
|
@@ -250,6 +251,8 @@ def rm():
|
|
250 |
return get_data_error_result(retmsg="File or Folder not found!")
|
251 |
if not file.tenant_id:
|
252 |
return get_data_error_result(retmsg="Tenant not found!")
|
|
|
|
|
253 |
|
254 |
if file.type == FileType.FOLDER.value:
|
255 |
file_id_list = FileService.get_all_innermost_file_ids(file_id, [])
|
|
|
26 |
from api.db.services.file2document_service import File2DocumentService
|
27 |
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
|
28 |
from api.utils import get_uuid
|
29 |
+
from api.db import FileType, FileSource
|
30 |
from api.db.services import duplicate_name
|
31 |
from api.db.services.file_service import FileService
|
32 |
from api.settings import RetCode
|
|
|
45 |
|
46 |
if not pf_id:
|
47 |
root_folder = FileService.get_root_folder(current_user.id)
|
48 |
+
pf_id = root_folder["id"]
|
49 |
|
50 |
if 'file' not in request.files:
|
51 |
return get_json_result(
|
|
|
132 |
input_file_type = request.json.get("type")
|
133 |
if not pf_id:
|
134 |
root_folder = FileService.get_root_folder(current_user.id)
|
135 |
+
pf_id = root_folder["id"]
|
136 |
|
137 |
try:
|
138 |
if not FileService.is_parent_folder_exist(pf_id):
|
|
|
176 |
desc = request.args.get("desc", True)
|
177 |
if not pf_id:
|
178 |
root_folder = FileService.get_root_folder(current_user.id)
|
179 |
+
pf_id = root_folder["id"]
|
180 |
+
FileService.init_knowledgebase_docs(pf_id, current_user.id)
|
181 |
try:
|
182 |
e, file = FileService.get_by_id(pf_id)
|
183 |
if not e:
|
|
|
200 |
def get_root_folder():
    """Return the current user's root folder as a JSON result.

    The folder dict comes from ``FileService.get_root_folder`` and is placed
    under the ``"root_folder"`` key. Any exception is converted into a server
    error response instead of propagating.
    """
    try:
        payload = {"root_folder": FileService.get_root_folder(current_user.id)}
        return get_json_result(data=payload)
    except Exception as e:
        return server_error_response(e)
|
206 |
|
|
|
251 |
return get_data_error_result(retmsg="File or Folder not found!")
|
252 |
if not file.tenant_id:
|
253 |
return get_data_error_result(retmsg="Tenant not found!")
|
254 |
+
if file.source_type == FileSource.KNOWLEDGEBASE:
|
255 |
+
continue
|
256 |
|
257 |
if file.type == FileType.FOLDER.value:
|
258 |
file_id_list = FileService.get_all_innermost_file_ids(file_id, [])
|
api/db/__init__.py
CHANGED
@@ -83,3 +83,11 @@ class ParserType(StrEnum):
|
|
83 |
NAIVE = "naive"
|
84 |
PICTURE = "picture"
|
85 |
ONE = "one"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
NAIVE = "naive"
|
84 |
PICTURE = "picture"
|
85 |
ONE = "one"
|
86 |
+
|
87 |
+
|
88 |
+
class FileSource(StrEnum):
|
89 |
+
LOCAL = ""
|
90 |
+
KNOWLEDGEBASE = "knowledgebase"
|
91 |
+
S3 = "s3"
|
92 |
+
|
93 |
+
KNOWLEDGEBASE_FOLDER_NAME=".knowledgebase"
|
api/db/db_models.py
CHANGED
@@ -21,14 +21,13 @@ import operator
|
|
21 |
from functools import wraps
|
22 |
from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
|
23 |
from flask_login import UserMixin
|
24 |
-
|
25 |
from peewee import (
|
26 |
-
|
27 |
-
CompositeKey,
|
28 |
Field, Model, Metadata
|
29 |
)
|
30 |
from playhouse.pool import PooledMySQLDatabase
|
31 |
-
|
32 |
from api.db import SerializedType, ParserType
|
33 |
from api.settings import DATABASE, stat_logger, SECRET_KEY
|
34 |
from api.utils.log_utils import getLogger
|
@@ -344,7 +343,7 @@ class DataBaseModel(BaseModel):
|
|
344 |
|
345 |
|
346 |
@DB.connection_context()
|
347 |
-
def init_database_tables():
|
348 |
members = inspect.getmembers(sys.modules[__name__], inspect.isclass)
|
349 |
table_objs = []
|
350 |
create_failed_list = []
|
@@ -361,6 +360,7 @@ def init_database_tables():
|
|
361 |
if create_failed_list:
|
362 |
LOGGER.info(f"create tables failed: {create_failed_list}")
|
363 |
raise Exception(f"create tables failed: {create_failed_list}")
|
|
|
364 |
|
365 |
|
366 |
def fill_db_model_object(model_object, human_model_dict):
|
@@ -699,6 +699,11 @@ class File(DataBaseModel):
|
|
699 |
help_text="where dose it store")
|
700 |
size = IntegerField(default=0)
|
701 |
type = CharField(max_length=32, null=False, help_text="file extension")
|
|
|
|
|
|
|
|
|
|
|
702 |
|
703 |
class Meta:
|
704 |
db_table = "file"
|
@@ -817,3 +822,14 @@ class API4Conversation(DataBaseModel):
|
|
817 |
|
818 |
class Meta:
|
819 |
db_table = "api_4_conversation"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
from functools import wraps
|
22 |
from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
|
23 |
from flask_login import UserMixin
|
24 |
+
from playhouse.migrate import MySQLMigrator, migrate
|
25 |
from peewee import (
|
26 |
+
BigIntegerField, BooleanField, CharField,
|
27 |
+
CompositeKey, IntegerField, TextField, FloatField, DateTimeField,
|
28 |
Field, Model, Metadata
|
29 |
)
|
30 |
from playhouse.pool import PooledMySQLDatabase
|
|
|
31 |
from api.db import SerializedType, ParserType
|
32 |
from api.settings import DATABASE, stat_logger, SECRET_KEY
|
33 |
from api.utils.log_utils import getLogger
|
|
|
343 |
|
344 |
|
345 |
@DB.connection_context()
|
346 |
+
def init_database_tables(alter_fields=[]):
|
347 |
members = inspect.getmembers(sys.modules[__name__], inspect.isclass)
|
348 |
table_objs = []
|
349 |
create_failed_list = []
|
|
|
360 |
if create_failed_list:
|
361 |
LOGGER.info(f"create tables failed: {create_failed_list}")
|
362 |
raise Exception(f"create tables failed: {create_failed_list}")
|
363 |
+
migrate_db()
|
364 |
|
365 |
|
366 |
def fill_db_model_object(model_object, human_model_dict):
|
|
|
699 |
help_text="where dose it store")
|
700 |
size = IntegerField(default=0)
|
701 |
type = CharField(max_length=32, null=False, help_text="file extension")
|
702 |
+
source_type = CharField(
|
703 |
+
max_length=128,
|
704 |
+
null=False,
|
705 |
+
default="",
|
706 |
+
help_text="where dose this document come from")
|
707 |
|
708 |
class Meta:
|
709 |
db_table = "file"
|
|
|
822 |
|
823 |
class Meta:
|
824 |
db_table = "api_4_conversation"
|
825 |
+
|
826 |
+
|
827 |
+
def migrate_db():
    """Best-effort schema migration run after table creation.

    Adds the ``source_type`` column to the ``file`` table inside a
    transaction. The migration is deliberately non-fatal: any failure is
    logged and swallowed so start-up continues.
    """
    try:
        with DB.transaction():
            migrator = MySQLMigrator(DB)
            migrate(
                migrator.add_column(
                    'file', 'source_type',
                    CharField(max_length=128, null=False, default="",
                              help_text="where dose this document come from"))
            )
    except Exception as e:
        # Presumably the common failure is "column already exists" on a
        # database that was migrated earlier; record it instead of hiding it
        # so genuine migration problems remain diagnosable.
        LOGGER.info(f"migrate_db: adding file.source_type skipped: {e}")
|
api/db/services/document_service.py
CHANGED
@@ -150,6 +150,22 @@ class DocumentService(CommonService):
|
|
150 |
Knowledgebase.id == kb_id).execute()
|
151 |
return num
|
152 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
@classmethod
|
154 |
@DB.connection_context()
|
155 |
def get_tenant_id(cls, doc_id):
|
|
|
150 |
Knowledgebase.id == kb_id).execute()
|
151 |
return num
|
152 |
|
153 |
+
@classmethod
@DB.connection_context()
def clear_chunk_num(cls, doc_id):
    """Remove one document's contribution from its knowledgebase counters.

    Loads the document by ``doc_id``, then subtracts its ``token_num`` and
    ``chunk_num`` from the owning knowledgebase and decrements that
    knowledgebase's ``doc_num`` by one.

    Returns:
        Whatever the update query's ``execute()`` returns (presumably the
        number of modified rows — confirm against peewee).
    """
    doc = cls.model.get_by_id(doc_id)
    assert doc, "Can't fine document in database."

    kb = Knowledgebase
    update_query = kb.update(
        token_num=kb.token_num - doc.token_num,
        chunk_num=kb.chunk_num - doc.chunk_num,
        doc_num=kb.doc_num - 1,
    ).where(kb.id == doc.kb_id)
    return update_query.execute()
|
168 |
+
|
169 |
@classmethod
|
170 |
@DB.connection_context()
|
171 |
def get_tenant_id(cls, doc_id):
|
api/db/services/file2document_service.py
CHANGED
@@ -15,12 +15,12 @@
|
|
15 |
#
|
16 |
from datetime import datetime
|
17 |
|
|
|
18 |
from api.db.db_models import DB
|
19 |
-
from api.db.db_models import File,
|
20 |
from api.db.services.common_service import CommonService
|
21 |
from api.db.services.document_service import DocumentService
|
22 |
-
from api.
|
23 |
-
from api.utils import current_timestamp, datetime_format
|
24 |
|
25 |
|
26 |
class File2DocumentService(CommonService):
|
@@ -71,13 +71,15 @@ class File2DocumentService(CommonService):
|
|
71 |
@DB.connection_context()
|
72 |
def get_minio_address(cls, doc_id=None, file_id=None):
|
73 |
if doc_id:
|
74 |
-
|
75 |
else:
|
76 |
-
|
77 |
-
if
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
|
|
|
|
|
15 |
#
|
16 |
from datetime import datetime
|
17 |
|
18 |
+
from api.db import FileSource
|
19 |
from api.db.db_models import DB
|
20 |
+
from api.db.db_models import File, File2Document
|
21 |
from api.db.services.common_service import CommonService
|
22 |
from api.db.services.document_service import DocumentService
|
23 |
+
from api.utils import current_timestamp, datetime_format, get_uuid
|
|
|
24 |
|
25 |
|
26 |
class File2DocumentService(CommonService):
|
|
|
71 |
@DB.connection_context()
def get_minio_address(cls, doc_id=None, file_id=None):
    """Resolve the storage address pair for a document or a file.

    The file-to-document mapping is looked up by ``doc_id`` when it is
    given, otherwise by ``file_id``. If a mapping exists and the mapped
    file's ``source_type`` is ``FileSource.LOCAL``, the file's own
    ``(parent_id, location)`` is returned. In every other case the address
    comes from the document record as ``(kb_id, location)``.

    Raises:
        AssertionError: no document id was given and none could be derived
            from the mapping.
        LookupError: the document id does not match any document record.
    """
    if doc_id:
        f2d = cls.get_by_document_id(doc_id)
    else:
        f2d = cls.get_by_file_id(file_id)
    if f2d:
        mapping = f2d[0]
        file = File.get_by_id(mapping.file_id)
        if file.source_type == FileSource.LOCAL:
            return file.parent_id, file.location
        # Non-local file: fall through to the document's own address.
        doc_id = mapping.document_id

    assert doc_id, "please specify doc_id"
    e, doc = DocumentService.get_by_id(doc_id)
    if not e:
        # Previously the flag was ignored and a missing document surfaced as
        # an AttributeError on None; fail with an explicit message instead.
        raise LookupError("Document not found!")
    return doc.kb_id, doc.location
|
api/db/services/file_service.py
CHANGED
@@ -16,10 +16,12 @@
|
|
16 |
from flask_login import current_user
|
17 |
from peewee import fn
|
18 |
|
19 |
-
from api.db import FileType
|
20 |
from api.db.db_models import DB, File2Document, Knowledgebase
|
21 |
from api.db.db_models import File, Document
|
22 |
from api.db.services.common_service import CommonService
|
|
|
|
|
23 |
from api.utils import get_uuid
|
24 |
|
25 |
|
@@ -33,10 +35,15 @@ class FileService(CommonService):
|
|
33 |
if keywords:
|
34 |
files = cls.model.select().where(
|
35 |
(cls.model.tenant_id == tenant_id)
|
36 |
-
|
|
|
|
|
|
|
37 |
else:
|
38 |
-
files = cls.model.select().where((cls.model.tenant_id == tenant_id)
|
39 |
-
|
|
|
|
|
40 |
count = files.count()
|
41 |
if desc:
|
42 |
files = files.order_by(cls.model.getter_by(orderby).desc())
|
@@ -135,29 +142,69 @@ class FileService(CommonService):
|
|
135 |
@classmethod
|
136 |
@DB.connection_context()
|
137 |
def get_root_folder(cls, tenant_id):
|
138 |
-
file
|
139 |
-
cls.model.parent_id == cls.model.id)
|
140 |
-
|
141 |
-
|
142 |
-
file = {
|
143 |
-
"id": file_id,
|
144 |
-
"parent_id": file_id,
|
145 |
-
"tenant_id": tenant_id,
|
146 |
-
"created_by": tenant_id,
|
147 |
-
"name": "/",
|
148 |
-
"type": FileType.FOLDER.value,
|
149 |
-
"size": 0,
|
150 |
-
"location": "",
|
151 |
-
}
|
152 |
-
cls.save(**file)
|
153 |
-
else:
|
154 |
-
file_id = file[0].id
|
155 |
|
156 |
-
|
157 |
-
|
158 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
return file
|
160 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
@classmethod
|
162 |
@DB.connection_context()
|
163 |
def get_parent_folder(cls, file_id):
|
@@ -241,3 +288,20 @@ class FileService(CommonService):
|
|
241 |
dfs(folder_id)
|
242 |
return size
|
243 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
from flask_login import current_user
|
17 |
from peewee import fn
|
18 |
|
19 |
+
from api.db import FileType, KNOWLEDGEBASE_FOLDER_NAME, FileSource
|
20 |
from api.db.db_models import DB, File2Document, Knowledgebase
|
21 |
from api.db.db_models import File, Document
|
22 |
from api.db.services.common_service import CommonService
|
23 |
+
from api.db.services.document_service import DocumentService
|
24 |
+
from api.db.services.file2document_service import File2DocumentService
|
25 |
from api.utils import get_uuid
|
26 |
|
27 |
|
|
|
35 |
if keywords:
|
36 |
files = cls.model.select().where(
|
37 |
(cls.model.tenant_id == tenant_id)
|
38 |
+
(cls.model.parent_id == pf_id),
|
39 |
+
(fn.LOWER(cls.model.name).like(f"%%{keywords.lower()}%%")),
|
40 |
+
~(cls.model.id == pf_id)
|
41 |
+
)
|
42 |
else:
|
43 |
+
files = cls.model.select().where((cls.model.tenant_id == tenant_id),
|
44 |
+
(cls.model.parent_id == pf_id),
|
45 |
+
~(cls.model.id == pf_id)
|
46 |
+
)
|
47 |
count = files.count()
|
48 |
if desc:
|
49 |
files = files.order_by(cls.model.getter_by(orderby).desc())
|
|
|
142 |
@classmethod
@DB.connection_context()
def get_root_folder(cls, tenant_id):
    """Return the tenant's root folder as a dict, creating it if missing.

    A root folder is a record of this tenant whose ``parent_id`` equals its
    own ``id``. When none exists, a new folder named ``"/"`` is saved and
    the dict that was saved is returned.
    """
    model = cls.model
    roots = model.select().where(
        model.tenant_id == tenant_id,
        model.parent_id == model.id,
    )
    existing = next(iter(roots), None)
    if existing is not None:
        return existing.to_dict()

    # No root yet: the new folder is its own parent.
    root_id = get_uuid()
    root = {
        "id": root_id,
        "parent_id": root_id,
        "tenant_id": tenant_id,
        "created_by": tenant_id,
        "name": "/",
        "type": FileType.FOLDER.value,
        "size": 0,
        "location": "",
    }
    cls.save(**root)
    return root
|
163 |
+
|
164 |
+
@classmethod
@DB.connection_context()
def get_kb_folder(cls, tenant_id):
    """Return the tenant's knowledgebase folder as a dict.

    The folder is the record named ``KNOWLEDGEBASE_FOLDER_NAME`` whose
    parent is one of the tenant's root folders (records whose ``parent_id``
    equals their own ``id``).

    Raises:
        AssertionError: no such folder exists for the tenant.
    """
    model = cls.model
    # Conditions must be combined with ``&``: Python's ``and`` evaluates to
    # its last operand, which silently dropped the tenant/parent filters and
    # could return a folder belonging to a different tenant.
    roots = model.select().where(
        (model.tenant_id == tenant_id)
        & (model.parent_id == model.id))
    for root in roots:
        folders = model.select().where(
            (model.tenant_id == tenant_id)
            & (model.parent_id == root.id)
            & (model.name == KNOWLEDGEBASE_FOLDER_NAME))
        for folder in folders:
            return folder.to_dict()
    # Explicit raise (same exception type as before) so the failure is not
    # stripped when Python runs with -O.
    raise AssertionError("Can't find the KB folder. Database init error.")
|
175 |
+
|
176 |
+
@classmethod
@DB.connection_context()
def new_a_file_from_kb(cls, tenant_id, name, parent_id, ty=FileType.FOLDER.value, size=0, location=""):
    """Get or create a knowledgebase-sourced file record under ``parent_id``.

    If a record with the same tenant, parent and name already exists, its
    dict form is returned unchanged. Otherwise a new record marked with
    ``FileSource.KNOWLEDGEBASE`` is saved and the saved dict is returned.
    ``ty`` defaults to the folder type.
    """
    matches = cls.query(tenant_id=tenant_id, parent_id=parent_id, name=name)
    existing = next(iter(matches), None)
    if existing is not None:
        return existing.to_dict()

    record = {
        "id": get_uuid(),
        "parent_id": parent_id,
        "tenant_id": tenant_id,
        "created_by": tenant_id,
        "name": name,
        "type": ty,
        "size": size,
        "location": location,
        "source_type": FileSource.KNOWLEDGEBASE,
    }
    cls.save(**record)
    return record
|
194 |
|
195 |
+
@classmethod
@DB.connection_context()
def init_knowledgebase_docs(cls, root_id, tenant_id):
    """Mirror the tenant's knowledgebases into the file tree, once.

    Does nothing when a folder named ``KNOWLEDGEBASE_FOLDER_NAME`` already
    exists directly under ``root_id``. Otherwise it creates that folder, one
    sub-folder per knowledgebase of the tenant, and a file entry for every
    document of each knowledgebase.
    """
    already_initialised = any(
        True
        for _ in cls.model.select().where(
            (cls.model.name == KNOWLEDGEBASE_FOLDER_NAME)
            & (cls.model.parent_id == root_id))
    )
    if already_initialised:
        return

    folder = cls.new_a_file_from_kb(tenant_id, KNOWLEDGEBASE_FOLDER_NAME, root_id)

    kb_rows = Knowledgebase.select(Knowledgebase.id, Knowledgebase.name).where(
        Knowledgebase.tenant_id == tenant_id)
    for kb in kb_rows:
        kb_folder = cls.new_a_file_from_kb(tenant_id, kb.name, folder["id"])
        kb_folder_id = kb_folder["id"]
        for doc in DocumentService.query(kb_id=kb.id):
            FileService.add_file_from_kb(doc.to_dict(), kb_folder_id, tenant_id)
|
207 |
+
|
208 |
@classmethod
|
209 |
@DB.connection_context()
|
210 |
def get_parent_folder(cls, file_id):
|
|
|
288 |
dfs(folder_id)
|
289 |
return size
|
290 |
|
291 |
+
@classmethod
@DB.connection_context()
def add_file_from_kb(cls, doc, kb_folder_id, tenant_id):
    """Create a file entry for a knowledgebase document and link the two.

    ``doc`` is a dict providing ``id``, ``name``, ``type``, ``size`` and
    ``location``. If the document already has a file-to-document mapping,
    nothing is created. Otherwise a file record marked with
    ``FileSource.KNOWLEDGEBASE`` is saved under ``kb_folder_id`` together
    with a mapping row that ties it to the document.
    """
    doc_id = doc["id"]
    if any(True for _ in File2DocumentService.get_by_document_id(doc_id)):
        return

    file_id = get_uuid()
    record = {
        "id": file_id,
        "parent_id": kb_folder_id,
        "tenant_id": tenant_id,
        "created_by": tenant_id,
        "name": doc["name"],
        "type": doc["type"],
        "size": doc["size"],
        "location": doc["location"],
        "source_type": FileSource.KNOWLEDGEBASE,
    }
    cls.save(**record)
    File2DocumentService.save(id=get_uuid(), file_id=file_id, document_id=doc["id"])
|
docker/entrypoint.sh
CHANGED
@@ -8,14 +8,14 @@ PY=/root/miniconda3/envs/py11/bin/python
|
|
8 |
|
9 |
function task_exe(){
|
10 |
while [ 1 -eq 1 ];do
|
11 |
-
$PY rag/svr/task_executor.py
|
12 |
done
|
13 |
}
|
14 |
|
15 |
WS=1
|
16 |
for ((i=0;i<WS;i++))
|
17 |
do
|
18 |
-
task_exe
|
19 |
done
|
20 |
|
21 |
while [ 1 -eq 1 ];do
|
|
|
8 |
|
9 |
function task_exe(){
    # Run the task executor in an endless loop: whenever the process exits,
    # it is started again immediately.
    while true; do
        $PY rag/svr/task_executor.py
    done
}
|
14 |
|
15 |
WS=1
|
16 |
for ((i=0;i<WS;i++))
|
17 |
do
|
18 |
+
task_exe &
|
19 |
done
|
20 |
|
21 |
while [ 1 -eq 1 ];do
|
rag/svr/task_executor.py
CHANGED
@@ -109,6 +109,7 @@ def collect():
|
|
109 |
if not msg: return pd.DataFrame()
|
110 |
|
111 |
if TaskService.do_cancel(msg["id"]):
|
|
|
112 |
return pd.DataFrame()
|
113 |
tasks = TaskService.get_tasks(msg["id"])
|
114 |
assert tasks, "{} empty task!".format(msg["id"])
|
|
|
109 |
if not msg: return pd.DataFrame()
|
110 |
|
111 |
if TaskService.do_cancel(msg["id"]):
|
112 |
+
cron_logger.info("Task {} has been canceled.".format(msg["id"]))
|
113 |
return pd.DataFrame()
|
114 |
tasks = TaskService.get_tasks(msg["id"])
|
115 |
assert tasks, "{} empty task!".format(msg["id"])
|
requirements_dev.txt
CHANGED
@@ -78,8 +78,6 @@ pycryptodomex==3.20.0
|
|
78 |
pydantic==2.6.2
|
79 |
pydantic_core==2.16.3
|
80 |
PyJWT==2.8.0
|
81 |
-
PyMuPDF==1.23.25
|
82 |
-
PyMuPDFb==1.23.22
|
83 |
PyMySQL==1.1.0
|
84 |
PyPDF2==3.0.1
|
85 |
pypdfium2==4.27.0
|
|
|
78 |
pydantic==2.6.2
|
79 |
pydantic_core==2.16.3
|
80 |
PyJWT==2.8.0
|
|
|
|
|
81 |
PyMySQL==1.1.0
|
82 |
PyPDF2==3.0.1
|
83 |
pypdfium2==4.27.0
|