KevinHuSh committed on
Commit
ba9251b
·
1 Parent(s): 15052fd

Make files in knowledge bases visible in the file manager (#714)

Browse files

### What problem does this PR solve?

Make files that belong to knowledge bases visible in the file manager.
#162

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

api/apps/document_app.py CHANGED
@@ -23,7 +23,7 @@ from elasticsearch_dsl import Q
23
  from flask import request
24
  from flask_login import login_required, current_user
25
 
26
- from api.db.db_models import Task
27
  from api.db.services.file2document_service import File2DocumentService
28
  from api.db.services.file_service import FileService
29
  from api.db.services.task_service import TaskService, queue_tasks
@@ -33,7 +33,7 @@ from api.db.services import duplicate_name
33
  from api.db.services.knowledgebase_service import KnowledgebaseService
34
  from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
35
  from api.utils import get_uuid
36
- from api.db import FileType, TaskStatus, ParserType
37
  from api.db.services.document_service import DocumentService
38
  from api.settings import RetCode
39
  from api.utils.api_utils import get_json_result
@@ -59,12 +59,19 @@ def upload():
59
  return get_json_result(
60
  data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
61
 
 
 
 
 
 
 
 
 
 
 
62
  err = []
63
  for file in file_objs:
64
  try:
65
- e, kb = KnowledgebaseService.get_by_id(kb_id)
66
- if not e:
67
- raise LookupError("Can't find this knowledgebase!")
68
  MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
69
  if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(kb.tenant_id) >= MAX_FILE_NUM_PER_USER:
70
  raise RuntimeError("Exceed the maximum file number of a free user!")
@@ -99,6 +106,8 @@ def upload():
99
  if re.search(r"\.(ppt|pptx|pages)$", filename):
100
  doc["parser_id"] = ParserType.PRESENTATION.value
101
  DocumentService.insert(doc)
 
 
102
  except Exception as e:
103
  err.append(file.filename + ": " + str(e))
104
  if err:
@@ -228,11 +237,13 @@ def rm():
228
  req = request.json
229
  doc_ids = req["doc_id"]
230
  if isinstance(doc_ids, str): doc_ids = [doc_ids]
 
 
 
231
  errors = ""
232
  for doc_id in doc_ids:
233
  try:
234
  e, doc = DocumentService.get_by_id(doc_id)
235
-
236
  if not e:
237
  return get_data_error_result(retmsg="Document not found!")
238
  tenant_id = DocumentService.get_tenant_id(doc_id)
@@ -241,21 +252,25 @@ def rm():
241
 
242
  ELASTICSEARCH.deleteByQuery(
243
  Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
244
- DocumentService.increment_chunk_num(
245
- doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1, 0)
 
 
246
  if not DocumentService.delete(doc):
247
  return get_data_error_result(
248
  retmsg="Database error (Document removal)!")
249
 
250
- informs = File2DocumentService.get_by_document_id(doc_id)
251
- if not informs:
252
- MINIO.rm(doc.kb_id, doc.location)
253
- else:
254
- File2DocumentService.delete_by_document_id(doc_id)
255
  except Exception as e:
256
  errors += str(e)
257
 
258
- if errors: return server_error_response(e)
 
 
259
  return get_json_result(data=True)
260
 
261
 
 
23
  from flask import request
24
  from flask_login import login_required, current_user
25
 
26
+ from api.db.db_models import Task, File
27
  from api.db.services.file2document_service import File2DocumentService
28
  from api.db.services.file_service import FileService
29
  from api.db.services.task_service import TaskService, queue_tasks
 
33
  from api.db.services.knowledgebase_service import KnowledgebaseService
34
  from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
35
  from api.utils import get_uuid
36
+ from api.db import FileType, TaskStatus, ParserType, FileSource
37
  from api.db.services.document_service import DocumentService
38
  from api.settings import RetCode
39
  from api.utils.api_utils import get_json_result
 
59
  return get_json_result(
60
  data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
61
 
62
+ e, kb = KnowledgebaseService.get_by_id(kb_id)
63
+ if not e:
64
+ raise LookupError("Can't find this knowledgebase!")
65
+
66
+ root_folder = FileService.get_root_folder(current_user.id)
67
+ pf_id = root_folder["id"]
68
+ FileService.init_knowledgebase_docs(pf_id, current_user.id)
69
+ kb_root_folder = FileService.get_kb_folder(current_user.id)
70
+ kb_folder = FileService.new_a_file_from_kb(kb.tenant_id, kb.name, kb_root_folder["id"])
71
+
72
  err = []
73
  for file in file_objs:
74
  try:
 
 
 
75
  MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
76
  if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(kb.tenant_id) >= MAX_FILE_NUM_PER_USER:
77
  raise RuntimeError("Exceed the maximum file number of a free user!")
 
106
  if re.search(r"\.(ppt|pptx|pages)$", filename):
107
  doc["parser_id"] = ParserType.PRESENTATION.value
108
  DocumentService.insert(doc)
109
+
110
+ FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id)
111
  except Exception as e:
112
  err.append(file.filename + ": " + str(e))
113
  if err:
 
237
  req = request.json
238
  doc_ids = req["doc_id"]
239
  if isinstance(doc_ids, str): doc_ids = [doc_ids]
240
+ root_folder = FileService.get_root_folder(current_user.id)
241
+ pf_id = root_folder["id"]
242
+ FileService.init_knowledgebase_docs(pf_id, current_user.id)
243
  errors = ""
244
  for doc_id in doc_ids:
245
  try:
246
  e, doc = DocumentService.get_by_id(doc_id)
 
247
  if not e:
248
  return get_data_error_result(retmsg="Document not found!")
249
  tenant_id = DocumentService.get_tenant_id(doc_id)
 
252
 
253
  ELASTICSEARCH.deleteByQuery(
254
  Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
255
+
256
+ DocumentService.clear_chunk_num(doc_id)
257
+ b, n = File2DocumentService.get_minio_address(doc_id=doc_id)
258
+
259
  if not DocumentService.delete(doc):
260
  return get_data_error_result(
261
  retmsg="Database error (Document removal)!")
262
 
263
+ f2d = File2DocumentService.get_by_document_id(doc_id)
264
+ FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
265
+ File2DocumentService.delete_by_document_id(doc_id)
266
+
267
+ MINIO.rm(b, n)
268
  except Exception as e:
269
  errors += str(e)
270
 
271
+ if errors:
272
+ return get_json_result(data=False, retmsg=errors, retcode=RetCode.SERVER_ERROR)
273
+
274
  return get_json_result(data=True)
275
 
276
 
api/apps/file_app.py CHANGED
@@ -26,7 +26,7 @@ from api.db.services.document_service import DocumentService
26
  from api.db.services.file2document_service import File2DocumentService
27
  from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
28
  from api.utils import get_uuid
29
- from api.db import FileType
30
  from api.db.services import duplicate_name
31
  from api.db.services.file_service import FileService
32
  from api.settings import RetCode
@@ -45,7 +45,7 @@ def upload():
45
 
46
  if not pf_id:
47
  root_folder = FileService.get_root_folder(current_user.id)
48
- pf_id = root_folder.id
49
 
50
  if 'file' not in request.files:
51
  return get_json_result(
@@ -132,7 +132,7 @@ def create():
132
  input_file_type = request.json.get("type")
133
  if not pf_id:
134
  root_folder = FileService.get_root_folder(current_user.id)
135
- pf_id = root_folder.id
136
 
137
  try:
138
  if not FileService.is_parent_folder_exist(pf_id):
@@ -176,7 +176,8 @@ def list():
176
  desc = request.args.get("desc", True)
177
  if not pf_id:
178
  root_folder = FileService.get_root_folder(current_user.id)
179
- pf_id = root_folder.id
 
180
  try:
181
  e, file = FileService.get_by_id(pf_id)
182
  if not e:
@@ -199,7 +200,7 @@ def list():
199
  def get_root_folder():
200
  try:
201
  root_folder = FileService.get_root_folder(current_user.id)
202
- return get_json_result(data={"root_folder": root_folder.to_json()})
203
  except Exception as e:
204
  return server_error_response(e)
205
 
@@ -250,6 +251,8 @@ def rm():
250
  return get_data_error_result(retmsg="File or Folder not found!")
251
  if not file.tenant_id:
252
  return get_data_error_result(retmsg="Tenant not found!")
 
 
253
 
254
  if file.type == FileType.FOLDER.value:
255
  file_id_list = FileService.get_all_innermost_file_ids(file_id, [])
 
26
  from api.db.services.file2document_service import File2DocumentService
27
  from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
28
  from api.utils import get_uuid
29
+ from api.db import FileType, FileSource
30
  from api.db.services import duplicate_name
31
  from api.db.services.file_service import FileService
32
  from api.settings import RetCode
 
45
 
46
  if not pf_id:
47
  root_folder = FileService.get_root_folder(current_user.id)
48
+ pf_id = root_folder["id"]
49
 
50
  if 'file' not in request.files:
51
  return get_json_result(
 
132
  input_file_type = request.json.get("type")
133
  if not pf_id:
134
  root_folder = FileService.get_root_folder(current_user.id)
135
+ pf_id = root_folder["id"]
136
 
137
  try:
138
  if not FileService.is_parent_folder_exist(pf_id):
 
176
  desc = request.args.get("desc", True)
177
  if not pf_id:
178
  root_folder = FileService.get_root_folder(current_user.id)
179
+ pf_id = root_folder["id"]
180
+ FileService.init_knowledgebase_docs(pf_id, current_user.id)
181
  try:
182
  e, file = FileService.get_by_id(pf_id)
183
  if not e:
 
200
  def get_root_folder():
201
  try:
202
  root_folder = FileService.get_root_folder(current_user.id)
203
+ return get_json_result(data={"root_folder": root_folder})
204
  except Exception as e:
205
  return server_error_response(e)
206
 
 
251
  return get_data_error_result(retmsg="File or Folder not found!")
252
  if not file.tenant_id:
253
  return get_data_error_result(retmsg="Tenant not found!")
254
+ if file.source_type == FileSource.KNOWLEDGEBASE:
255
+ continue
256
 
257
  if file.type == FileType.FOLDER.value:
258
  file_id_list = FileService.get_all_innermost_file_ids(file_id, [])
api/db/__init__.py CHANGED
@@ -83,3 +83,11 @@ class ParserType(StrEnum):
83
  NAIVE = "naive"
84
  PICTURE = "picture"
85
  ONE = "one"
 
 
 
 
 
 
 
 
 
83
  NAIVE = "naive"
84
  PICTURE = "picture"
85
  ONE = "one"
86
+
87
+
88
+ class FileSource(StrEnum):
89
+ LOCAL = ""
90
+ KNOWLEDGEBASE = "knowledgebase"
91
+ S3 = "s3"
92
+
93
+ KNOWLEDGEBASE_FOLDER_NAME=".knowledgebase"
api/db/db_models.py CHANGED
@@ -21,14 +21,13 @@ import operator
21
  from functools import wraps
22
  from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
23
  from flask_login import UserMixin
24
-
25
  from peewee import (
26
- BigAutoField, BigIntegerField, BooleanField, CharField,
27
- CompositeKey, Insert, IntegerField, TextField, FloatField, DateTimeField,
28
  Field, Model, Metadata
29
  )
30
  from playhouse.pool import PooledMySQLDatabase
31
-
32
  from api.db import SerializedType, ParserType
33
  from api.settings import DATABASE, stat_logger, SECRET_KEY
34
  from api.utils.log_utils import getLogger
@@ -344,7 +343,7 @@ class DataBaseModel(BaseModel):
344
 
345
 
346
  @DB.connection_context()
347
- def init_database_tables():
348
  members = inspect.getmembers(sys.modules[__name__], inspect.isclass)
349
  table_objs = []
350
  create_failed_list = []
@@ -361,6 +360,7 @@ def init_database_tables():
361
  if create_failed_list:
362
  LOGGER.info(f"create tables failed: {create_failed_list}")
363
  raise Exception(f"create tables failed: {create_failed_list}")
 
364
 
365
 
366
  def fill_db_model_object(model_object, human_model_dict):
@@ -699,6 +699,11 @@ class File(DataBaseModel):
699
  help_text="where dose it store")
700
  size = IntegerField(default=0)
701
  type = CharField(max_length=32, null=False, help_text="file extension")
 
 
 
 
 
702
 
703
  class Meta:
704
  db_table = "file"
@@ -817,3 +822,14 @@ class API4Conversation(DataBaseModel):
817
 
818
  class Meta:
819
  db_table = "api_4_conversation"
 
 
 
 
 
 
 
 
 
 
 
 
21
  from functools import wraps
22
  from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
23
  from flask_login import UserMixin
24
+ from playhouse.migrate import MySQLMigrator, migrate
25
  from peewee import (
26
+ BigIntegerField, BooleanField, CharField,
27
+ CompositeKey, IntegerField, TextField, FloatField, DateTimeField,
28
  Field, Model, Metadata
29
  )
30
  from playhouse.pool import PooledMySQLDatabase
 
31
  from api.db import SerializedType, ParserType
32
  from api.settings import DATABASE, stat_logger, SECRET_KEY
33
  from api.utils.log_utils import getLogger
 
343
 
344
 
345
  @DB.connection_context()
346
+ def init_database_tables(alter_fields=[]):
347
  members = inspect.getmembers(sys.modules[__name__], inspect.isclass)
348
  table_objs = []
349
  create_failed_list = []
 
360
  if create_failed_list:
361
  LOGGER.info(f"create tables failed: {create_failed_list}")
362
  raise Exception(f"create tables failed: {create_failed_list}")
363
+ migrate_db()
364
 
365
 
366
  def fill_db_model_object(model_object, human_model_dict):
 
699
  help_text="where dose it store")
700
  size = IntegerField(default=0)
701
  type = CharField(max_length=32, null=False, help_text="file extension")
702
+ source_type = CharField(
703
+ max_length=128,
704
+ null=False,
705
+ default="",
706
+ help_text="where dose this document come from")
707
 
708
  class Meta:
709
  db_table = "file"
 
822
 
823
  class Meta:
824
  db_table = "api_4_conversation"
825
+
826
+
827
def migrate_db():
    """Apply in-place schema changes to tables that already exist.

    Adds the ``source_type`` column to the ``file`` table inside a single
    transaction. The migration is best-effort: any failure is logged and
    then ignored so that start-up is never aborted by it.
    """
    try:
        with DB.transaction():
            migrator = MySQLMigrator(DB)
            migrate(
                migrator.add_column(
                    'file',
                    'source_type',
                    CharField(
                        max_length=128,
                        null=False,
                        default="",
                        help_text="where dose this document come from"))
            )
    except Exception as e:
        # NOTE(review): presumably this fails on every start-up after the
        # first one because the column already exists -- confirm. Either way
        # the error is recorded instead of being silently discarded.
        LOGGER.info(f"migrate_db: schema migration skipped: {e}")
api/db/services/document_service.py CHANGED
@@ -150,6 +150,22 @@ class DocumentService(CommonService):
150
  Knowledgebase.id == kb_id).execute()
151
  return num
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  @classmethod
154
  @DB.connection_context()
155
  def get_tenant_id(cls, doc_id):
 
150
  Knowledgebase.id == kb_id).execute()
151
  return num
152
 
153
@classmethod
@DB.connection_context()
def clear_chunk_num(cls, doc_id):
    """Remove one document's statistics from its knowledge base.

    Subtracts the document's ``token_num`` and ``chunk_num`` from the
    knowledge base it belongs to and decrements that knowledge base's
    ``doc_num`` by one.

    Args:
        doc_id: Primary key of the document being removed.

    Returns:
        The value returned by executing the ``Knowledgebase`` update query.

    Raises:
        LookupError: If the lookup yields no document.
    """
    doc = cls.model.get_by_id(doc_id)
    # Explicit check instead of ``assert`` so the validation still runs when
    # Python is started with -O.
    if not doc:
        raise LookupError("Can't find document in database.")

    num = Knowledgebase.update(
        token_num=Knowledgebase.token_num - doc.token_num,
        chunk_num=Knowledgebase.chunk_num - doc.chunk_num,
        doc_num=Knowledgebase.doc_num - 1
    ).where(
        Knowledgebase.id == doc.kb_id).execute()
    return num
168
+
169
  @classmethod
170
  @DB.connection_context()
171
  def get_tenant_id(cls, doc_id):
api/db/services/file2document_service.py CHANGED
@@ -15,12 +15,12 @@
15
  #
16
  from datetime import datetime
17
 
 
18
  from api.db.db_models import DB
19
- from api.db.db_models import File, Document, File2Document
20
  from api.db.services.common_service import CommonService
21
  from api.db.services.document_service import DocumentService
22
- from api.db.services.file_service import FileService
23
- from api.utils import current_timestamp, datetime_format
24
 
25
 
26
  class File2DocumentService(CommonService):
@@ -71,13 +71,15 @@ class File2DocumentService(CommonService):
71
  @DB.connection_context()
72
  def get_minio_address(cls, doc_id=None, file_id=None):
73
  if doc_id:
74
- ids = File2DocumentService.get_by_document_id(doc_id)
75
  else:
76
- ids = File2DocumentService.get_by_file_id(file_id)
77
- if ids:
78
- e, file = FileService.get_by_id(ids[0].file_id)
79
- return file.parent_id, file.location
80
- else:
81
- assert doc_id, "please specify doc_id"
82
- e, doc = DocumentService.get_by_id(doc_id)
83
- return doc.kb_id, doc.location
 
 
 
15
  #
16
  from datetime import datetime
17
 
18
+ from api.db import FileSource
19
  from api.db.db_models import DB
20
+ from api.db.db_models import File, File2Document
21
  from api.db.services.common_service import CommonService
22
  from api.db.services.document_service import DocumentService
23
+ from api.utils import current_timestamp, datetime_format, get_uuid
 
24
 
25
 
26
  class File2DocumentService(CommonService):
 
71
  @DB.connection_context()
72
  def get_minio_address(cls, doc_id=None, file_id=None):
73
  if doc_id:
74
+ f2d = cls.get_by_document_id(doc_id)
75
  else:
76
+ f2d = cls.get_by_file_id(file_id)
77
+ if f2d:
78
+ file = File.get_by_id(f2d[0].file_id)
79
+ if file.source_type == FileSource.LOCAL:
80
+ return file.parent_id, file.location
81
+ doc_id = f2d[0].document_id
82
+
83
+ assert doc_id, "please specify doc_id"
84
+ e, doc = DocumentService.get_by_id(doc_id)
85
+ return doc.kb_id, doc.location
api/db/services/file_service.py CHANGED
@@ -16,10 +16,12 @@
16
  from flask_login import current_user
17
  from peewee import fn
18
 
19
- from api.db import FileType
20
  from api.db.db_models import DB, File2Document, Knowledgebase
21
  from api.db.db_models import File, Document
22
  from api.db.services.common_service import CommonService
 
 
23
  from api.utils import get_uuid
24
 
25
 
@@ -33,10 +35,15 @@ class FileService(CommonService):
33
  if keywords:
34
  files = cls.model.select().where(
35
  (cls.model.tenant_id == tenant_id)
36
- & (cls.model.parent_id == pf_id), (fn.LOWER(cls.model.name).like(f"%%{keywords.lower()}%%")))
 
 
 
37
  else:
38
- files = cls.model.select().where((cls.model.tenant_id == tenant_id)
39
- & (cls.model.parent_id == pf_id))
 
 
40
  count = files.count()
41
  if desc:
42
  files = files.order_by(cls.model.getter_by(orderby).desc())
@@ -135,29 +142,69 @@ class FileService(CommonService):
135
  @classmethod
136
  @DB.connection_context()
137
  def get_root_folder(cls, tenant_id):
138
- file = cls.model.select().where(cls.model.tenant_id == tenant_id and
139
- cls.model.parent_id == cls.model.id)
140
- if not file:
141
- file_id = get_uuid()
142
- file = {
143
- "id": file_id,
144
- "parent_id": file_id,
145
- "tenant_id": tenant_id,
146
- "created_by": tenant_id,
147
- "name": "/",
148
- "type": FileType.FOLDER.value,
149
- "size": 0,
150
- "location": "",
151
- }
152
- cls.save(**file)
153
- else:
154
- file_id = file[0].id
155
 
156
- e, file = cls.get_by_id(file_id)
157
- if not e:
158
- raise RuntimeError("Database error (File retrieval)!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  return file
160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  @classmethod
162
  @DB.connection_context()
163
  def get_parent_folder(cls, file_id):
@@ -241,3 +288,20 @@ class FileService(CommonService):
241
  dfs(folder_id)
242
  return size
243
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  from flask_login import current_user
17
  from peewee import fn
18
 
19
+ from api.db import FileType, KNOWLEDGEBASE_FOLDER_NAME, FileSource
20
  from api.db.db_models import DB, File2Document, Knowledgebase
21
  from api.db.db_models import File, Document
22
  from api.db.services.common_service import CommonService
23
+ from api.db.services.document_service import DocumentService
24
+ from api.db.services.file2document_service import File2DocumentService
25
  from api.utils import get_uuid
26
 
27
 
 
35
  if keywords:
36
  files = cls.model.select().where(
37
  (cls.model.tenant_id == tenant_id)
38
+ (cls.model.parent_id == pf_id),
39
+ (fn.LOWER(cls.model.name).like(f"%%{keywords.lower()}%%")),
40
+ ~(cls.model.id == pf_id)
41
+ )
42
  else:
43
+ files = cls.model.select().where((cls.model.tenant_id == tenant_id),
44
+ (cls.model.parent_id == pf_id),
45
+ ~(cls.model.id == pf_id)
46
+ )
47
  count = files.count()
48
  if desc:
49
  files = files.order_by(cls.model.getter_by(orderby).desc())
 
142
  @classmethod
143
  @DB.connection_context()
144
  def get_root_folder(cls, tenant_id):
145
+ for file in cls.model.select().where((cls.model.tenant_id == tenant_id),
146
+ (cls.model.parent_id == cls.model.id)
147
+ ):
148
+ return file.to_dict()
 
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
+ file_id = get_uuid()
151
+ file = {
152
+ "id": file_id,
153
+ "parent_id": file_id,
154
+ "tenant_id": tenant_id,
155
+ "created_by": tenant_id,
156
+ "name": "/",
157
+ "type": FileType.FOLDER.value,
158
+ "size": 0,
159
+ "location": "",
160
+ }
161
+ cls.save(**file)
162
+ return file
163
+
164
@classmethod
@DB.connection_context()
def get_kb_folder(cls, tenant_id):
    """Return the tenant's knowledge-base folder as a dict.

    Looks up the tenant's root folder (a row whose ``parent_id`` equals its
    own ``id``) and then the child of that root named
    ``KNOWLEDGEBASE_FOLDER_NAME``.

    Args:
        tenant_id: Owner of the folder tree to search.

    Returns:
        ``to_dict()`` of the first matching folder row.

    Raises:
        RuntimeError: If no such folder exists for the tenant.
    """
    # Conditions are passed as separate ``where()`` arguments so that all of
    # them are applied. Joining peewee expressions with Python ``and``
    # evaluates to just the last operand, which silently dropped the tenant
    # and parent filters.
    roots = cls.model.select().where(
        (cls.model.tenant_id == tenant_id),
        (cls.model.parent_id == cls.model.id)
    )
    for root in roots:
        folders = cls.model.select().where(
            (cls.model.tenant_id == tenant_id),
            (cls.model.parent_id == root.id),
            (cls.model.name == KNOWLEDGEBASE_FOLDER_NAME)
        )
        for folder in folders:
            return folder.to_dict()
    # Explicit raise instead of ``assert False`` so the failure is still
    # reported when Python runs with -O.
    raise RuntimeError("Can't find the KB folder. Database init error.")
175
+
176
@classmethod
@DB.connection_context()
def new_a_file_from_kb(cls, tenant_id, name, parent_id, ty=FileType.FOLDER.value, size=0, location=""):
    """Return the knowledge-base file entry with this name, creating it if absent.

    If a row with the same tenant, parent and name already exists, its
    ``to_dict()`` is returned unchanged. Otherwise a new row tagged with
    ``FileSource.KNOWLEDGEBASE`` is saved and the dict that was saved is
    returned.
    """
    existing = next(iter(cls.query(tenant_id=tenant_id, parent_id=parent_id, name=name)), None)
    if existing is not None:
        return existing.to_dict()

    record = dict(
        id=get_uuid(),
        parent_id=parent_id,
        tenant_id=tenant_id,
        created_by=tenant_id,
        name=name,
        type=ty,
        size=size,
        location=location,
        source_type=FileSource.KNOWLEDGEBASE,
    )
    cls.save(**record)
    return record
194
 
195
@classmethod
@DB.connection_context()
def init_knowledgebase_docs(cls, root_id, tenant_id):
    """Mirror the tenant's knowledge bases into the file tree, once.

    Does nothing when a folder named ``KNOWLEDGEBASE_FOLDER_NAME`` already
    exists under ``root_id``. Otherwise creates that folder, one sub-folder
    per knowledge base of the tenant, and one file entry per document in
    each knowledge base.
    """
    marker_rows = cls.model.select().where(
        (cls.model.name == KNOWLEDGEBASE_FOLDER_NAME)
        & (cls.model.parent_id == root_id))
    if any(True for _ in marker_rows):
        return

    top_folder_id = cls.new_a_file_from_kb(tenant_id, KNOWLEDGEBASE_FOLDER_NAME, root_id)["id"]

    tenant_kbs = Knowledgebase.select(
        Knowledgebase.id, Knowledgebase.name
    ).where(Knowledgebase.tenant_id == tenant_id)
    for kb in tenant_kbs:
        kb_folder_id = cls.new_a_file_from_kb(tenant_id, kb.name, top_folder_id)["id"]
        for doc in DocumentService.query(kb_id=kb.id):
            FileService.add_file_from_kb(doc.to_dict(), kb_folder_id, tenant_id)
207
+
208
  @classmethod
209
  @DB.connection_context()
210
  def get_parent_folder(cls, file_id):
 
288
  dfs(folder_id)
289
  return size
290
 
291
@classmethod
@DB.connection_context()
def add_file_from_kb(cls, doc, kb_folder_id, tenant_id):
    """Create a file entry for a knowledge-base document and link the two.

    ``doc`` is a mapping with at least ``id``, ``name``, ``type``, ``size``
    and ``location``. Nothing happens if the document already has a
    file-to-document link. Otherwise a file row tagged with
    ``FileSource.KNOWLEDGEBASE`` is saved under ``kb_folder_id``, followed
    by the link row.
    """
    already_linked = any(True for _ in File2DocumentService.get_by_document_id(doc["id"]))
    if already_linked:
        return

    new_file_id = get_uuid()
    cls.save(
        id=new_file_id,
        parent_id=kb_folder_id,
        tenant_id=tenant_id,
        created_by=tenant_id,
        name=doc["name"],
        type=doc["type"],
        size=doc["size"],
        location=doc["location"],
        source_type=FileSource.KNOWLEDGEBASE,
    )
    File2DocumentService.save(id=get_uuid(), file_id=new_file_id, document_id=doc["id"])
docker/entrypoint.sh CHANGED
@@ -8,14 +8,14 @@ PY=/root/miniconda3/envs/py11/bin/python
8
 
9
  function task_exe(){
10
  while [ 1 -eq 1 ];do
11
- $PY rag/svr/task_executor.py $1 $2;
12
  done
13
  }
14
 
15
  WS=1
16
  for ((i=0;i<WS;i++))
17
  do
18
- task_exe $i $WS &
19
  done
20
 
21
  while [ 1 -eq 1 ];do
 
8
 
9
  function task_exe(){
10
  while [ 1 -eq 1 ];do
11
+ $PY rag/svr/task_executor.py ;
12
  done
13
  }
14
 
15
  WS=1
16
  for ((i=0;i<WS;i++))
17
  do
18
+ task_exe &
19
  done
20
 
21
  while [ 1 -eq 1 ];do
rag/svr/task_executor.py CHANGED
@@ -109,6 +109,7 @@ def collect():
109
  if not msg: return pd.DataFrame()
110
 
111
  if TaskService.do_cancel(msg["id"]):
 
112
  return pd.DataFrame()
113
  tasks = TaskService.get_tasks(msg["id"])
114
  assert tasks, "{} empty task!".format(msg["id"])
 
109
  if not msg: return pd.DataFrame()
110
 
111
  if TaskService.do_cancel(msg["id"]):
112
+ cron_logger.info("Task {} has been canceled.".format(msg["id"]))
113
  return pd.DataFrame()
114
  tasks = TaskService.get_tasks(msg["id"])
115
  assert tasks, "{} empty task!".format(msg["id"])
requirements_dev.txt CHANGED
@@ -78,8 +78,6 @@ pycryptodomex==3.20.0
78
  pydantic==2.6.2
79
  pydantic_core==2.16.3
80
  PyJWT==2.8.0
81
- PyMuPDF==1.23.25
82
- PyMuPDFb==1.23.22
83
  PyMySQL==1.1.0
84
  PyPDF2==3.0.1
85
  pypdfium2==4.27.0
 
78
  pydantic==2.6.2
79
  pydantic_core==2.16.3
80
  PyJWT==2.8.0
 
 
81
  PyMySQL==1.1.0
82
  PyPDF2==3.0.1
83
  pypdfium2==4.27.0