KevinHuSh committed on
Commit
21cd893
·
1 Parent(s): afa33a7

add file management feature (#560)

Browse files

### What problem does this PR solve?

### Type of change

- [x] Documentation Update

README.md CHANGED
@@ -58,6 +58,7 @@
58
 
59
  ## 📌 Latest Features
60
 
 
61
  - 2024-04-19 Support conversation API ([detail](./docs/conversation_api.md)).
62
  - 2024-04-16 Add an embedding model 'bce-embedding-base_v1' from [BCEmbedding](https://github.com/netease-youdao/BCEmbedding).
63
  - 2024-04-16 Add [FastEmbed](https://github.com/qdrant/fastembed), which is designed specifically for light and speedy embedding.
 
58
 
59
  ## 📌 Latest Features
60
 
61
+ - 2024-04-26 Add file management.
62
  - 2024-04-19 Support conversation API ([detail](./docs/conversation_api.md)).
63
  - 2024-04-16 Add an embedding model 'bce-embedding-base_v1' from [BCEmbedding](https://github.com/netease-youdao/BCEmbedding).
64
  - 2024-04-16 Add [FastEmbed](https://github.com/qdrant/fastembed), which is designed specifically for light and speedy embedding.
README_ja.md CHANGED
@@ -58,6 +58,7 @@
58
 
59
  ## 📌 最新の機能
60
 
 
61
  - 2024-04-19 会話 API をサポートします ([詳細](./docs/conversation_api.md))。
62
  - 2024-04-16 [BCEmbedding](https://github.com/netease-youdao/BCEmbedding) から埋め込みモデル「bce-embedding-base_v1」を追加します。
63
  - 2024-04-16 [FastEmbed](https://github.com/qdrant/fastembed) は、軽量かつ高速な埋め込み用に設計されています。
 
58
 
59
  ## 📌 最新の機能
60
 
61
+ - 2024-04-26 「ファイル管理」機能を追加しました。
62
  - 2024-04-19 会話 API をサポートします ([詳細](./docs/conversation_api.md))。
63
  - 2024-04-16 [BCEmbedding](https://github.com/netease-youdao/BCEmbedding) から埋め込みモデル「bce-embedding-base_v1」を追加します。
64
  - 2024-04-16 [FastEmbed](https://github.com/qdrant/fastembed) は、軽量かつ高速な埋め込み用に設計されています。
README_zh.md CHANGED
@@ -58,6 +58,7 @@
58
 
59
  ## 📌 新增功能
60
 
 
61
  - 2024-04-19 支持对话 API ([更多](./docs/conversation_api.md)).
62
  - 2024-04-16 添加嵌入模型 [BCEmbedding](https://github.com/netease-youdao/BCEmbedding) 。
63
  - 2024-04-16 添加 [FastEmbed](https://github.com/qdrant/fastembed) 专为轻型和高速嵌入而设计。
 
58
 
59
  ## 📌 新增功能
60
 
61
+ - 2024-04-26 增添了'文件管理'功能.
62
  - 2024-04-19 支持对话 API ([更多](./docs/conversation_api.md)).
63
  - 2024-04-16 添加嵌入模型 [BCEmbedding](https://github.com/netease-youdao/BCEmbedding) 。
64
  - 2024-04-16 添加 [FastEmbed](https://github.com/qdrant/fastembed) 专为轻型和高速嵌入而设计。
api/apps/document_app.py CHANGED
@@ -23,6 +23,9 @@ import flask
23
  from elasticsearch_dsl import Q
24
  from flask import request
25
  from flask_login import login_required, current_user
 
 
 
26
  from rag.nlp import search
27
  from rag.utils import ELASTICSEARCH
28
  from api.db.services import duplicate_name
@@ -68,7 +71,7 @@ def upload():
68
  name=file.filename,
69
  kb_id=kb.id)
70
  filetype = filename_type(filename)
71
- if not filetype:
72
  return get_data_error_result(
73
  retmsg="This type of file has not been supported yet!")
74
 
@@ -218,26 +221,39 @@ def change_status():
218
  @validate_request("doc_id")
219
  def rm():
220
  req = request.json
221
- try:
222
- e, doc = DocumentService.get_by_id(req["doc_id"])
223
- if not e:
224
- return get_data_error_result(retmsg="Document not found!")
225
- tenant_id = DocumentService.get_tenant_id(req["doc_id"])
226
- if not tenant_id:
227
- return get_data_error_result(retmsg="Tenant not found!")
228
- ELASTICSEARCH.deleteByQuery(
229
- Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
230
-
231
- DocumentService.increment_chunk_num(
232
- doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1, 0)
233
- if not DocumentService.delete(doc):
234
- return get_data_error_result(
235
- retmsg="Database error (Document removal)!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
 
237
- MINIO.rm(doc.kb_id, doc.location)
238
- return get_json_result(data=True)
239
- except Exception as e:
240
- return server_error_response(e)
241
 
242
 
243
  @manager.route('/run', methods=['POST'])
@@ -302,7 +318,13 @@ def get(doc_id):
302
  if not e:
303
  return get_data_error_result(retmsg="Document not found!")
304
 
305
- response = flask.make_response(MINIO.get(doc.kb_id, doc.location))
 
 
 
 
 
 
306
  ext = re.search(r"\.([^.]+)$", doc.name)
307
  if ext:
308
  if doc.type == FileType.VISUAL.value:
 
23
  from elasticsearch_dsl import Q
24
  from flask import request
25
  from flask_login import login_required, current_user
26
+
27
+ from api.db.services.file2document_service import File2DocumentService
28
+ from api.db.services.file_service import FileService
29
  from rag.nlp import search
30
  from rag.utils import ELASTICSEARCH
31
  from api.db.services import duplicate_name
 
71
  name=file.filename,
72
  kb_id=kb.id)
73
  filetype = filename_type(filename)
74
+ if filetype == FileType.OTHER.value:
75
  return get_data_error_result(
76
  retmsg="This type of file has not been supported yet!")
77
 
 
221
@validate_request("doc_id")
def rm():
    """Remove one or more documents together with their indexed chunks.

    The JSON field ``doc_id`` may be a single id string or a list of ids.
    For each document the Elasticsearch chunks are deleted, the chunk/token
    counters are decremented, and the database row is removed.  If the
    document has no File2Document link its stored object is removed from
    MinIO; otherwise only the link rows are deleted and the object is left
    in place.

    Returns:
        A JSON result with ``data=True`` on success, a data-error result
        when a document or tenant cannot be resolved, or a server-error
        response carrying the concatenated messages of every exception
        raised while processing the batch.
    """
    req = request.json
    doc_ids = req["doc_id"]
    if isinstance(doc_ids, str):
        doc_ids = [doc_ids]
    errors = ""
    for doc_id in doc_ids:
        try:
            e, doc = DocumentService.get_by_id(doc_id)
            if not e:
                return get_data_error_result(retmsg="Document not found!")
            tenant_id = DocumentService.get_tenant_id(doc_id)
            if not tenant_id:
                return get_data_error_result(retmsg="Tenant not found!")

            ELASTICSEARCH.deleteByQuery(
                Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
            DocumentService.increment_chunk_num(
                doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1, 0)
            if not DocumentService.delete(doc):
                return get_data_error_result(
                    retmsg="Database error (Document removal)!")

            informs = File2DocumentService.get_by_document_id(doc_id)
            if not informs:
                MINIO.rm(doc.kb_id, doc.location)
            else:
                File2DocumentService.delete_by_document_id(doc_id)
        except Exception as ex:
            # Keep going so one failing document does not block the rest.
            errors += str(ex)

    if errors:
        # The name bound by ``except ... as`` is unbound once the clause
        # ends, so report the accumulated messages via a fresh exception.
        return server_error_response(RuntimeError(errors))
    return get_json_result(data=True)
256
 
 
 
 
 
257
 
258
 
259
  @manager.route('/run', methods=['POST'])
 
318
  if not e:
319
  return get_data_error_result(retmsg="Document not found!")
320
 
321
+ informs = File2DocumentService.get_by_document_id(doc_id)
322
+ if not informs:
323
+ response = flask.make_response(MINIO.get(doc.kb_id, doc.location))
324
+ else:
325
+ e, file = FileService.get_by_id(informs[0].file_id)
326
+ response = flask.make_response(MINIO.get(file.parent_id, doc.location))
327
+
328
  ext = re.search(r"\.([^.]+)$", doc.name)
329
  if ext:
330
  if doc.type == FileType.VISUAL.value:
api/apps/file2document_app.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License
15
+ #
16
+ from elasticsearch_dsl import Q
17
+
18
+ from api.db.db_models import File2Document
19
+ from api.db.services.file2document_service import File2DocumentService
20
+ from api.db.services.file_service import FileService
21
+
22
+ from flask import request
23
+ from flask_login import login_required, current_user
24
+ from api.db.services.knowledgebase_service import KnowledgebaseService
25
+ from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
26
+ from api.utils import get_uuid
27
+ from api.db import FileType
28
+ from api.db.services.document_service import DocumentService
29
+ from api.settings import RetCode
30
+ from api.utils.api_utils import get_json_result
31
+ from rag.nlp import search
32
+ from rag.utils import ELASTICSEARCH
33
+
34
+
35
@manager.route('/convert', methods=['POST'])
@login_required
@validate_request("file_ids", "kb_ids")
def convert():
    """Link files (or every file inside a folder) to knowledge bases.

    For each requested file id, the existing File2Document links and the
    documents they point to are removed first; then one new document plus
    one new link is created per id in ``kb_ids``.

    Returns:
        A JSON result whose ``data`` is the list of created File2Document
        rows (as JSON), a data-error result when a file, document, tenant
        or knowledge base cannot be found, or a server-error response on
        any exception.
    """
    req = request.json
    kb_ids = req["kb_ids"]
    file_ids = req["file_ids"]
    file2documents = []

    try:
        for file_id in file_ids:
            e, file = FileService.get_by_id(file_id)
            if not e:
                # Without this guard an unknown id fails on ``file.type``.
                return get_data_error_result(retmsg="Can't find this file!")

            inner_file_ids = [file_id]
            if file.type == FileType.FOLDER:
                inner_file_ids = FileService.get_all_innermost_file_ids(file_id, [])

            for inner_file_id in inner_file_ids:
                # Drop the documents currently linked to this file.
                for inform in File2DocumentService.get_by_file_id(inner_file_id):
                    doc_id = inform.document_id
                    e, doc = DocumentService.get_by_id(doc_id)
                    if not e:
                        return get_data_error_result(retmsg="Document not found!")
                    tenant_id = DocumentService.get_tenant_id(doc_id)
                    if not tenant_id:
                        return get_data_error_result(retmsg="Tenant not found!")
                    ELASTICSEARCH.deleteByQuery(
                        Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
                    DocumentService.increment_chunk_num(
                        doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1, 0)
                    if not DocumentService.delete(doc):
                        return get_data_error_result(
                            retmsg="Database error (Document removal)!")
                File2DocumentService.delete_by_file_id(inner_file_id)

                # The file row does not depend on the knowledge base, so
                # fetch it once instead of once per kb_id.
                e, inner_file = FileService.get_by_id(inner_file_id)
                if not e:
                    return get_data_error_result(
                        retmsg="Can't find this file!")

                # Create one document and one link per knowledge base.
                for kb_id in kb_ids:
                    e, kb = KnowledgebaseService.get_by_id(kb_id)
                    if not e:
                        return get_data_error_result(
                            retmsg="Can't find this knowledgebase!")

                    doc = DocumentService.insert({
                        "id": get_uuid(),
                        "kb_id": kb.id,
                        "parser_id": kb.parser_id,
                        "parser_config": kb.parser_config,
                        "created_by": current_user.id,
                        "type": inner_file.type,
                        "name": inner_file.name,
                        "location": inner_file.location,
                        "size": inner_file.size
                    })
                    file2document = File2DocumentService.insert({
                        "id": get_uuid(),
                        "file_id": inner_file_id,
                        "document_id": doc.id,
                    })
                    file2documents.append(file2document.to_json())
        return get_json_result(data=file2documents)
    except Exception as e:
        return server_error_response(e)
101
+
102
+
103
@manager.route('/rm', methods=['POST'])
@login_required
@validate_request("file_ids")
def rm():
    """Unlink files from their documents and delete those documents.

    For every id in ``file_ids`` the File2Document rows are looked up and
    deleted, and each linked document is removed together with its
    Elasticsearch chunks and its chunk/token counters.

    Returns:
        A JSON result with ``data=True`` on success, an argument-error
        result when ``file_ids`` is empty, a data-error result when a
        link, document or tenant cannot be found, or a server-error
        response on any exception.
    """
    req = request.json
    file_ids = req["file_ids"]
    if not file_ids:
        return get_json_result(
            data=False, retmsg='Lack of "Files ID"', retcode=RetCode.ARGUMENT_ERROR)
    try:
        for file_id in file_ids:
            # Materialise the links before deleting them so the loop below
            # works on a stable snapshot.
            informs = list(File2DocumentService.get_by_file_id(file_id))
            if not informs:
                return get_data_error_result(retmsg="Inform not found!")

            # One statement removes every link of this file; the original
            # re-issued it for each link row.
            File2DocumentService.delete_by_file_id(file_id)

            for inform in informs:
                doc_id = inform.document_id
                e, doc = DocumentService.get_by_id(doc_id)
                if not e:
                    return get_data_error_result(retmsg="Document not found!")
                tenant_id = DocumentService.get_tenant_id(doc_id)
                if not tenant_id:
                    return get_data_error_result(retmsg="Tenant not found!")
                ELASTICSEARCH.deleteByQuery(
                    Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
                DocumentService.increment_chunk_num(
                    doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1, 0)
                if not DocumentService.delete(doc):
                    return get_data_error_result(
                        retmsg="Database error (Document removal)!")
        return get_json_result(data=True)
    except Exception as e:
        return server_error_response(e)
api/apps/file_app.py ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License
15
+ #
16
+ import os
17
+ import pathlib
18
+ import re
19
+
20
+ import flask
21
+ from elasticsearch_dsl import Q
22
+ from flask import request
23
+ from flask_login import login_required, current_user
24
+
25
+ from api.db.services.document_service import DocumentService
26
+ from api.db.services.file2document_service import File2DocumentService
27
+ from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
28
+ from api.utils import get_uuid
29
+ from api.db import FileType
30
+ from api.db.services import duplicate_name
31
+ from api.db.services.file_service import FileService
32
+ from api.settings import RetCode
33
+ from api.utils.api_utils import get_json_result
34
+ from api.utils.file_utils import filename_type
35
+ from rag.nlp import search
36
+ from rag.utils import ELASTICSEARCH
37
+ from rag.utils.minio_conn import MINIO
38
+
39
+
40
@manager.route('/upload', methods=['POST'])
@login_required
# @validate_request("parent_id")
def upload():
    """Store uploaded files under a folder of the current user.

    Form field ``parent_id`` selects the target folder; when absent the
    user's root folder is used.  Each uploaded filename may contain ``/``
    separated path components; missing intermediate folders are created
    through ``FileService.create_folder``.  File contents are written to
    MinIO under the id of the folder that finally holds the file.

    Returns:
        A JSON result whose ``data`` is the list of created file rows (as
        JSON), an argument-error result when no file was supplied, a
        data-error result when a folder is missing or the per-user quota
        is exceeded, or a server-error response on any exception.
    """
    pf_id = request.form.get("parent_id")

    if not pf_id:
        root_folder = FileService.get_root_folder(current_user.id)
        pf_id = root_folder.id

    if 'file' not in request.files:
        return get_json_result(
            data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)
    file_objs = request.files.getlist('file')

    for file_obj in file_objs:
        if file_obj.filename == '':
            return get_json_result(
                data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
    file_res = []
    try:
        # Neither the target folder nor the quota setting changes between
        # files, so resolve both once.
        e, parent_folder = FileService.get_by_id(pf_id)
        if not e:
            return get_data_error_result(
                retmsg="Can't find this folder!")
        MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))

        for file_obj in file_objs:
            # The original read ``kb.tenant_id`` here, but no ``kb`` exists
            # in this handler.  Files in this module are stored with
            # tenant_id == current_user.id, so that id is used instead.
            if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(current_user.id) >= MAX_FILE_NUM_PER_USER:
                return get_data_error_result(
                    retmsg="Exceed the maximum file number of a free user!")

            # split file name path
            if not file_obj.filename:
                file_obj_names = [parent_folder.name, file_obj.filename]
            else:
                full_path = '/' + file_obj.filename
                file_obj_names = full_path.split('/')
            file_len = len(file_obj_names)

            # get folder
            file_id_list = FileService.get_id_list_by_id(pf_id, file_obj_names, 1, [pf_id])
            len_id_list = len(file_id_list)

            # create folder: start from the last resolved id when part of
            # the path is still missing, otherwise from the one before it.
            if file_len != len_id_list:
                base_folder_id = file_id_list[len_id_list - 1]
            else:
                base_folder_id = file_id_list[len_id_list - 2]
            e, base_folder = FileService.get_by_id(base_folder_id)
            if not e:
                return get_data_error_result(retmsg="Folder not found!")
            last_folder = FileService.create_folder(
                base_folder, base_folder_id, file_obj_names, len_id_list)

            # file type
            leaf_name = file_obj_names[file_len - 1]
            filetype = filename_type(leaf_name)
            location = leaf_name
            # Append underscores until the object name is unused.
            while MINIO.obj_exist(last_folder.id, location):
                location += "_"
            blob = file_obj.read()
            filename = duplicate_name(
                FileService.query,
                name=leaf_name,
                parent_id=last_folder.id)
            file = {
                "id": get_uuid(),
                "parent_id": last_folder.id,
                "tenant_id": current_user.id,
                "created_by": current_user.id,
                "type": filetype,
                "name": filename,
                "location": location,
                "size": len(blob),
            }
            file = FileService.insert(file)
            MINIO.put(last_folder.id, location, blob)
            file_res.append(file.to_json())
        return get_json_result(data=file_res)
    except Exception as e:
        return server_error_response(e)
124
+
125
+
126
@manager.route('/create', methods=['POST'])
@login_required
@validate_request("name")
def create():
    """Create an empty folder or virtual file entry.

    Reads ``name`` (required), ``parent_id`` and ``type`` from the request
    JSON.  Without ``parent_id`` the current user's root folder is used.
    The entry becomes a folder when ``type`` equals the folder type value
    and a virtual file otherwise.

    Returns:
        A JSON result holding the created row, an operating-error result
        when the parent folder does not exist, a data-error result when
        the name is already taken in that folder, or a server-error
        response on any exception.
    """
    payload = request.json
    parent_id = payload.get("parent_id")
    requested_type = payload.get("type")
    if not parent_id:
        parent_id = FileService.get_root_folder(current_user.id).id

    try:
        if not FileService.is_parent_folder_exist(parent_id):
            return get_json_result(
                data=False, retmsg="Parent Folder Doesn't Exist!", retcode=RetCode.OPERATING_ERROR)
        if FileService.query(name=payload["name"], parent_id=parent_id):
            return get_data_error_result(
                retmsg="Duplicated folder name in the same folder.")

        is_folder = requested_type == FileType.FOLDER.value
        new_entry = FileService.insert({
            "id": get_uuid(),
            "parent_id": parent_id,
            "tenant_id": current_user.id,
            "created_by": current_user.id,
            "name": payload["name"],
            "location": "",
            "size": 0,
            "type": FileType.FOLDER if is_folder else FileType.VIRTUAL
        })

        return get_json_result(data=new_entry.to_json())
    except Exception as e:
        return server_error_response(e)
164
+
165
+
166
@manager.route('/list', methods=['GET'])
@login_required
def list():
    """List the entries of a folder, paginated.

    Query parameters: ``parent_id`` (defaults to the user's root folder),
    ``keywords``, ``page`` (default 1), ``page_size`` (default 15),
    ``orderby`` (default ``create_time``) and ``desc`` (default true; the
    literal string ``false``, case-insensitive, disables it).

    Returns:
        A JSON result with ``total``, ``files`` and ``parent_folder``, a
        data-error result when the folder does not exist, a JSON result
        carrying "File not found!" when no parent folder is returned, or
        a server-error response on any exception.
    """
    # NOTE(review): the function name shadows the builtin ``list`` inside
    # this module; it is kept because it is the registered view name.
    pf_id = request.args.get("parent_id")

    keywords = request.args.get("keywords", "")

    page_number = int(request.args.get("page", 1))
    items_per_page = int(request.args.get("page_size", 15))
    orderby = request.args.get("orderby", "create_time")
    # Query-string values are strings, so "false" would otherwise be
    # truthy and descending order could never be switched off.
    desc = str(request.args.get("desc", True)).lower() != "false"
    if not pf_id:
        root_folder = FileService.get_root_folder(current_user.id)
        pf_id = root_folder.id
    try:
        e, file = FileService.get_by_id(pf_id)
        if not e:
            return get_data_error_result(retmsg="Folder not found!")

        files, total = FileService.get_by_pf_id(
            current_user.id, pf_id, page_number, items_per_page, orderby, desc, keywords)

        # Look the parent folder up once and reuse the result.
        parent_folder = FileService.get_parent_folder(pf_id)
        if not parent_folder:
            return get_json_result(retmsg="File not found!")

        return get_json_result(data={"total": total, "files": files, "parent_folder": parent_folder.to_json()})
    except Exception as e:
        return server_error_response(e)
195
+
196
+
197
@manager.route('/root_folder', methods=['GET'])
@login_required
def get_root_folder():
    """Return the current user's root folder as ``{"root_folder": ...}``.

    Any exception is converted into a server-error response.
    """
    try:
        payload = {
            "root_folder": FileService.get_root_folder(current_user.id).to_json()
        }
        return get_json_result(data=payload)
    except Exception as e:
        return server_error_response(e)
205
+
206
+
207
@manager.route('/parent_folder', methods=['GET'])
@login_required
def get_parent_folder():
    """Return the parent folder of the entry named by ``file_id``.

    Responds with a data-error result when the entry does not exist and
    with a server-error response on any exception.
    """
    file_id = request.args.get("file_id")
    try:
        exists, _ = FileService.get_by_id(file_id)
        if not exists:
            return get_data_error_result(retmsg="Folder not found!")

        payload = {
            "parent_folder": FileService.get_parent_folder(file_id).to_json()
        }
        return get_json_result(data=payload)
    except Exception as e:
        return server_error_response(e)
220
+
221
+
222
@manager.route('/all_parent_folder', methods=['GET'])
@login_required
def get_all_parent_folders():
    """Return every folder reported by ``FileService.get_all_parent_folders``.

    The result is ``{"parent_folders": [...]}`` with each folder as JSON.
    Responds with a data-error result when the entry named by ``file_id``
    does not exist and with a server-error response on any exception.
    """
    file_id = request.args.get("file_id")
    try:
        exists, _ = FileService.get_by_id(file_id)
        if not exists:
            return get_data_error_result(retmsg="Folder not found!")

        ancestors = [
            folder.to_json()
            for folder in FileService.get_all_parent_folders(file_id)
        ]
        return get_json_result(data={"parent_folders": ancestors})
    except Exception as e:
        return server_error_response(e)
238
+
239
+
240
@manager.route('/rm', methods=['POST'])
@login_required
@validate_request("file_ids")
def rm():
    """Delete files or folders and the documents linked to them.

    For a folder, the stored objects of all innermost files are removed
    from MinIO and the folder tree is deleted from the database.  For a
    plain file, the database row is deleted and then its stored object.
    Finally the documents linked to ``file_id`` through File2Document are
    removed (chunks, counters, rows) and the links are deleted.

    Returns:
        A JSON result with ``data=True`` on success, a data-error result
        when a file, document or tenant is missing or a delete fails, or
        a server-error response on any exception.
    """
    # NOTE(review): ownership of ``file_ids`` is not checked against
    # ``current_user`` here. Confirm whether that is enforced elsewhere.
    req = request.json
    file_ids = req["file_ids"]
    try:
        for file_id in file_ids:
            e, file = FileService.get_by_id(file_id)
            if not e:
                return get_data_error_result(retmsg="File or Folder not found!")
            if not file.tenant_id:
                return get_data_error_result(retmsg="Tenant not found!")

            if file.type == FileType.FOLDER:
                file_id_list = FileService.get_all_innermost_file_ids(file_id, [])
                for inner_file_id in file_id_list:
                    # Use a separate name so the outer ``file`` is kept.
                    e, inner_file = FileService.get_by_id(inner_file_id)
                    if not e:
                        return get_data_error_result(retmsg="File not found!")
                    MINIO.rm(inner_file.parent_id, inner_file.location)
                FileService.delete_folder_by_pf_id(current_user.id, file_id)
                # NOTE(review): links of the inner files are not cleaned
                # up below, only links of the folder id itself. Confirm
                # whether that is intended.
            else:
                if not FileService.delete(file):
                    return get_data_error_result(
                        retmsg="Database error (File removal)!")
                # Uploads are stored under the parent folder id; remove
                # the object so it is not orphaned.  Entries created via
                # /create have an empty location and nothing to remove.
                if file.location:
                    MINIO.rm(file.parent_id, file.location)

            # delete file2document
            informs = File2DocumentService.get_by_file_id(file_id)
            for inform in informs:
                doc_id = inform.document_id
                e, doc = DocumentService.get_by_id(doc_id)
                if not e:
                    return get_data_error_result(retmsg="Document not found!")
                tenant_id = DocumentService.get_tenant_id(doc_id)
                if not tenant_id:
                    return get_data_error_result(retmsg="Tenant not found!")
                ELASTICSEARCH.deleteByQuery(
                    Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
                DocumentService.increment_chunk_num(
                    doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1, 0)
                if not DocumentService.delete(doc):
                    return get_data_error_result(
                        retmsg="Database error (Document removal)!")
            File2DocumentService.delete_by_file_id(file_id)

        return get_json_result(data=True)
    except Exception as e:
        return server_error_response(e)
289
+
290
+
291
@manager.route('/rename', methods=['POST'])
@login_required
@validate_request("file_id", "name")
def rename():
    """Rename a file while keeping its extension.

    Reads ``file_id`` and ``name`` from the request JSON.  The new name
    must have the same (case-insensitive) suffix as the current one and
    must not already exist in the same parent folder.

    Returns:
        A JSON result with ``data=True`` on success, an argument-error
        result when the extension differs, a data-error result when the
        file is missing, the name is taken or the update fails, or a
        server-error response on any exception.
    """
    req = request.json
    try:
        e, file = FileService.get_by_id(req["file_id"])
        if not e:
            return get_data_error_result(retmsg="File not found!")
        new_suffix = pathlib.Path(req["name"].lower()).suffix
        old_suffix = pathlib.Path(file.name.lower()).suffix
        if new_suffix != old_suffix:
            return get_json_result(
                data=False,
                retmsg="The extension of file can't be changed",
                retcode=RetCode.ARGUMENT_ERROR)
        # The File model's column is ``parent_id`` (as used by /create and
        # /upload); the original filtered on ``pf_id``, which is not a
        # column of that model.
        if FileService.query(name=req["name"], parent_id=file.parent_id):
            return get_data_error_result(
                retmsg="Duplicated file name in the same folder.")

        if not FileService.update_by_id(
                req["file_id"], {"name": req["name"]}):
            return get_data_error_result(
                retmsg="Database error (File rename)!")

        return get_json_result(data=True)
    except Exception as e:
        return server_error_response(e)
318
+
319
+
320
@manager.route('/get/<file_id>', methods=['GET'])
# @login_required
def get(file_id):
    """Return the stored content of the file identified by ``file_id``.

    The object is read from MinIO under the file's parent folder id.  When
    the file name has an extension, ``Content-Type`` is set to
    ``image/<ext>`` for visual files and ``application/<ext>`` otherwise.

    Responds with a data-error result when the file does not exist and
    with a server-error response on any exception.
    """
    # NOTE(review): @login_required is commented out above, so this route
    # is not protected by it. Confirm that is intentional.
    try:
        found, stored = FileService.get_by_id(file_id)
        if not found:
            return get_data_error_result(retmsg="Document not found!")

        response = flask.make_response(MINIO.get(stored.parent_id, stored.location))
        ext = re.search(r"\.([^.]+)$", stored.name)
        if ext is None:
            return response

        if stored.type == FileType.VISUAL.value:
            content_type = 'image/%s' % ext.group(1)
        else:
            content_type = 'application/%s' % ext.group(1)
        response.headers.set('Content-Type', content_type)
        return response
    except Exception as e:
        return server_error_response(e)
api/apps/user_app.py CHANGED
@@ -28,6 +28,7 @@ from api.db import UserTenantRole, LLMType
28
  from api.settings import RetCode, GITHUB_OAUTH, CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS, API_KEY, \
29
  LLM_FACTORY, LLM_BASE_URL
30
  from api.db.services.user_service import UserService, TenantService, UserTenantService
 
31
  from api.settings import stat_logger
32
  from api.utils.api_utils import get_json_result, cors_reponse
33
 
@@ -221,6 +222,17 @@ def user_register(user_id, user):
221
  "invited_by": user_id,
222
  "role": UserTenantRole.OWNER
223
  }
 
 
 
 
 
 
 
 
 
 
 
224
  tenant_llm = []
225
  for llm in LLMService.query(fid=LLM_FACTORY):
226
  tenant_llm.append({"tenant_id": user_id,
@@ -236,6 +248,7 @@ def user_register(user_id, user):
236
  TenantService.insert(**tenant)
237
  UserTenantService.insert(**usr_tenant)
238
  TenantLLMService.insert_many(tenant_llm)
 
239
  return UserService.query(email=user["email"])
240
 
241
 
 
28
  from api.settings import RetCode, GITHUB_OAUTH, CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS, API_KEY, \
29
  LLM_FACTORY, LLM_BASE_URL
30
  from api.db.services.user_service import UserService, TenantService, UserTenantService
31
+ from api.db.services.file_service import FileService
32
  from api.settings import stat_logger
33
  from api.utils.api_utils import get_json_result, cors_reponse
34
 
 
222
  "invited_by": user_id,
223
  "role": UserTenantRole.OWNER
224
  }
225
+ file_id = get_uuid()
226
+ file = {
227
+ "id": file_id,
228
+ "parent_id": file_id,
229
+ "tenant_id": user_id,
230
+ "created_by": user_id,
231
+ "name": "/",
232
+ "type": FileType.FOLDER,
233
+ "size": 0,
234
+ "location": "",
235
+ }
236
  tenant_llm = []
237
  for llm in LLMService.query(fid=LLM_FACTORY):
238
  tenant_llm.append({"tenant_id": user_id,
 
248
  TenantService.insert(**tenant)
249
  UserTenantService.insert(**usr_tenant)
250
  TenantLLMService.insert_many(tenant_llm)
251
+ FileService.insert(file)
252
  return UserService.query(email=user["email"])
253
 
254
 
api/db/__init__.py CHANGED
@@ -45,6 +45,8 @@ class FileType(StrEnum):
45
  VISUAL = 'visual'
46
  AURAL = 'aural'
47
  VIRTUAL = 'virtual'
 
 
48
 
49
 
50
  class LLMType(StrEnum):
 
45
  VISUAL = 'visual'
46
  AURAL = 'aural'
47
  VIRTUAL = 'virtual'
48
+ FOLDER = 'folder'
49
+ OTHER = "other"
50
 
51
 
52
  class LLMType(StrEnum):
api/db/db_models.py CHANGED
@@ -669,6 +669,61 @@ class Document(DataBaseModel):
669
  db_table = "document"
670
 
671
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
672
  class Task(DataBaseModel):
673
  id = CharField(max_length=32, primary_key=True)
674
  doc_id = CharField(max_length=32, null=False, index=True)
 
669
  db_table = "document"
670
 
671
 
672
class File(DataBaseModel):
    """Row of the ``file`` table: one file or folder owned by a tenant."""

    id = CharField(max_length=32, primary_key=True)
    # Id of the containing folder.
    parent_id = CharField(max_length=32, null=False, help_text="parent folder id", index=True)
    tenant_id = CharField(max_length=32, null=False, help_text="tenant id", index=True)
    created_by = CharField(max_length=32, null=False, help_text="who created it")
    name = CharField(max_length=255, null=False, help_text="file name or folder name", index=True)
    # Nullable storage location.
    location = CharField(max_length=255, null=True, help_text="where dose it store")
    size = IntegerField(default=0)
    type = CharField(max_length=32, null=False, help_text="file extension")

    class Meta:
        db_table = "file"
705
+
706
+
707
class File2Document(DataBaseModel):
    """Row of the ``file2document`` table linking a file to a document."""

    id = CharField(max_length=32, primary_key=True)
    # Both sides of the link are nullable, indexed id columns.
    file_id = CharField(max_length=32, null=True, help_text="file id", index=True)
    document_id = CharField(max_length=32, null=True, help_text="document id", index=True)

    class Meta:
        db_table = "file2document"
725
+
726
+
727
  class Task(DataBaseModel):
728
  id = CharField(max_length=32, primary_key=True)
729
  doc_id = CharField(max_length=32, null=False, index=True)
api/db/services/document_service.py CHANGED
@@ -15,6 +15,11 @@
15
  #
16
  from peewee import Expression
17
 
 
 
 
 
 
18
  from api.db import FileType, TaskStatus
19
  from api.db.db_models import DB, Knowledgebase, Tenant
20
  from api.db.db_models import Document
@@ -69,6 +74,20 @@ class DocumentService(CommonService):
69
  raise RuntimeError("Database error (Knowledgebase)!")
70
  return cls.delete_by_id(doc.id)
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  @classmethod
73
  @DB.connection_context()
74
  def get_newly_uploaded(cls, tm, mod=0, comm=1, items_per_page=64):
 
15
  #
16
  from peewee import Expression
17
 
18
+ from elasticsearch_dsl import Q
19
+ from rag.utils import ELASTICSEARCH
20
+ from rag.utils.minio_conn import MINIO
21
+ from rag.nlp import search
22
+
23
  from api.db import FileType, TaskStatus
24
  from api.db.db_models import DB, Knowledgebase, Tenant
25
  from api.db.db_models import Document
 
74
  raise RuntimeError("Database error (Knowledgebase)!")
75
  return cls.delete_by_id(doc.id)
76
 
77
@classmethod
@DB.connection_context()
def remove_document(cls, doc, tenant_id):
    """Remove a document, its indexed chunks and its stored object.

    Deletes the document's chunks from the tenant's Elasticsearch index,
    decrements the chunk/token counters, deletes the database row through
    ``cls.delete`` and finally removes the object from MinIO under the
    knowledge-base id.

    Args:
        doc: The document row to remove.
        tenant_id: Tenant id used to derive the Elasticsearch index name.

    Returns:
        The value returned by ``cls.delete(doc)``.

    Raises:
        RuntimeError: If ``cls.delete(doc)`` reports failure.
    """
    ELASTICSEARCH.deleteByQuery(
        Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))

    cls.increment_chunk_num(
        doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1, 0)
    deleted = cls.delete(doc)
    if not deleted:
        raise RuntimeError("Database error (Document removal)!")

    MINIO.rm(doc.kb_id, doc.location)
    # The row is already gone at this point; a second delete_by_id (as in
    # the original) would act on a missing row, so return the first result.
    return deleted
90
+
91
  @classmethod
92
  @DB.connection_context()
93
  def get_newly_uploaded(cls, tm, mod=0, comm=1, items_per_page=64):
api/db/services/file2document_service.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+ from datetime import datetime
17
+
18
+ from api.db.db_models import DB
19
+ from api.db.db_models import File, Document, File2Document
20
+ from api.db.services.common_service import CommonService
21
+ from api.utils import current_timestamp, datetime_format
22
+
23
+
24
class File2DocumentService(CommonService):
    """Database access helpers for File2Document link rows."""

    model = File2Document

    @classmethod
    @DB.connection_context()
    def get_by_file_id(cls, file_id):
        """Return a query selecting every link row of ``file_id``."""
        return cls.model.select().where(cls.model.file_id == file_id)

    @classmethod
    @DB.connection_context()
    def get_by_document_id(cls, document_id):
        """Return a query selecting every link row of ``document_id``."""
        return cls.model.select().where(cls.model.document_id == document_id)

    @classmethod
    @DB.connection_context()
    def insert(cls, obj):
        """Save a new link row and return it as read back from the database.

        Args:
            obj: Mapping of column values; must contain ``"id"``.

        Raises:
            RuntimeError: If saving fails or the row cannot be read back.
        """
        if not cls.save(**obj):
            raise RuntimeError("Database error (File)!")
        e, obj = cls.get_by_id(obj["id"])
        if not e:
            raise RuntimeError("Database error (File retrieval)!")
        return obj

    @classmethod
    @DB.connection_context()
    def delete_by_file_id(cls, file_id):
        """Delete every link row of ``file_id``; return the execute() result."""
        return cls.model.delete().where(cls.model.file_id == file_id).execute()

    @classmethod
    @DB.connection_context()
    def delete_by_document_id(cls, doc_id):
        """Delete every link row of ``doc_id``; return the execute() result."""
        return cls.model.delete().where(cls.model.document_id == doc_id).execute()

    @classmethod
    @DB.connection_context()
    def update_by_file_id(cls, file_id, obj):
        """Update the link rows of ``file_id`` and return one updated row.

        ``update_time`` and ``update_date`` are stamped on ``obj`` before
        the update is issued.

        Args:
            file_id: Value matched against the ``file_id`` column.
            obj: Mapping of column values to write.

        Returns:
            The first link row of ``file_id`` after the update, or
            ``None`` when no such row exists.
        """
        obj["update_time"] = current_timestamp()
        obj["update_date"] = datetime_format(datetime.now())
        # Filter on the file_id column.  The original matched the primary
        # key against file_id and then passed the field object itself to
        # get_by_id.
        cls.model.update(obj).where(cls.model.file_id == file_id).execute()
        return cls.model.select().where(cls.model.file_id == file_id).first()
api/db/services/file_service.py ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+ from flask_login import current_user
17
+ from peewee import fn
18
+
19
+ from api.db import FileType
20
+ from api.db.db_models import DB, File2Document, Knowledgebase
21
+ from api.db.db_models import File, Document
22
+ from api.db.services.common_service import CommonService
23
+ from api.utils import get_uuid
24
+ from rag.utils import MINIO
25
+
26
+
27
class FileService(CommonService):
    """Data-access helpers for File rows (files and folders arranged in a tree).

    The root folder of a tenant is stored as a row whose ``parent_id`` equals
    its own ``id`` (see ``get_root_folder``), so every traversal that walks
    "children of X" must exclude X itself to avoid looping on the root.
    """
    model = File

    @classmethod
    @DB.connection_context()
    def get_by_pf_id(cls, tenant_id, pf_id, page_number, items_per_page,
                     orderby, desc, keywords):
        """List one page of the entries directly under folder ``pf_id``.

        :param tenant_id: only rows of this tenant are returned.
        :param pf_id: id of the parent folder.
        :param page_number: page passed to ``paginate``.
        :param items_per_page: page size passed to ``paginate``.
        :param orderby: field name resolved through ``model.getter_by``.
        :param desc: truthy for descending order, falsy for ascending.
        :param keywords: optional case-insensitive substring filter on the name.
        :return: ``(rows, count)`` where ``rows`` is a list of dicts (each with
            an extra ``kbs_info`` key; folders also get a computed ``size``)
            and ``count`` is the number of matches before pagination.
        """
        files = cls.model.select().where(
            (cls.model.tenant_id == tenant_id)
            & (cls.model.parent_id == pf_id)
            # The self-parented root must not be listed inside itself.
            & (cls.model.id != pf_id))
        if keywords:
            files = files.where(
                fn.LOWER(cls.model.name).like(f"%%{keywords.lower()}%%"))
        count = files.count()

        order_field = cls.model.getter_by(orderby)
        files = files.order_by(order_field.desc() if desc else order_field.asc())
        files = files.paginate(page_number, items_per_page)

        res_files = list(files.dicts())
        for file in res_files:
            if file["type"] == FileType.FOLDER.value:
                file["size"] = cls.get_folder_size(file["id"])
                file["kbs_info"] = []
            else:
                file["kbs_info"] = cls.get_kb_id_by_file_id(file["id"])

        return res_files, count

    @classmethod
    @DB.connection_context()
    def get_kb_id_by_file_id(cls, file_id):
        """Return ``[{"kb_id": ..., "kb_name": ...}, ...]`` for the knowledge
        bases reached from ``file_id`` via File2Document and Document.

        Returns an empty list when the file is linked to nothing.
        """
        kbs = (cls.model.select(*[Knowledgebase.id, Knowledgebase.name])
               .join(File2Document, on=(File2Document.file_id == file_id))
               .join(Document, on=(File2Document.document_id == Document.id))
               .join(Knowledgebase, on=(Knowledgebase.id == Document.kb_id))
               .where(cls.model.id == file_id))
        # A single pass over the query; an empty result yields [].
        return [{"kb_id": kb["id"], "kb_name": kb["name"]} for kb in kbs.dicts()]

    @classmethod
    @DB.connection_context()
    def get_by_pf_id_name(cls, id, name):
        """Return the entry called ``name`` directly under folder ``id``, or ``None``.

        :raises RuntimeError: if the matched row cannot be re-read by id.
        """
        match = cls.model.select().where(
            (cls.model.parent_id == id) & (cls.model.name == name)).first()
        if match is None:
            return None
        found, file = cls.get_by_id(match.id)
        if not found:
            raise RuntimeError("Database error (File retrieval)!")
        return file

    @classmethod
    @DB.connection_context()
    def get_id_list_by_id(cls, id, name, count, res):
        """Resolve a path of names to ids, descending from folder ``id``.

        Starting at ``name[count]``, each component is looked up under the
        previously resolved folder and its id appended to ``res``. Resolution
        stops at the first component that does not exist.

        :param id: id of the folder to start from.
        :param name: sequence of path components.
        :param count: index of the first component to resolve.
        :param res: list that receives the resolved ids (mutated in place).
        :return: ``res``.
        """
        parent_id = id
        while count < len(name):
            file = cls.get_by_pf_id_name(parent_id, name[count])
            if not file:
                break
            res.append(file.id)
            parent_id = file.id
            count += 1
        return res

    @classmethod
    @DB.connection_context()
    def get_all_innermost_file_ids(cls, folder_id, result_ids):
        """Collect into ``result_ids`` the ids of all entries under ``folder_id``
        that have no children; ``folder_id`` itself is collected when it has none.

        :return: ``result_ids`` (mutated in place).
        """
        children = list(cls.model.select().where(
            (cls.model.parent_id == folder_id)
            # Skip the self-parented root, which would otherwise recurse forever.
            & (cls.model.id != folder_id)))
        if children:
            for child in children:
                cls.get_all_innermost_file_ids(child.id, result_ids)
        else:
            result_ids.append(folder_id)
        return result_ids

    @classmethod
    @DB.connection_context()
    def create_folder(cls, file, parent_id, name, count):
        """Create the chain of folders ``name[count] .. name[-2]`` under ``parent_id``.

        The last element of ``name`` is never created here. Folders are owned
        by ``current_user``.

        :param file: value returned unchanged when nothing needs creating.
        :param parent_id: id of the folder the first new folder is placed in.
        :param name: sequence of path components.
        :param count: index of the first component to create.
        :return: the innermost folder created, or ``file`` if none was.
        """
        while count <= len(name) - 2:
            file = cls.insert({
                "id": get_uuid(),
                "parent_id": parent_id,
                "tenant_id": current_user.id,
                "created_by": current_user.id,
                "name": name[count],
                "location": "",
                "size": 0,
                # Store the plain value; the rest of this class compares the
                # column against FileType.FOLDER.value.
                "type": FileType.FOLDER.value,
            })
            parent_id = file.id
            count += 1
        return file

    @classmethod
    @DB.connection_context()
    def is_parent_folder_exist(cls, parent_id):
        """Return True if a row with id ``parent_id`` exists.

        When it does not, entries still parented to ``parent_id`` are removed
        for the current user and False is returned.
        """
        if cls.model.select().where(cls.model.id == parent_id).count():
            return True
        # delete_folder_by_pf_id requires the owning tenant id as first argument.
        # NOTE(review): current_user is used as the owner, as in create_folder;
        # confirm this is only reached inside a logged-in request.
        cls.delete_folder_by_pf_id(current_user.id, parent_id)
        return False

    @classmethod
    @DB.connection_context()
    def get_root_folder(cls, tenant_id):
        """Return the root folder of ``tenant_id`` (the row whose parent is itself).

        :raises IndexError: if the tenant has no root folder row.
        :raises RuntimeError: if the matched row cannot be re-read by id.
        """
        # Combine with ``&``: Python's ``and`` would discard the tenant filter.
        roots = cls.model.select().where(
            (cls.model.tenant_id == tenant_id)
            & (cls.model.parent_id == cls.model.id))
        found, file = cls.get_by_id(roots[0].id)
        if not found:
            raise RuntimeError("Database error (File retrieval)!")
        return file

    @classmethod
    @DB.connection_context()
    def get_parent_folder(cls, file_id):
        """Return the row that ``file_id``'s ``parent_id`` points to.

        :raises RuntimeError: if ``file_id`` or its parent does not exist.
        """
        child = cls.model.select().where(cls.model.id == file_id).first()
        if child is None:
            raise RuntimeError("Database error (File doesn't exist)!")
        found, parent = cls.get_by_id(child.parent_id)
        if not found:
            raise RuntimeError("Database error (File retrieval)!")
        return parent

    @classmethod
    @DB.connection_context()
    def get_all_parent_folders(cls, start_id):
        """Return the chain of rows from ``start_id`` up to the root, inclusive.

        The walk ends at the row whose ``parent_id`` equals its own ``id``.
        If an id on the way cannot be loaded, the chain collected so far is
        returned.
        """
        parent_folders = []
        current_id = start_id
        while current_id:
            found, file = cls.get_by_id(current_id)
            # Check the lookup flag before touching ``file``.
            if not found:
                break
            parent_folders.append(file)
            if file.parent_id == file.id:
                break
            current_id = file.parent_id
        return parent_folders

    @classmethod
    @DB.connection_context()
    def insert(cls, file):
        """Save a new row and return it re-read from the database.

        :param file: dict of field values; must contain an ``"id"`` key.
        :raises RuntimeError: if the save fails or the row cannot be read back.
        """
        if not cls.save(**file):
            raise RuntimeError("Database error (File)!")
        found, saved = cls.get_by_id(file["id"])
        if not found:
            raise RuntimeError("Database error (File retrieval)!")
        return saved

    @classmethod
    @DB.connection_context()
    def delete(cls, file):
        """Delete the row identified by ``file.id``."""
        return cls.delete_by_id(file.id)

    @classmethod
    @DB.connection_context()
    def delete_by_pf_id(cls, folder_id):
        """Delete the direct children of ``folder_id``; return the result of ``execute()``."""
        return cls.model.delete().where(cls.model.parent_id == folder_id).execute()

    @classmethod
    @DB.connection_context()
    def delete_folder_by_pf_id(cls, user_id, folder_id):
        """Delete ``folder_id`` and everything beneath it, limited to tenant ``user_id``.

        :return: the result of ``execute()`` for the final delete of
            ``folder_id`` itself.
        :raises RuntimeError: if any step fails; the cause is chained.
        """
        try:
            children = cls.model.select().where(
                (cls.model.tenant_id == user_id)
                & (cls.model.parent_id == folder_id)
                # The root is its own parent; never recurse into itself.
                & (cls.model.id != folder_id))
            for child in children:
                cls.delete_folder_by_pf_id(user_id, child.id)
            return cls.model.delete().where(
                (cls.model.tenant_id == user_id)
                & (cls.model.id == folder_id)).execute()
        except Exception as e:
            import logging
            logging.getLogger(__name__).exception(
                "Failed to delete folder %s", folder_id)
            raise RuntimeError("Database error (File retrieval)!") from e

    @classmethod
    @DB.connection_context()
    def get_file_count(cls, tenant_id):
        """Return the number of File rows owned by ``tenant_id``."""
        # Count in the database instead of loading every row to take len().
        return cls.model.select(cls.model.id).where(
            cls.model.tenant_id == tenant_id).count()

    @classmethod
    @DB.connection_context()
    def get_folder_size(cls, folder_id):
        """Return the sum of ``size`` over every entry beneath ``folder_id``."""
        size = 0
        pending = [folder_id]
        while pending:
            parent_id = pending.pop()
            children = cls.model.select(
                *[cls.model.id, cls.model.size, cls.model.type]
            ).where(
                (cls.model.parent_id == parent_id)
                # Exclude the self-parented root so the walk terminates.
                & (cls.model.id != parent_id))
            for f in children:
                size += f.size
                if f.type == FileType.FOLDER.value:
                    pending.append(f.id)
        return size
api/utils/file_utils.py CHANGED
@@ -155,7 +155,9 @@ def filename_type(filename):
155
  return FileType.AURAL.value
156
 
157
  if re.match(r".*\.(jpg|jpeg|png|tif|gif|pcx|tga|exif|fpx|svg|psd|cdr|pcd|dxf|ufo|eps|ai|raw|WMF|webp|avif|apng|icon|ico|mpg|mpeg|avi|rm|rmvb|mov|wmv|asf|dat|asx|wvx|mpe|mpa|mp4)$", filename):
158
- return FileType.VISUAL
 
 
159
 
160
 
161
  def thumbnail(filename, blob):
 
155
  return FileType.AURAL.value
156
 
157
  if re.match(r".*\.(jpg|jpeg|png|tif|gif|pcx|tga|exif|fpx|svg|psd|cdr|pcd|dxf|ufo|eps|ai|raw|WMF|webp|avif|apng|icon|ico|mpg|mpeg|avi|rm|rmvb|mov|wmv|asf|dat|asx|wvx|mpe|mpa|mp4)$", filename):
158
+ return FileType.VISUAL.value
159
+
160
+ return FileType.OTHER.value
161
 
162
 
163
  def thumbnail(filename, blob):