Kevin Hu
committed on
Commit
·
42023af
1
Parent(s):
d1a0b33
fix parser for pptx of which files are from filemanager (#2482)
Browse files
### What problem does this PR solve?
#2467
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
api/apps/file2document_app.py
CHANGED
|
@@ -77,7 +77,7 @@ def convert():
|
|
| 77 |
doc = DocumentService.insert({
|
| 78 |
"id": get_uuid(),
|
| 79 |
"kb_id": kb.id,
|
| 80 |
-
"parser_id": kb.parser_id,
|
| 81 |
"parser_config": kb.parser_config,
|
| 82 |
"created_by": current_user.id,
|
| 83 |
"type": file.type,
|
|
@@ -85,7 +85,6 @@ def convert():
|
|
| 85 |
"location": file.location,
|
| 86 |
"size": file.size
|
| 87 |
})
|
| 88 |
-
FileService.set_constant_parser(doc, file.name)
|
| 89 |
file2document = File2DocumentService.insert({
|
| 90 |
"id": get_uuid(),
|
| 91 |
"file_id": id,
|
|
|
|
| 77 |
doc = DocumentService.insert({
|
| 78 |
"id": get_uuid(),
|
| 79 |
"kb_id": kb.id,
|
| 80 |
+
"parser_id": FileService.get_parser(file.type, file.name, kb.parser_id),
|
| 81 |
"parser_config": kb.parser_config,
|
| 82 |
"created_by": current_user.id,
|
| 83 |
"type": file.type,
|
|
|
|
| 85 |
"location": file.location,
|
| 86 |
"size": file.size
|
| 87 |
})
|
|
|
|
| 88 |
file2document = File2DocumentService.insert({
|
| 89 |
"id": get_uuid(),
|
| 90 |
"file_id": id,
|
api/db/services/file_service.py
CHANGED
|
@@ -357,7 +357,7 @@ class FileService(CommonService):
|
|
| 357 |
doc = {
|
| 358 |
"id": get_uuid(),
|
| 359 |
"kb_id": kb.id,
|
| 360 |
-
"parser_id": kb.parser_id,
|
| 361 |
"parser_config": kb.parser_config,
|
| 362 |
"created_by": user_id,
|
| 363 |
"type": filetype,
|
|
@@ -366,7 +366,6 @@ class FileService(CommonService):
|
|
| 366 |
"size": len(blob),
|
| 367 |
"thumbnail": thumbnail(filename, blob)
|
| 368 |
}
|
| 369 |
-
self.set_constant_parser(doc, filename)
|
| 370 |
DocumentService.insert(doc)
|
| 371 |
|
| 372 |
FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id)
|
|
@@ -377,12 +376,13 @@ class FileService(CommonService):
|
|
| 377 |
return err, files
|
| 378 |
|
| 379 |
@staticmethod
|
| 380 |
-
def
|
| 381 |
-
if
|
| 382 |
-
|
| 383 |
-
if
|
| 384 |
-
|
| 385 |
if re.search(r"\.(ppt|pptx|pages)$", filename):
|
| 386 |
-
|
| 387 |
if re.search(r"\.(eml)$", filename):
|
| 388 |
-
|
|
|
|
|
|
| 357 |
doc = {
|
| 358 |
"id": get_uuid(),
|
| 359 |
"kb_id": kb.id,
|
| 360 |
+
"parser_id": self.get_parser(filetype, filename, kb.parser_id),
|
| 361 |
"parser_config": kb.parser_config,
|
| 362 |
"created_by": user_id,
|
| 363 |
"type": filetype,
|
|
|
|
| 366 |
"size": len(blob),
|
| 367 |
"thumbnail": thumbnail(filename, blob)
|
| 368 |
}
|
|
|
|
| 369 |
DocumentService.insert(doc)
|
| 370 |
|
| 371 |
FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id)
|
|
|
|
| 376 |
return err, files
|
| 377 |
|
| 378 |
@staticmethod
|
| 379 |
+
def get_parser(doc_type, filename, default):
|
| 380 |
+
if doc_type == FileType.VISUAL:
|
| 381 |
+
return ParserType.PICTURE.value
|
| 382 |
+
if doc_type == FileType.AURAL:
|
| 383 |
+
return ParserType.AUDIO.value
|
| 384 |
if re.search(r"\.(ppt|pptx|pages)$", filename):
|
| 385 |
+
return ParserType.PRESENTATION.value
|
| 386 |
if re.search(r"\.(eml)$", filename):
|
| 387 |
+
return ParserType.EMAIL.value
|
| 388 |
+
return default
|