Kevin Hu
committed on
Commit
·
b726d64
1
Parent(s):
27a0991
set DLA active for KG (#3386)
Browse files
### What problem does this PR solve?
### Type of change
- [x] Refactoring
- api/apps/document_app.py +1 -1
- rag/app/knowledge_graph.py +2 -2
api/apps/document_app.py
CHANGED
@@ -537,7 +537,7 @@ def parse():
|
|
537 |
options.add_argument('--disable-dev-shm-usage')
|
538 |
driver = Chrome(options=options)
|
539 |
driver.get(url)
|
540 |
-
sections = RAGFlowHtmlParser()(driver.page_source)
|
541 |
return get_json_result(data="\n".join(sections))
|
542 |
|
543 |
if 'file' not in request.files:
|
|
|
537 |
options.add_argument('--disable-dev-shm-usage')
|
538 |
driver = Chrome(options=options)
|
539 |
driver.get(url)
|
540 |
+
sections = RAGFlowHtmlParser()("", binary=driver.page_source)
|
541 |
return get_json_result(data="\n".join(sections))
|
542 |
|
543 |
if 'file' not in request.files:
|
rag/app/knowledge_graph.py
CHANGED
@@ -9,10 +9,10 @@ def chunk(filename, binary, tenant_id, from_page=0, to_page=100000,
|
|
9 |
lang="Chinese", callback=None, **kwargs):
|
10 |
parser_config = kwargs.get(
|
11 |
"parser_config", {
|
12 |
-
"chunk_token_num": 512, "delimiter": "\n!?。;!?", "layout_recognize": False})
|
13 |
eng = lang.lower() == "english"
|
14 |
|
15 |
-
parser_config["layout_recognize"] = False
|
16 |
sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True,
|
17 |
parser_config=parser_config, callback=callback)
|
18 |
chunks = build_knowledge_graph_chunks(tenant_id, sections, callback,
|
|
|
9 |
lang="Chinese", callback=None, **kwargs):
|
10 |
parser_config = kwargs.get(
|
11 |
"parser_config", {
|
12 |
+
"chunk_token_num": 512, "delimiter": "\n!?。;!?", "layout_recognize": True})
|
13 |
eng = lang.lower() == "english"
|
14 |
|
15 |
+
parser_config["layout_recognize"] = True
|
16 |
sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True,
|
17 |
parser_config=parser_config, callback=callback)
|
18 |
chunks = build_knowledge_graph_chunks(tenant_id, sections, callback,
|