Kevin Hu committed on
Commit
b726d64
·
1 Parent(s): 27a0991

set DLA active for KG (#3386)

Browse files

### What problem does this PR solve?

### Type of change


- [x] Refactoring

api/apps/document_app.py CHANGED
@@ -537,7 +537,7 @@ def parse():
537
  options.add_argument('--disable-dev-shm-usage')
538
  driver = Chrome(options=options)
539
  driver.get(url)
540
- sections = RAGFlowHtmlParser()(driver.page_source)
541
  return get_json_result(data="\n".join(sections))
542
 
543
  if 'file' not in request.files:
 
537
  options.add_argument('--disable-dev-shm-usage')
538
  driver = Chrome(options=options)
539
  driver.get(url)
540
+ sections = RAGFlowHtmlParser()("", binary=driver.page_source)
541
  return get_json_result(data="\n".join(sections))
542
 
543
  if 'file' not in request.files:
rag/app/knowledge_graph.py CHANGED
@@ -9,10 +9,10 @@ def chunk(filename, binary, tenant_id, from_page=0, to_page=100000,
9
  lang="Chinese", callback=None, **kwargs):
10
  parser_config = kwargs.get(
11
  "parser_config", {
12
- "chunk_token_num": 512, "delimiter": "\n!?。;!?", "layout_recognize": False})
13
  eng = lang.lower() == "english"
14
 
15
- parser_config["layout_recognize"] = False
16
  sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True,
17
  parser_config=parser_config, callback=callback)
18
  chunks = build_knowledge_graph_chunks(tenant_id, sections, callback,
 
9
  lang="Chinese", callback=None, **kwargs):
10
  parser_config = kwargs.get(
11
  "parser_config", {
12
+ "chunk_token_num": 512, "delimiter": "\n!?。;!?", "layout_recognize": True})
13
  eng = lang.lower() == "english"
14
 
15
+ parser_config["layout_recognize"] = True
16
  sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True,
17
  parser_config=parser_config, callback=callback)
18
  chunks = build_knowledge_graph_chunks(tenant_id, sections, callback,