Commit 73c78d3 · Parent: 349437c

Fix graphrag callback (#1806)

### What problem does this PR solve?

#1800

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Files changed:
- graphrag/index.py +2 -2
- rag/app/knowledge_graph.py +2 -2
graphrag/index.py CHANGED

```diff
@@ -45,7 +45,7 @@ def graph_merge(g1, g2):
     g = g2.copy()
     for n, attr in g1.nodes(data=True):
         if n not in g2.nodes():
-
+            g.add_node(n, **attr)
             continue
 
         g.nodes[n]["weight"] += 1
@@ -75,7 +75,7 @@ def build_knowlege_graph_chunks(tenant_id: str, chunks: List[str], callback, ent
     cnt = 0
     threads = []
     exe = ThreadPoolExecutor(max_workers=12)
-    for i in range(len(chunks
+    for i in range(len(chunks)):
         tkn_cnt = num_tokens_from_string(chunks[i])
         if cnt+tkn_cnt >= left_token_count and texts:
             threads.append(exe.submit(ext, texts, {"entity_types": entity_types}))
```
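After the first hunk, graph_merge carries nodes that exist only in g1 into the merged graph together with their attributes instead of skipping them. Below is a minimal runnable sketch of the post-fix node-merge semantics, assuming networkx graphs whose nodes all carry a "weight" attribute (the toy nodes are illustrative, not from ragflow):

```python
# Minimal sketch of graph_merge's post-fix node handling, assuming
# networkx graphs whose nodes all carry a "weight" attribute.
# The example nodes/attributes below are illustrative, not from ragflow.
import networkx as nx

def merge_sketch(g1: nx.Graph, g2: nx.Graph) -> nx.Graph:
    g = g2.copy()
    for n, attr in g1.nodes(data=True):
        if n not in g2.nodes():
            # The fix: nodes unique to g1 are added with their attributes.
            g.add_node(n, **attr)
            continue
        # Nodes present in both graphs get their occurrence weight bumped.
        g.nodes[n]["weight"] += 1
    return g

g1 = nx.Graph()
g1.add_node("alice", weight=1, description="a person")
g1.add_node("acme", weight=1, description="an organization")
g2 = nx.Graph()
g2.add_node("alice", weight=1, description="a person")

merged = merge_sketch(g1, g2)
print(dict(merged.nodes(data=True)))
# "acme" survives the merge; "alice" ends up with weight 2.
```

Edge handling is omitted; the sketch covers only the node loop this hunk touches.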
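The second hunk's loop packs chunks into batches under a token budget before submitting each batch to the extractor thread pool. Below is a runnable sketch of that pattern; num_tokens_from_string, ext, and left_token_count are stand-ins for ragflow's token counter, GraphExtractor, and context-size budget, and the accumulate/reset steps not visible in the hunk are assumptions:

```python
from concurrent.futures import ThreadPoolExecutor

# Stand-in token counter: the real num_tokens_from_string uses a tokenizer.
def num_tokens_from_string(s: str) -> int:
    return len(s.split())

# Stand-in extractor: the real ext is a GraphExtractor returning a graph.
def ext(texts, prompt_variables):
    return len(texts)

left_token_count = 8  # stand-in budget; really derived from the model context
chunks = ["a b c", "d e f", "g h i", "j k"]
entity_types = ["organization", "person"]

exe = ThreadPoolExecutor(max_workers=12)
threads, texts, cnt = [], [], 0
for i in range(len(chunks)):
    tkn_cnt = num_tokens_from_string(chunks[i])
    # Flush the batch once adding this chunk would exceed the budget.
    if cnt + tkn_cnt >= left_token_count and texts:
        threads.append(exe.submit(ext, texts, {"entity_types": entity_types}))
        texts, cnt = [], 0  # assumed reset, mirroring the batching intent
    texts.append(chunks[i])
    cnt += tkn_cnt
if texts:  # assumed tail flush so the last partial batch is not dropped
    threads.append(exe.submit(ext, texts, {"entity_types": entity_types}))

print([t.result() for t in threads])
```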
rag/app/knowledge_graph.py CHANGED

```diff
@@ -13,7 +13,7 @@ def chunk(filename, binary, tenant_id, from_page=0, to_page=100000,
     eng = lang.lower() == "english"
 
     parser_config["layout_recognize"] = False
-    sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True, parser_config=parser_config)
+    sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True, callback=callback, parser_config=parser_config)
     chunks = build_knowlege_graph_chunks(tenant_id, sections, callback,
                                          parser_config.get("entity_types", ["organization", "person", "location", "event", "time"])
                                          )
@@ -27,4 +27,4 @@ def chunk(filename, binary, tenant_id, from_page=0, to_page=100000,
     doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"])
     chunks.extend(tokenize_chunks(sections, doc, eng))
 
-    return chunks
+    return chunks
```
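The change in rag/app/knowledge_graph.py forwards the caller's progress callback into naive.chunk, so parsing progress is reported instead of the callback being silently dropped. Below is a small runnable sketch of that forwarding pattern; naive_chunk and the (progress, message) callback signature are simplified stand-ins, not ragflow's exact API:

```python
from typing import Callable, Optional

# Stand-in for naive.chunk: the real one parses a document into sections.
def naive_chunk(filename: str, callback: Optional[Callable] = None):
    sections = [f"section {i}" for i in range(3)]
    for i, _ in enumerate(sections):
        if callback:
            # Report fractional progress while sections are produced.
            callback((i + 1) / len(sections), f"parsed {i + 1}/{len(sections)} sections")
    return sections

def chunk(filename: str, callback: Optional[Callable] = None):
    # The fix: pass callback through instead of omitting it, so progress
    # raised inside the inner parser reaches the caller's handler.
    return naive_chunk(filename, callback=callback)

chunk("demo.txt", callback=lambda prog, msg: print(f"{prog:.2f} {msg}"))
```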