Spaces:

retopara
/

ragflow

Build error

Kevin Hu committed on Aug 5, 2024

Commit

3b7343c

1 Parent(s): 73c78d3

refine mindmap prompt (#1808)

### What problem does this PR solve?

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Files changed (4) hide show

api/db/services/document_service.py CHANGED Viewed

@@ -142,7 +142,7 @@ class DocumentService(CommonService):
     @classmethod
     @DB.connection_context()
     def get_unfinished_docs(cls):
-        fields = [cls.model.id, cls.model.process_begin_at, cls.model.parser_config, cls.model.progress_msg]
         docs = cls.model.select(*fields) \
             .where(
                 cls.model.status == StatusEnum.VALID.value,
@@ -311,7 +311,7 @@ class DocumentService(CommonService):
                 prg = 0
                 finished = True
                 bad = 0
-                status = TaskStatus.RUNNING.value
                 for t in tsks:
                     if 0 <= t.progress < 1:
                         finished = False

     @classmethod
     @DB.connection_context()
     def get_unfinished_docs(cls):
+        fields = [cls.model.id, cls.model.process_begin_at, cls.model.parser_config, cls.model.progress_msg, cls.model.run]
         docs = cls.model.select(*fields) \
             .where(
                 cls.model.status == StatusEnum.VALID.value,
                 prg = 0
                 finished = True
                 bad = 0
+                status = d["run"]#TaskStatus.RUNNING.value
                 for t in tsks:
                     if 0 <= t.progress < 1:
                         finished = False

conf/llm_factories.json CHANGED Viewed

@@ -92,12 +92,6 @@
                     "max_tokens": 32768,
                     "model_type": "chat"
                 },
-                {
-                    "llm_name": "qwen-max-1201",
-                    "tags": "LLM,CHAT,6K",
-                    "max_tokens": 5899,
-                    "model_type": "chat"
-                },
                 {
                     "llm_name": "text-embedding-v2",
                     "tags": "TEXT EMBEDDING,2K",

                     "max_tokens": 32768,
                     "model_type": "chat"
                 },
                 {
                     "llm_name": "text-embedding-v2",
                     "tags": "TEXT EMBEDDING,2K",

graphrag/mind_map_prompt.py CHANGED Viewed

@@ -22,7 +22,6 @@ MIND_MAP_EXTRACTION_PROMPT = """
    3. If the subject matter is really complex, split them into sub-sections.
  - Output requirement:
-   - Always try to maximize the number of sub-sections.
    - In language of
    - MUST IN FORMAT OF MARKDOWN

    3. If the subject matter is really complex, split them into sub-sections.
  - Output requirement:
    - In language of
    - MUST IN FORMAT OF MARKDOWN

rag/app/knowledge_graph.py CHANGED Viewed

@@ -13,7 +13,8 @@ def chunk(filename, binary, tenant_id, from_page=0, to_page=100000,
     eng = lang.lower() == "english"
     parser_config["layout_recognize"] = False
-    sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True, callback=callback ,parser_config=parser_config)
     chunks = build_knowlege_graph_chunks(tenant_id, sections, callback,
                                          parser_config.get("entity_types", ["organization", "person", "location", "event", "time"])
                                          )
@@ -27,4 +28,4 @@ def chunk(filename, binary, tenant_id, from_page=0, to_page=100000,
     doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"])
     chunks.extend(tokenize_chunks(sections, doc, eng))
-    return chunks

     eng = lang.lower() == "english"
     parser_config["layout_recognize"] = False
+    sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True,
+                           parser_config=parser_config, callback=callback)
     chunks = build_knowlege_graph_chunks(tenant_id, sections, callback,
                                          parser_config.get("entity_types", ["organization", "person", "location", "event", "time"])
                                          )
     doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"])
     chunks.extend(tokenize_chunks(sections, doc, eng))
+    return chunks