Spaces:

retopara
/

ragflow

Build error

App Files Community

KevinHuSh committed on Mar 15, 2024

Commit

2ef1d8e

1 Parent(s): 79f4fcc

resolve table issues (#125)

Browse files

Files changed (7) hide show

Dockerfile +0 -1
Dockerfile.cuda +0 -1
api/apps/conversation_app.py +1 -0
api/db/init_data.py +1 -1
api/db/services/knowledgebase_service.py +1 -1
rag/app/table.py +10 -8
rag/utils/__init__.py +2 -2

Dockerfile CHANGED Viewed

@@ -14,7 +14,6 @@ ADD ./rag ./rag
 ENV PYTHONPATH=/ragflow/
 ENV HF_ENDPOINT=https://hf-mirror.com
-/root/miniconda3/envs/py11/bin/pip install peewee==3.17.1
 ADD docker/entrypoint.sh ./entrypoint.sh
 RUN chmod +x ./entrypoint.sh

 ENV PYTHONPATH=/ragflow/
 ENV HF_ENDPOINT=https://hf-mirror.com
 ADD docker/entrypoint.sh ./entrypoint.sh
 RUN chmod +x ./entrypoint.sh

Dockerfile.cuda CHANGED Viewed

@@ -19,7 +19,6 @@ ADD ./rag ./rag
 ENV PYTHONPATH=/ragflow/
 ENV HF_ENDPOINT=https://hf-mirror.com
-/root/miniconda3/envs/py11/bin/pip install peewee==3.17.1
 ADD docker/entrypoint.sh ./entrypoint.sh
 RUN chmod +x ./entrypoint.sh

 ENV PYTHONPATH=/ragflow/
 ENV HF_ENDPOINT=https://hf-mirror.com
 ADD docker/entrypoint.sh ./entrypoint.sh
 RUN chmod +x ./entrypoint.sh

api/apps/conversation_app.py CHANGED Viewed

@@ -309,6 +309,7 @@ def use_sql(question, field_map, tenant_id, chat_mdl):
     # compose markdown table
     clmns = "|"+"|".join([re.sub(r"(/.*|（[^（）]+）)", "", field_map.get(tbl["columns"][i]["name"], tbl["columns"][i]["name"])) for i in clmn_idx]) + ("|原文|" if docid_idx and docid_idx else "|")
     line = "|"+"|".join(["------" for _ in range(len(clmn_idx))]) + ("|------|" if docid_idx and docid_idx else "")
     rows = ["|"+"|".join([rmSpace(str(r[i])) for i in clmn_idx]).replace("None", " ") + "|" for r in tbl["rows"]]
     if not docid_idx or not docnm_idx:
         chat_logger.warning("SQL missing field: " + sql)

     # compose markdown table
     clmns = "|"+"|".join([re.sub(r"(/.*|（[^（）]+）)", "", field_map.get(tbl["columns"][i]["name"], tbl["columns"][i]["name"])) for i in clmn_idx]) + ("|原文|" if docid_idx and docid_idx else "|")
     line = "|"+"|".join(["------" for _ in range(len(clmn_idx))]) + ("|------|" if docid_idx and docid_idx else "")
+    line = re.sub(r"T[0-9]{2}:[0-9]{2}:[0-9]{2}\|", "|", line)
     rows = ["|"+"|".join([rmSpace(str(r[i])) for i in clmn_idx]).replace("None", " ") + "|" for r in tbl["rows"]]
     if not docid_idx or not docnm_idx:
         chat_logger.warning("SQL missing field: " + sql)

api/db/init_data.py CHANGED Viewed

@@ -94,7 +94,7 @@ def init_llm_factory():
              "name": "Local",
              "logo": "",
              "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
-            "status": "0",
         },{
             "name": "Moonshot",
              "logo": "",

              "name": "Local",
              "logo": "",
              "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
+            "status": "1",
         },{
             "name": "Moonshot",
              "logo": "",

api/db/services/knowledgebase_service.py CHANGED Viewed

@@ -78,7 +78,7 @@ class KnowledgebaseService(CommonService):
                 if isinstance(v, dict):
                     assert isinstance(old[k], dict)
                     dfs_update(old[k], v)
-                if isinstance(v, list):
                     assert isinstance(old[k], list)
                     old[k] = list(set(old[k]+v))
                 else: old[k] = v

                 if isinstance(v, dict):
                     assert isinstance(old[k], dict)
                     dfs_update(old[k], v)
+                elif isinstance(v, list):
                     assert isinstance(old[k], list)
                     old[k] = list(set(old[k]+v))
                 else: old[k] = v

rag/app/table.py CHANGED Viewed

@@ -73,9 +73,9 @@ def trans_datatime(s):
 def trans_bool(s):
-    if re.match(r"(true|yes|是)$", str(s).strip(), flags=re.IGNORECASE):
         return ["yes", "是"]
-    if re.match(r"(false|no|否)$", str(s).strip(), flags=re.IGNORECASE):
         return ["no", "否"]
@@ -107,9 +107,9 @@ def column_data_type(arr):
             arr[i] = trans[ty](str(arr[i]))
         except Exception as e:
             arr[i] = None
-    if ty == "text":
-        if len(arr) > 128 and uni / len(arr) < 0.1:
-            ty = "keyword"
     return arr, ty
@@ -170,7 +170,7 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000, lang="Chinese
     PY = Pinyin()
     fieds_map = {
         "text": "_tks",
-        "int": "_int",
         "keyword": "_kwd",
         "float": "_flt",
         "datetime": "_dt",
@@ -189,7 +189,7 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000, lang="Chinese
             df[clmns[j]] = cln
             if ty == "text":
                 txts.extend([str(c) for c in cln if c])
-        clmns_map = [(py_clmns[i] + fieds_map[clmn_tys[i]], clmns[i].replace("_", " "))
                      for i in range(len(clmns))]
         eng = lang.lower() == "english"#is_english(txts)
@@ -204,6 +204,8 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000, lang="Chinese
                     continue
                 if not str(row[clmns[j]]):
                     continue
                 fld = clmns_map[j][0]
                 d[fld] = row[clmns[j]] if clmn_tys[j] != "text" else huqie.qie(
                     row[clmns[j]])
@@ -223,7 +225,7 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000, lang="Chinese
 if __name__ == "__main__":
     import sys
-    def dummy(a, b):
         pass
     chunk(sys.argv[1], callback=dummy)

 def trans_bool(s):
+    if re.match(r"(true|yes|是|\*|✓|✔|☑|✅|√)$", str(s).strip(), flags=re.IGNORECASE):
         return ["yes", "是"]
+    if re.match(r"(false|no|否|⍻|×)$", str(s).strip(), flags=re.IGNORECASE):
         return ["no", "否"]
             arr[i] = trans[ty](str(arr[i]))
         except Exception as e:
             arr[i] = None
+    #if ty == "text":
+    #    if len(arr) > 128 and uni / len(arr) < 0.1:
+    #        ty = "keyword"
     return arr, ty
     PY = Pinyin()
     fieds_map = {
         "text": "_tks",
+        "int": "_long",
         "keyword": "_kwd",
         "float": "_flt",
         "datetime": "_dt",
             df[clmns[j]] = cln
             if ty == "text":
                 txts.extend([str(c) for c in cln if c])
+        clmns_map = [(py_clmns[i].lower() + fieds_map[clmn_tys[i]], clmns[i].replace("_", " "))
                      for i in range(len(clmns))]
         eng = lang.lower() == "english"#is_english(txts)
                     continue
                 if not str(row[clmns[j]]):
                     continue
+                if pd.isna(row[clmns[j]]):
+                    continue
                 fld = clmns_map[j][0]
                 d[fld] = row[clmns[j]] if clmn_tys[j] != "text" else huqie.qie(
                     row[clmns[j]])
 if __name__ == "__main__":
     import sys
+    def dummy(prog=None, msg=""):
         pass
     chunk(sys.argv[1], callback=dummy)

rag/utils/__init__.py CHANGED Viewed

@@ -19,8 +19,8 @@ from .minio_conn import MINIO
 from .es_conn import ELASTICSEARCH
 def rmSpace(txt):
-    txt = re.sub(r"([^a-z0-9.,]) +([^ ])", r"\1\2", txt)
-    return re.sub(r"([^ ]) +([^a-z0-9.,])", r"\1\2", txt)
 def findMaxDt(fnm):

 from .es_conn import ELASTICSEARCH
 def rmSpace(txt):
+    txt = re.sub(r"([^a-z0-9.,]) +([^ ])", r"\1\2", txt, flags=re.IGNORECASE)
+    return re.sub(r"([^ ]) +([^a-z0-9.,])", r"\1\2", txt, flags=re.IGNORECASE)
 def findMaxDt(fnm):