KevinHuSh committed on
Commit
2ef1d8e
·
1 Parent(s): 79f4fcc

resolve table issues (#125)

Browse files
Dockerfile CHANGED
@@ -14,7 +14,6 @@ ADD ./rag ./rag
14
  ENV PYTHONPATH=/ragflow/
15
  ENV HF_ENDPOINT=https://hf-mirror.com
16
 
17
- /root/miniconda3/envs/py11/bin/pip install peewee==3.17.1
18
  ADD docker/entrypoint.sh ./entrypoint.sh
19
  RUN chmod +x ./entrypoint.sh
20
 
 
14
  ENV PYTHONPATH=/ragflow/
15
  ENV HF_ENDPOINT=https://hf-mirror.com
16
 
 
17
  ADD docker/entrypoint.sh ./entrypoint.sh
18
  RUN chmod +x ./entrypoint.sh
19
 
Dockerfile.cuda CHANGED
@@ -19,7 +19,6 @@ ADD ./rag ./rag
19
  ENV PYTHONPATH=/ragflow/
20
  ENV HF_ENDPOINT=https://hf-mirror.com
21
 
22
- /root/miniconda3/envs/py11/bin/pip install peewee==3.17.1
23
  ADD docker/entrypoint.sh ./entrypoint.sh
24
  RUN chmod +x ./entrypoint.sh
25
 
 
19
  ENV PYTHONPATH=/ragflow/
20
  ENV HF_ENDPOINT=https://hf-mirror.com
21
 
 
22
  ADD docker/entrypoint.sh ./entrypoint.sh
23
  RUN chmod +x ./entrypoint.sh
24
 
api/apps/conversation_app.py CHANGED
@@ -309,6 +309,7 @@ def use_sql(question, field_map, tenant_id, chat_mdl):
309
  # compose markdown table
310
  clmns = "|"+"|".join([re.sub(r"(/.*|([^()]+))", "", field_map.get(tbl["columns"][i]["name"], tbl["columns"][i]["name"])) for i in clmn_idx]) + ("|原文|" if docid_idx and docid_idx else "|")
311
  line = "|"+"|".join(["------" for _ in range(len(clmn_idx))]) + ("|------|" if docid_idx and docid_idx else "")
 
312
  rows = ["|"+"|".join([rmSpace(str(r[i])) for i in clmn_idx]).replace("None", " ") + "|" for r in tbl["rows"]]
313
  if not docid_idx or not docnm_idx:
314
  chat_logger.warning("SQL missing field: " + sql)
 
309
  # compose markdown table
310
  clmns = "|"+"|".join([re.sub(r"(/.*|([^()]+))", "", field_map.get(tbl["columns"][i]["name"], tbl["columns"][i]["name"])) for i in clmn_idx]) + ("|原文|" if docid_idx and docid_idx else "|")
311
  line = "|"+"|".join(["------" for _ in range(len(clmn_idx))]) + ("|------|" if docid_idx and docid_idx else "")
312
+ line = re.sub(r"T[0-9]{2}:[0-9]{2}:[0-9]{2}\|", "|", line)
313
  rows = ["|"+"|".join([rmSpace(str(r[i])) for i in clmn_idx]).replace("None", " ") + "|" for r in tbl["rows"]]
314
  if not docid_idx or not docnm_idx:
315
  chat_logger.warning("SQL missing field: " + sql)
api/db/init_data.py CHANGED
@@ -94,7 +94,7 @@ def init_llm_factory():
94
  "name": "Local",
95
  "logo": "",
96
  "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
97
- "status": "0",
98
  },{
99
  "name": "Moonshot",
100
  "logo": "",
 
94
  "name": "Local",
95
  "logo": "",
96
  "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
97
+ "status": "1",
98
  },{
99
  "name": "Moonshot",
100
  "logo": "",
api/db/services/knowledgebase_service.py CHANGED
@@ -78,7 +78,7 @@ class KnowledgebaseService(CommonService):
78
  if isinstance(v, dict):
79
  assert isinstance(old[k], dict)
80
  dfs_update(old[k], v)
81
- if isinstance(v, list):
82
  assert isinstance(old[k], list)
83
  old[k] = list(set(old[k]+v))
84
  else: old[k] = v
 
78
  if isinstance(v, dict):
79
  assert isinstance(old[k], dict)
80
  dfs_update(old[k], v)
81
+ elif isinstance(v, list):
82
  assert isinstance(old[k], list)
83
  old[k] = list(set(old[k]+v))
84
  else: old[k] = v
rag/app/table.py CHANGED
@@ -73,9 +73,9 @@ def trans_datatime(s):
73
 
74
 
75
  def trans_bool(s):
76
- if re.match(r"(true|yes|是)$", str(s).strip(), flags=re.IGNORECASE):
77
  return ["yes", "是"]
78
- if re.match(r"(false|no|否)$", str(s).strip(), flags=re.IGNORECASE):
79
  return ["no", "否"]
80
 
81
 
@@ -107,9 +107,9 @@ def column_data_type(arr):
107
  arr[i] = trans[ty](str(arr[i]))
108
  except Exception as e:
109
  arr[i] = None
110
- if ty == "text":
111
- if len(arr) > 128 and uni / len(arr) < 0.1:
112
- ty = "keyword"
113
  return arr, ty
114
 
115
 
@@ -170,7 +170,7 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000, lang="Chinese
170
  PY = Pinyin()
171
  fieds_map = {
172
  "text": "_tks",
173
- "int": "_int",
174
  "keyword": "_kwd",
175
  "float": "_flt",
176
  "datetime": "_dt",
@@ -189,7 +189,7 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000, lang="Chinese
189
  df[clmns[j]] = cln
190
  if ty == "text":
191
  txts.extend([str(c) for c in cln if c])
192
- clmns_map = [(py_clmns[i] + fieds_map[clmn_tys[i]], clmns[i].replace("_", " "))
193
  for i in range(len(clmns))]
194
 
195
  eng = lang.lower() == "english"#is_english(txts)
@@ -204,6 +204,8 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000, lang="Chinese
204
  continue
205
  if not str(row[clmns[j]]):
206
  continue
 
 
207
  fld = clmns_map[j][0]
208
  d[fld] = row[clmns[j]] if clmn_tys[j] != "text" else huqie.qie(
209
  row[clmns[j]])
@@ -223,7 +225,7 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000, lang="Chinese
223
  if __name__ == "__main__":
224
  import sys
225
 
226
- def dummy(a, b):
227
  pass
228
 
229
  chunk(sys.argv[1], callback=dummy)
 
73
 
74
 
75
  def trans_bool(s):
76
+ if re.match(r"(true|yes|是|\*|✓|✔|☑|✅|√)$", str(s).strip(), flags=re.IGNORECASE):
77
  return ["yes", "是"]
78
+ if re.match(r"(false|no|否|⍻|×)$", str(s).strip(), flags=re.IGNORECASE):
79
  return ["no", "否"]
80
 
81
 
 
107
  arr[i] = trans[ty](str(arr[i]))
108
  except Exception as e:
109
  arr[i] = None
110
+ #if ty == "text":
111
+ # if len(arr) > 128 and uni / len(arr) < 0.1:
112
+ # ty = "keyword"
113
  return arr, ty
114
 
115
 
 
170
  PY = Pinyin()
171
  fieds_map = {
172
  "text": "_tks",
173
+ "int": "_long",
174
  "keyword": "_kwd",
175
  "float": "_flt",
176
  "datetime": "_dt",
 
189
  df[clmns[j]] = cln
190
  if ty == "text":
191
  txts.extend([str(c) for c in cln if c])
192
+ clmns_map = [(py_clmns[i].lower() + fieds_map[clmn_tys[i]], clmns[i].replace("_", " "))
193
  for i in range(len(clmns))]
194
 
195
  eng = lang.lower() == "english"#is_english(txts)
 
204
  continue
205
  if not str(row[clmns[j]]):
206
  continue
207
+ if pd.isna(row[clmns[j]]):
208
+ continue
209
  fld = clmns_map[j][0]
210
  d[fld] = row[clmns[j]] if clmn_tys[j] != "text" else huqie.qie(
211
  row[clmns[j]])
 
225
  if __name__ == "__main__":
226
  import sys
227
 
228
+ def dummy(prog=None, msg=""):
229
  pass
230
 
231
  chunk(sys.argv[1], callback=dummy)
rag/utils/__init__.py CHANGED
@@ -19,8 +19,8 @@ from .minio_conn import MINIO
19
  from .es_conn import ELASTICSEARCH
20
 
21
  def rmSpace(txt):
22
- txt = re.sub(r"([^a-z0-9.,]) +([^ ])", r"\1\2", txt)
23
- return re.sub(r"([^ ]) +([^a-z0-9.,])", r"\1\2", txt)
24
 
25
 
26
  def findMaxDt(fnm):
 
19
  from .es_conn import ELASTICSEARCH
20
 
21
  def rmSpace(txt):
22
+ txt = re.sub(r"([^a-z0-9.,]) +([^ ])", r"\1\2", txt, flags=re.IGNORECASE)
23
+ return re.sub(r"([^ ]) +([^a-z0-9.,])", r"\1\2", txt, flags=re.IGNORECASE)
24
 
25
 
26
  def findMaxDt(fnm):