Kevin Hu committed on
Commit
fe09cc5
·
1 Parent(s): b80e2f3

Add metadata while chatting. (#4455)

Browse files

### What problem does this PR solve?

#3690

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

Files changed (1) hide show
  1. api/db/services/dialog_service.py +12 -8
api/db/services/dialog_service.py CHANGED
@@ -26,6 +26,7 @@ from datetime import timedelta
26
  from api.db import LLMType, ParserType, StatusEnum
27
  from api.db.db_models import Dialog, DB
28
  from api.db.services.common_service import CommonService
 
29
  from api.db.services.knowledgebase_service import KnowledgebaseService
30
  from api.db.services.llm_service import LLMService, TenantLLMService, LLMBundle
31
  from api import settings
@@ -122,18 +123,21 @@ def kb_prompt(kbinfos, max_tokens):
122
  knowledges = knowledges[:i]
123
  break
124
 
125
- #docs = DocumentService.get_by_ids([ck["doc_id"] for ck in kbinfos["chunks"][:chunks_num]])
126
- #docs = {d.id: d.meta_fields for d in docs}
127
 
128
- doc2chunks = defaultdict(list)
129
  for ck in kbinfos["chunks"][:chunks_num]:
130
- doc2chunks[ck["docnm_kwd"]].append(ck["content_with_weight"])
 
131
 
132
  knowledges = []
133
- for nm, chunks in doc2chunks.items():
134
  txt = f"Document: {nm} \n"
135
- txt += "Contains the following relevant fragments:\n"
136
- for i, chunk in enumerate(chunks, 1):
 
 
137
  txt += f"{i}. {chunk}\n"
138
  knowledges.append(txt)
139
  return knowledges
@@ -283,7 +287,7 @@ def chat(dialog, messages, stream=True, **kwargs):
283
  yield {"answer": empty_res, "reference": kbinfos, "audio_binary": tts(tts_mdl, empty_res)}
284
  return {"answer": prompt_config["empty_response"], "reference": kbinfos}
285
 
286
- kwargs["knowledge"] = "\n\n------\n\n".join(knowledges)
287
  gen_conf = dialog.llm_setting
288
 
289
  msg = [{"role": "system", "content": prompt_config["system"].format(**kwargs)}]
 
26
  from api.db import LLMType, ParserType, StatusEnum
27
  from api.db.db_models import Dialog, DB
28
  from api.db.services.common_service import CommonService
29
+ from api.db.services.document_service import DocumentService
30
  from api.db.services.knowledgebase_service import KnowledgebaseService
31
  from api.db.services.llm_service import LLMService, TenantLLMService, LLMBundle
32
  from api import settings
 
123
  knowledges = knowledges[:i]
124
  break
125
 
126
+ docs = DocumentService.get_by_ids([ck["doc_id"] for ck in kbinfos["chunks"][:chunks_num]])
127
+ docs = {d.id: d.meta_fields for d in docs}
128
 
129
+ doc2chunks = defaultdict(lambda: {"chunks": [], "meta": []})
130
  for ck in kbinfos["chunks"][:chunks_num]:
131
+ doc2chunks[ck["docnm_kwd"]]["chunks"].append(ck["content_with_weight"])
132
+ doc2chunks[ck["docnm_kwd"]]["meta"] = docs.get(ck["doc_id"], {})
133
 
134
  knowledges = []
135
+ for nm, cks_meta in doc2chunks.items():
136
  txt = f"Document: {nm} \n"
137
+ for k,v in cks_meta["meta"].items():
138
+ txt += f"{k}: {v}\n"
139
+ txt += "Relevant fragments as following:\n"
140
+ for i, chunk in enumerate(cks_meta["chunks"], 1):
141
  txt += f"{i}. {chunk}\n"
142
  knowledges.append(txt)
143
  return knowledges
 
287
  yield {"answer": empty_res, "reference": kbinfos, "audio_binary": tts(tts_mdl, empty_res)}
288
  return {"answer": prompt_config["empty_response"], "reference": kbinfos}
289
 
290
+ kwargs["knowledge"] = "\n------\n" + "\n\n------\n\n".join(knowledges)
291
  gen_conf = dialog.llm_setting
292
 
293
  msg = [{"role": "system", "content": prompt_config["system"].format(**kwargs)}]