Kevin Hu
committed on
Commit
·
8db6538
1
Parent(s):
8b00b96
Support debug components. (#3994)
Browse files
### What problem does this PR solve?
#3993
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
- agent/canvas.py +2 -1
- agent/component/base.py +14 -5
- agent/component/begin.py +1 -1
- agent/component/generate.py +15 -3
- api/apps/canvas_app.py +26 -2
- api/apps/conversation_app.py +2 -0
- api/apps/document_app.py +1 -1
- rag/svr/task_executor.py +2 -2
agent/canvas.py
CHANGED
|
@@ -211,6 +211,7 @@ class Canvas(ABC):
|
|
| 211 |
except Exception as e:
|
| 212 |
logging.exception(f"Canvas.run got exception: {e}")
|
| 213 |
self.path[-1].append(c)
|
|
|
|
| 214 |
raise e
|
| 215 |
self.path[-1].append(c)
|
| 216 |
ran += 1
|
|
@@ -330,4 +331,4 @@ class Canvas(ABC):
|
|
| 330 |
return self.components["begin"]["obj"]._param.query
|
| 331 |
|
| 332 |
def get_component_input_elements(self, cpnnm):
|
| 333 |
-
return self.components[
|
|
|
|
| 211 |
except Exception as e:
|
| 212 |
logging.exception(f"Canvas.run got exception: {e}")
|
| 213 |
self.path[-1].append(c)
|
| 214 |
+
ran += 1
|
| 215 |
raise e
|
| 216 |
self.path[-1].append(c)
|
| 217 |
ran += 1
|
|
|
|
| 331 |
return self.components["begin"]["obj"]._param.query
|
| 332 |
|
| 333 |
def get_component_input_elements(self, cpnnm):
|
| 334 |
+
return self.components[cpnnm]["obj"].get_input_elements()
|
agent/component/base.py
CHANGED
|
@@ -37,6 +37,7 @@ class ComponentParamBase(ABC):
|
|
| 37 |
self.message_history_window_size = 22
|
| 38 |
self.query = []
|
| 39 |
self.inputs = []
|
|
|
|
| 40 |
|
| 41 |
def set_name(self, name: str):
|
| 42 |
self._name = name
|
|
@@ -410,6 +411,7 @@ class ComponentBase(ABC):
|
|
| 410 |
def run(self, history, **kwargs):
|
| 411 |
logging.debug("{}, history: {}, kwargs: {}".format(self, json.dumps(history, ensure_ascii=False),
|
| 412 |
json.dumps(kwargs, ensure_ascii=False)))
|
|
|
|
| 413 |
try:
|
| 414 |
res = self._run(history, **kwargs)
|
| 415 |
self.set_output(res)
|
|
@@ -446,10 +448,13 @@ class ComponentBase(ABC):
|
|
| 446 |
setattr(self._param, self._param.output_var_name, None)
|
| 447 |
self._param.inputs = []
|
| 448 |
|
| 449 |
-
def set_output(self, v
|
| 450 |
setattr(self._param, self._param.output_var_name, v)
|
| 451 |
|
| 452 |
def get_input(self):
|
|
|
|
|
|
|
|
|
|
| 453 |
reversed_cpnts = []
|
| 454 |
if len(self._canvas.path) > 1:
|
| 455 |
reversed_cpnts.extend(self._canvas.path[-2])
|
|
@@ -531,14 +536,15 @@ class ComponentBase(ABC):
|
|
| 531 |
eles = []
|
| 532 |
for q in self._param.query:
|
| 533 |
if q.get("component_id"):
|
| 534 |
-
|
| 535 |
-
|
|
|
|
| 536 |
eles.extend(self._canvas.get_component(cpn_id)["obj"]._param.query)
|
| 537 |
continue
|
| 538 |
|
| 539 |
-
eles.append({"
|
| 540 |
else:
|
| 541 |
-
eles.append({"key": q["
|
| 542 |
return eles
|
| 543 |
|
| 544 |
def get_stream_input(self):
|
|
@@ -558,3 +564,6 @@ class ComponentBase(ABC):
|
|
| 558 |
|
| 559 |
def get_component_name(self, cpn_id):
|
| 560 |
return self._canvas.get_component(cpn_id)["obj"].component_name.lower()
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
self.message_history_window_size = 22
|
| 38 |
self.query = []
|
| 39 |
self.inputs = []
|
| 40 |
+
self.debug_inputs = []
|
| 41 |
|
| 42 |
def set_name(self, name: str):
|
| 43 |
self._name = name
|
|
|
|
| 411 |
def run(self, history, **kwargs):
|
| 412 |
logging.debug("{}, history: {}, kwargs: {}".format(self, json.dumps(history, ensure_ascii=False),
|
| 413 |
json.dumps(kwargs, ensure_ascii=False)))
|
| 414 |
+
self._param.debug_inputs = []
|
| 415 |
try:
|
| 416 |
res = self._run(history, **kwargs)
|
| 417 |
self.set_output(res)
|
|
|
|
| 448 |
setattr(self._param, self._param.output_var_name, None)
|
| 449 |
self._param.inputs = []
|
| 450 |
|
| 451 |
+
def set_output(self, v):
|
| 452 |
setattr(self._param, self._param.output_var_name, v)
|
| 453 |
|
| 454 |
def get_input(self):
|
| 455 |
+
if self._param.debug_inputs:
|
| 456 |
+
return pd.DataFrame([{"content": v["value"]} for v in self._param.debug_inputs])
|
| 457 |
+
|
| 458 |
reversed_cpnts = []
|
| 459 |
if len(self._canvas.path) > 1:
|
| 460 |
reversed_cpnts.extend(self._canvas.path[-2])
|
|
|
|
| 536 |
eles = []
|
| 537 |
for q in self._param.query:
|
| 538 |
if q.get("component_id"):
|
| 539 |
+
cpn_id = q["component_id"]
|
| 540 |
+
if cpn_id.split("@")[0].lower().find("begin") >= 0:
|
| 541 |
+
cpn_id, key = cpn_id.split("@")
|
| 542 |
eles.extend(self._canvas.get_component(cpn_id)["obj"]._param.query)
|
| 543 |
continue
|
| 544 |
|
| 545 |
+
eles.append({"name": self._canvas.get_compnent_name(cpn_id), "key": cpn_id})
|
| 546 |
else:
|
| 547 |
+
eles.append({"key": q["value"], "name": q["value"], "value": q["value"]})
|
| 548 |
return eles
|
| 549 |
|
| 550 |
def get_stream_input(self):
|
|
|
|
| 564 |
|
| 565 |
def get_component_name(self, cpn_id):
|
| 566 |
return self._canvas.get_component(cpn_id)["obj"].component_name.lower()
|
| 567 |
+
|
| 568 |
+
def debug(self, **kwargs):
|
| 569 |
+
return self._run([], **kwargs)
|
agent/component/begin.py
CHANGED
|
@@ -43,7 +43,7 @@ class Begin(ComponentBase):
|
|
| 43 |
def stream_output(self):
|
| 44 |
res = {"content": self._param.prologue}
|
| 45 |
yield res
|
| 46 |
-
self.set_output(res)
|
| 47 |
|
| 48 |
|
| 49 |
|
|
|
|
| 43 |
def stream_output(self):
|
| 44 |
res = {"content": self._param.prologue}
|
| 45 |
yield res
|
| 46 |
+
self.set_output(self.be_output(res))
|
| 47 |
|
| 48 |
|
| 49 |
|
agent/component/generate.py
CHANGED
|
@@ -111,9 +111,9 @@ class Generate(ComponentBase):
|
|
| 111 |
|
| 112 |
def get_input_elements(self):
|
| 113 |
if self._param.parameters:
|
| 114 |
-
return self._param.parameters
|
| 115 |
|
| 116 |
-
return [{"key": "
|
| 117 |
|
| 118 |
def _run(self, history, **kwargs):
|
| 119 |
chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id)
|
|
@@ -218,4 +218,16 @@ class Generate(ComponentBase):
|
|
| 218 |
res = self.set_cite(retrieval_res, answer)
|
| 219 |
yield res
|
| 220 |
|
| 221 |
-
self.set_output(res)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
|
| 112 |
def get_input_elements(self):
|
| 113 |
if self._param.parameters:
|
| 114 |
+
return [{"key": "user"}, *self._param.parameters]
|
| 115 |
|
| 116 |
+
return [{"key": "user"}]
|
| 117 |
|
| 118 |
def _run(self, history, **kwargs):
|
| 119 |
chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id)
|
|
|
|
| 218 |
res = self.set_cite(retrieval_res, answer)
|
| 219 |
yield res
|
| 220 |
|
| 221 |
+
self.set_output(Generate.be_output(res))
|
| 222 |
+
|
| 223 |
+
def debug(self, history, **kwargs):
|
| 224 |
+
chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id)
|
| 225 |
+
prompt = self._param.prompt
|
| 226 |
+
|
| 227 |
+
for para in self._param.debug_inputs:
|
| 228 |
+
kwargs[para["key"]] = para["value"]
|
| 229 |
+
|
| 230 |
+
for n, v in kwargs.items():
|
| 231 |
+
prompt = re.sub(r"\{%s\}" % re.escape(n), str(v).replace("\\", " "), prompt)
|
| 232 |
+
|
| 233 |
+
return chat_mdl.chat(prompt, [{"role": "user", "content": kwargs.get("user", "")}], self._param.gen_conf())
|
api/apps/canvas_app.py
CHANGED
|
@@ -187,10 +187,32 @@ def reset():
|
|
| 187 |
|
| 188 |
|
| 189 |
@manager.route('/input_elements', methods=['GET']) # noqa: F821
|
| 190 |
-
@validate_request("id", "component_id")
|
| 191 |
@login_required
|
| 192 |
def input_elements():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
req = request.json
|
|
|
|
|
|
|
| 194 |
try:
|
| 195 |
e, user_canvas = UserCanvasService.get_by_id(req["id"])
|
| 196 |
if not e:
|
|
@@ -201,7 +223,9 @@ def input_elements():
|
|
| 201 |
code=RetCode.OPERATING_ERROR)
|
| 202 |
|
| 203 |
canvas = Canvas(json.dumps(user_canvas.dsl), current_user.id)
|
| 204 |
-
|
|
|
|
|
|
|
| 205 |
except Exception as e:
|
| 206 |
return server_error_response(e)
|
| 207 |
|
|
|
|
| 187 |
|
| 188 |
|
| 189 |
@manager.route('/input_elements', methods=['GET']) # noqa: F821
|
|
|
|
| 190 |
@login_required
|
| 191 |
def input_elements():
|
| 192 |
+
cvs_id = request.args.get("id")
|
| 193 |
+
cpn_id = request.args.get("component_id")
|
| 194 |
+
try:
|
| 195 |
+
e, user_canvas = UserCanvasService.get_by_id(cvs_id)
|
| 196 |
+
if not e:
|
| 197 |
+
return get_data_error_result(message="canvas not found.")
|
| 198 |
+
if not UserCanvasService.query(user_id=current_user.id, id=cvs_id):
|
| 199 |
+
return get_json_result(
|
| 200 |
+
data=False, message='Only owner of canvas authorized for this operation.',
|
| 201 |
+
code=RetCode.OPERATING_ERROR)
|
| 202 |
+
|
| 203 |
+
canvas = Canvas(json.dumps(user_canvas.dsl), current_user.id)
|
| 204 |
+
return get_json_result(data=canvas.get_component_input_elements(cpn_id))
|
| 205 |
+
except Exception as e:
|
| 206 |
+
return server_error_response(e)
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
@manager.route('/debug', methods=['POST']) # noqa: F821
|
| 210 |
+
@validate_request("id", "component_id", "params")
|
| 211 |
+
@login_required
|
| 212 |
+
def debug():
|
| 213 |
req = request.json
|
| 214 |
+
for p in req["params"]:
|
| 215 |
+
assert p.get("key")
|
| 216 |
try:
|
| 217 |
e, user_canvas = UserCanvasService.get_by_id(req["id"])
|
| 218 |
if not e:
|
|
|
|
| 223 |
code=RetCode.OPERATING_ERROR)
|
| 224 |
|
| 225 |
canvas = Canvas(json.dumps(user_canvas.dsl), current_user.id)
|
| 226 |
+
canvas.get_component(req["component_id"])["obj"]._param.debug_inputs = req["params"]
|
| 227 |
+
df = canvas.get_component(req["component_id"])["obj"].debug()
|
| 228 |
+
return get_json_result(data=df.to_dict(orient="records"))
|
| 229 |
except Exception as e:
|
| 230 |
return server_error_response(e)
|
| 231 |
|
api/apps/conversation_app.py
CHANGED
|
@@ -95,6 +95,8 @@ def get():
|
|
| 95 |
return d.get(k1, d.get(k2))
|
| 96 |
|
| 97 |
for ref in conv.reference:
|
|
|
|
|
|
|
| 98 |
ref["chunks"] = [{
|
| 99 |
"id": get_value(ck, "chunk_id", "id"),
|
| 100 |
"content": get_value(ck, "content", "content_with_weight"),
|
|
|
|
| 95 |
return d.get(k1, d.get(k2))
|
| 96 |
|
| 97 |
for ref in conv.reference:
|
| 98 |
+
if isinstance(ref, list):
|
| 99 |
+
continue
|
| 100 |
ref["chunks"] = [{
|
| 101 |
"id": get_value(ck, "chunk_id", "id"),
|
| 102 |
"content": get_value(ck, "content", "content_with_weight"),
|
api/apps/document_app.py
CHANGED
|
@@ -552,7 +552,7 @@ def parse():
|
|
| 552 |
})
|
| 553 |
driver = Chrome(options=options)
|
| 554 |
driver.get(url)
|
| 555 |
-
res_headers = [r.response.headers for r in driver.requests]
|
| 556 |
if len(res_headers) > 1:
|
| 557 |
sections = RAGFlowHtmlParser().parser_txt(driver.page_source)
|
| 558 |
driver.quit()
|
|
|
|
| 552 |
})
|
| 553 |
driver = Chrome(options=options)
|
| 554 |
driver.get(url)
|
| 555 |
+
res_headers = [r.response.headers for r in driver.requests if r and r.response]
|
| 556 |
if len(res_headers) > 1:
|
| 557 |
sections = RAGFlowHtmlParser().parser_txt(driver.page_source)
|
| 558 |
driver.quit()
|
rag/svr/task_executor.py
CHANGED
|
@@ -54,7 +54,7 @@ from rag.app import laws, paper, presentation, manual, qa, table, book, resume,
|
|
| 54 |
from rag.nlp import search, rag_tokenizer
|
| 55 |
from rag.raptor import RecursiveAbstractiveProcessing4TreeOrganizedRetrieval as Raptor
|
| 56 |
from rag.settings import DOC_MAXIMUM_SIZE, SVR_QUEUE_NAME, print_rag_settings
|
| 57 |
-
from rag.utils import
|
| 58 |
from rag.utils.redis_conn import REDIS_CONN, Payload
|
| 59 |
from rag.utils.storage_factory import STORAGE_IMPL
|
| 60 |
|
|
@@ -269,7 +269,7 @@ def embedding(docs, mdl, parser_config=None, callback=None):
|
|
| 269 |
batch_size = 16
|
| 270 |
tts, cnts = [], []
|
| 271 |
for d in docs:
|
| 272 |
-
tts.append(
|
| 273 |
c = "\n".join(d.get("question_kwd", []))
|
| 274 |
if not c:
|
| 275 |
c = d["content_with_weight"]
|
|
|
|
| 54 |
from rag.nlp import search, rag_tokenizer
|
| 55 |
from rag.raptor import RecursiveAbstractiveProcessing4TreeOrganizedRetrieval as Raptor
|
| 56 |
from rag.settings import DOC_MAXIMUM_SIZE, SVR_QUEUE_NAME, print_rag_settings
|
| 57 |
+
from rag.utils import num_tokens_from_string
|
| 58 |
from rag.utils.redis_conn import REDIS_CONN, Payload
|
| 59 |
from rag.utils.storage_factory import STORAGE_IMPL
|
| 60 |
|
|
|
|
| 269 |
batch_size = 16
|
| 270 |
tts, cnts = [], []
|
| 271 |
for d in docs:
|
| 272 |
+
tts.append(d.get("docnm_kwd", "Title"))
|
| 273 |
c = "\n".join(d.get("question_kwd", []))
|
| 274 |
if not c:
|
| 275 |
c = d["content_with_weight"]
|