Rework logging (#3358)
### What problem does this PR solve?
Unified all log files into one.
### Type of change
- [x] Refactoring
This diff view is limited to 50 files because the change set is too large; see the raw diff for the remaining files.
- agent/canvas.py +5 -7
- agent/component/arxiv.py +2 -3
- agent/component/baidu.py +2 -4
- agent/component/base.py +7 -7
- agent/component/bing.py +2 -3
- agent/component/categorize.py +3 -3
- agent/component/duckduckgo.py +2 -2
- agent/component/github.py +2 -2
- agent/component/google.py +3 -3
- agent/component/googlescholar.py +4 -4
- agent/component/keyword.py +2 -2
- agent/component/pubmed.py +2 -2
- agent/component/relevant.py +2 -1
- agent/component/retrieval.py +2 -1
- agent/component/rewrite.py +2 -1
- agent/component/wikipedia.py +2 -4
- agent/component/yahoofinance.py +3 -2
- agent/settings.py +0 -16
- api/apps/__init__.py +5 -10
- api/apps/canvas_app.py +2 -1
- api/apps/llm_app.py +2 -1
- api/apps/sdk/dataset.py +1 -1
- api/apps/user_app.py +7 -7
- api/db/db_models.py +8 -11
- api/db/db_utils.py +0 -6
- api/db/init_data.py +14 -16
- api/db/operatioins.py +0 -21
- api/db/services/dialog_service.py +10 -13
- api/db/services/document_service.py +4 -4
- api/db/services/file_service.py +5 -4
- api/db/services/llm_service.py +17 -17
- api/ragflow_server.py +14 -18
- api/settings.py +0 -17
- api/utils/api_utils.py +4 -3
- api/utils/log_utils.py +25 -287
- deepdoc/parser/pdf_parser.py +25 -24
- deepdoc/parser/resume/entities/corporations.py +9 -3
- deepdoc/parser/resume/step_two.py +20 -15
- deepdoc/vision/operators.py +2 -2
- deepdoc/vision/recognizer.py +2 -1
- deepdoc/vision/seeit.py +2 -1
- deepdoc/vision/t_recognizer.py +5 -2
- graphrag/claim_extractor.py +3 -4
- graphrag/community_reports_extractor.py +4 -7
- graphrag/index.py +3 -2
- graphrag/mind_map_extractor.py +3 -3
- intergrations/chatgpt-on-wechat/plugins/ragflow_chat.py +3 -3
- rag/app/book.py +2 -1
- rag/app/email.py +2 -2
- rag/app/laws.py +3 -3
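
Across the files below, the per-module loggers used previously (`flow_logger`, `chat_logger`, `database_logger`, `stat_logger`, and the `LoggerFactory` setup in `agent/settings.py`) are replaced with a single `logger` imported from `api.utils.log_utils`. The rewritten `api/utils/log_utils.py` itself is not reproduced in this truncated view (only its `+25 -287` summary appears above), so the following is just a minimal sketch of what such a unified module could look like; the log file name, directory, format, and rotation policy are assumptions, not the PR's actual values.

```python
# Hypothetical sketch of a unified logging module (api/utils/log_utils.py).
# The real implementation is not shown in this diff view; paths, file name,
# format, and rotation settings below are illustrative assumptions.
import logging
import os
from logging.handlers import RotatingFileHandler

LOG_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "logs"))
os.makedirs(LOG_DIR, exist_ok=True)

# One project-wide logger instead of separate flow/chat/database/stat loggers.
logger = logging.getLogger("ragflow")
logger.setLevel(logging.INFO)

_formatter = logging.Formatter(
    "%(asctime)s %(levelname)s %(filename)s:%(lineno)d %(message)s")

# All log records go to a single rotating file (assumed 10 MB x 5 backups).
_file_handler = RotatingFileHandler(
    os.path.join(LOG_DIR, "ragflow_server.log"),
    maxBytes=10 * 1024 * 1024,
    backupCount=5,
)
_file_handler.setFormatter(_formatter)

_console_handler = logging.StreamHandler()
_console_handler.setFormatter(_formatter)

# Guard against duplicate handlers when the module is imported repeatedly.
if not logger.handlers:
    logger.addHandler(_file_handler)
    logger.addHandler(_console_handler)
```

Call sites then only need `from api.utils.log_utils import logger` plus the standard `logger.info(...)`, `logger.debug(...)`, and `logger.exception(...)` calls, which is exactly the pattern the per-file hunks below apply.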
agent/canvas.py  CHANGED

@@ -14,14 +14,12 @@
 # limitations under the License.
 #
 import json
-import traceback
 from abc import ABC
 from copy import deepcopy
 from functools import partial
 from agent.component import component_class
 from agent.component.base import ComponentBase
-from
-
+from api.utils.log_utils import logger
 
 class Canvas(ABC):
     """
@@ -189,7 +187,7 @@ class Canvas(ABC):
                 if cpn.component_name == "Answer":
                     self.answer.append(c)
                 else:
-
+                    logger.debug(f"Canvas.prepare2run: {c}")
                     cpids = cpn.get_dependent_components()
                     if any([c not in self.path[-1] for c in cpids]):
                         continue
@@ -199,7 +197,7 @@
 
         prepare2run(self.components[self.path[-2][-1]]["downstream"])
         while 0 <= ran < len(self.path[-1]):
-
+            logger.debug(f"Canvas.run: {ran} {self.path}")
             cpn_id = self.path[-1][ran]
             cpn = self.get_component(cpn_id)
             if not cpn["downstream"]: break
@@ -219,7 +217,7 @@
                         self.get_component(p)["obj"].set_exception(e)
                         prepare2run([p])
                         break
-
+                    logger.exception("Canvas.run got exception")
                     break
                 continue
 
@@ -231,7 +229,7 @@
                     self.get_component(p)["obj"].set_exception(e)
                     prepare2run([p])
                     break
-
+                logger.exception("Canvas.run got exception")
                 break
 
         if self.answer:
agent/component/arxiv.py  CHANGED

@@ -16,9 +16,8 @@
 from abc import ABC
 import arxiv
 import pandas as pd
-from agent.settings import DEBUG
 from agent.component.base import ComponentBase, ComponentParamBase
-
+from api.utils.log_utils import logger
 
 class ArXivParam(ComponentParamBase):
     """
@@ -65,5 +64,5 @@ class ArXiv(ComponentBase, ABC):
             return ArXiv.be_output("")
 
         df = pd.DataFrame(arxiv_res)
-
+        logger.debug(f"df: {str(df)}")
         return df
agent/component/baidu.py  CHANGED

@@ -13,14 +13,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import random
 from abc import ABC
-from functools import partial
 import pandas as pd
 import requests
 import re
-from agent.settings import DEBUG
 from agent.component.base import ComponentBase, ComponentParamBase
+from api.utils.log_utils import logger
 
 
 class BaiduParam(ComponentParamBase):
@@ -64,6 +62,6 @@ class Baidu(ComponentBase, ABC):
             return Baidu.be_output("")
 
         df = pd.DataFrame(baidu_res)
-
+        logger.debug(f"df: {str(df)}")
         return df
 
agent/component/base.py  CHANGED

@@ -17,14 +17,14 @@ from abc import ABC
 import builtins
 import json
 import os
-from copy import deepcopy
 from functools import partial
-from typing import
+from typing import Tuple, Union
 
 import pandas as pd
 
 from agent import settings
-from
+from api.utils.log_utils import logger
+
 
 _FEEDED_DEPRECATED_PARAMS = "_feeded_deprecated_params"
 _DEPRECATED_PARAMS = "_deprecated_params"
@@ -361,13 +361,13 @@ class ComponentParamBase(ABC):
 
     def _warn_deprecated_param(self, param_name, descr):
         if self._deprecated_params_set.get(param_name):
-
+            logger.warning(
                 f"{descr} {param_name} is deprecated and ignored in this version."
             )
 
     def _warn_to_deprecate_param(self, param_name, descr, new_param):
         if self._deprecated_params_set.get(param_name):
-
+            logger.warning(
                 f"{descr} {param_name} will be deprecated in future release; "
                 f"please use {new_param} instead."
             )
@@ -403,7 +403,7 @@ class ComponentBase(ABC):
         return cpnts
 
     def run(self, history, **kwargs):
-
+        logger.info("{}, history: {}, kwargs: {}".format(self, json.dumps(history, ensure_ascii=False),
                                                          json.dumps(kwargs, ensure_ascii=False)))
         try:
             res = self._run(history, **kwargs)
@@ -463,7 +463,7 @@
             reversed_cpnts.extend(self._canvas.path[-2])
         reversed_cpnts.extend(self._canvas.path[-1])
 
-
+        logger.debug(f"{self.component_name} {reversed_cpnts[::-1]}")
         for u in reversed_cpnts[::-1]:
             if self.get_component_name(u) in ["switch", "concentrator"]: continue
             if self.component_name.lower() == "generate" and self.get_component_name(u) == "retrieval":
agent/component/bing.py  CHANGED

@@ -16,9 +16,8 @@
 from abc import ABC
 import requests
 import pandas as pd
-from agent.settings import DEBUG
 from agent.component.base import ComponentBase, ComponentParamBase
-
+from api.utils.log_utils import logger
 
 class BingParam(ComponentParamBase):
     """
@@ -81,5 +80,5 @@ class Bing(ComponentBase, ABC):
             return Bing.be_output("")
 
         df = pd.DataFrame(bing_res)
-
+        logger.debug(f"df: {str(df)}")
         return df
agent/component/categorize.py  CHANGED

@@ -17,7 +17,7 @@ from abc import ABC
 from api.db import LLMType
 from api.db.services.llm_service import LLMBundle
 from agent.component import GenerateParam, Generate
-from
+from api.utils.log_utils import logger
 
 
 class CategorizeParam(GenerateParam):
@@ -34,7 +34,7 @@ class CategorizeParam(GenerateParam):
         super().check()
         self.check_empty(self.category_description, "[Categorize] Category examples")
         for k, v in self.category_description.items():
-            if not k: raise ValueError(
+            if not k: raise ValueError("[Categorize] Category name can not be empty!")
             if not v.get("to"): raise ValueError(f"[Categorize] 'To' of category {k} can not be empty!")
 
     def get_prompt(self):
@@ -77,7 +77,7 @@ class Categorize(Generate, ABC):
         chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id)
         ans = chat_mdl.chat(self._param.get_prompt(), [{"role": "user", "content": input}],
                             self._param.gen_conf())
-
+        logger.debug(f"input: {input}, answer: {str(ans)}")
         for c in self._param.category_description.keys():
             if ans.lower().find(c.lower()) >= 0:
                 return Categorize.be_output(self._param.category_description[c]["to"])
agent/component/duckduckgo.py  CHANGED

@@ -16,8 +16,8 @@
 from abc import ABC
 from duckduckgo_search import DDGS
 import pandas as pd
-from agent.settings import DEBUG
 from agent.component.base import ComponentBase, ComponentParamBase
+from api.utils.log_utils import logger
 
 
 class DuckDuckGoParam(ComponentParamBase):
@@ -62,5 +62,5 @@ class DuckDuckGo(ComponentBase, ABC):
             return DuckDuckGo.be_output("")
 
         df = pd.DataFrame(duck_res)
-
+        logger.debug("df: {df}")
         return df
agent/component/github.py  CHANGED

@@ -16,8 +16,8 @@
 from abc import ABC
 import pandas as pd
 import requests
-from agent.settings import DEBUG
 from agent.component.base import ComponentBase, ComponentParamBase
+from api.utils.log_utils import logger
 
 
 class GitHubParam(ComponentParamBase):
@@ -57,5 +57,5 @@ class GitHub(ComponentBase, ABC):
             return GitHub.be_output("")
 
         df = pd.DataFrame(github_res)
-
+        logger.debug(f"df: {df}")
         return df
agent/component/google.py  CHANGED

@@ -16,8 +16,8 @@
 from abc import ABC
 from serpapi import GoogleSearch
 import pandas as pd
-from agent.settings import DEBUG
 from agent.component.base import ComponentBase, ComponentParamBase
+from api.utils.log_utils import logger
 
 
 class GoogleParam(ComponentParamBase):
@@ -85,12 +85,12 @@ class Google(ComponentBase, ABC):
                                   "hl": self._param.language, "num": self._param.top_n})
             google_res = [{"content": '<a href="' + i["link"] + '">' + i["title"] + '</a> ' + i["snippet"]} for i in
                           client.get_dict()["organic_results"]]
-        except Exception
+        except Exception:
             return Google.be_output("**ERROR**: Existing Unavailable Parameters!")
 
         if not google_res:
             return Google.be_output("")
 
         df = pd.DataFrame(google_res)
-
+        logger.debug(f"df: {df}")
         return df
agent/component/googlescholar.py  CHANGED

@@ -15,9 +15,9 @@
 #
 from abc import ABC
 import pandas as pd
-from agent.settings import DEBUG
 from agent.component.base import ComponentBase, ComponentParamBase
 from scholarly import scholarly
+from api.utils.log_utils import logger
 
 
 class GoogleScholarParam(ComponentParamBase):
@@ -58,13 +58,13 @@ class GoogleScholar(ComponentBase, ABC):
                     'pub_url'] + '"></a> ' + "\n author: " + ",".join(pub['bib']['author']) + '\n Abstract: ' + pub[
                     'bib'].get('abstract', 'no abstract')})
 
-            except StopIteration or Exception
-
+            except StopIteration or Exception:
+                logger.exception("GoogleScholar")
                 break
 
         if not scholar_res:
             return GoogleScholar.be_output("")
 
         df = pd.DataFrame(scholar_res)
-
+        logger.debug(f"df: {df}")
         return df
agent/component/keyword.py  CHANGED

@@ -18,7 +18,7 @@ from abc import ABC
 from api.db import LLMType
 from api.db.services.llm_service import LLMBundle
 from agent.component import GenerateParam, Generate
-from
+from api.utils.log_utils import logger
 
 
 class KeywordExtractParam(GenerateParam):
@@ -58,5 +58,5 @@ class KeywordExtract(Generate, ABC):
                             self._param.gen_conf())
 
         ans = re.sub(r".*keyword:", "", ans).strip()
-
+        logger.info(f"ans: {ans}")
         return KeywordExtract.be_output(ans)
agent/component/pubmed.py  CHANGED

@@ -18,8 +18,8 @@ from Bio import Entrez
 import re
 import pandas as pd
 import xml.etree.ElementTree as ET
-from agent.settings import DEBUG
 from agent.component.base import ComponentBase, ComponentParamBase
+from api.utils.log_utils import logger
 
 
 class PubMedParam(ComponentParamBase):
@@ -65,5 +65,5 @@ class PubMed(ComponentBase, ABC):
             return PubMed.be_output("")
 
         df = pd.DataFrame(pubmed_res)
-
+        logger.debug(f"df: {df}")
         return df
agent/component/relevant.py  CHANGED

@@ -18,6 +18,7 @@ from api.db import LLMType
 from api.db.services.llm_service import LLMBundle
 from agent.component import GenerateParam, Generate
 from rag.utils import num_tokens_from_string, encoder
+from api.utils.log_utils import logger
 
 
 class RelevantParam(GenerateParam):
@@ -70,7 +71,7 @@ class Relevant(Generate, ABC):
         ans = chat_mdl.chat(self._param.get_prompt(), [{"role": "user", "content": ans}],
                             self._param.gen_conf())
 
-
+        logger.info(ans)
         if ans.lower().find("yes") >= 0:
             return Relevant.be_output(self._param.yes)
         if ans.lower().find("no") >= 0:
agent/component/retrieval.py  CHANGED

@@ -22,6 +22,7 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.llm_service import LLMBundle
 from api.settings import retrievaler
 from agent.component.base import ComponentBase, ComponentParamBase
+from api.utils.log_utils import logger
 
 
 class RetrievalParam(ComponentParamBase):
@@ -80,7 +81,7 @@ class Retrieval(ComponentBase, ABC):
         df = pd.DataFrame(kbinfos["chunks"])
         df["content"] = df["content_with_weight"]
         del df["content_with_weight"]
-
+        logger.debug("{} {}".format(query, df))
         return df
 
 
agent/component/rewrite.py  CHANGED

@@ -17,6 +17,7 @@ from abc import ABC
 from api.db import LLMType
 from api.db.services.llm_service import LLMBundle
 from agent.component import GenerateParam, Generate
+from api.utils.log_utils import logger
 
 
 class RewriteQuestionParam(GenerateParam):
@@ -104,7 +105,7 @@ class RewriteQuestion(Generate, ABC):
         self._canvas.history.pop()
         self._canvas.history.append(("user", ans))
 
-
+        logger.info(ans)
         return RewriteQuestion.be_output(ans)
 
 
agent/component/wikipedia.py  CHANGED

@@ -13,13 +13,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import random
 from abc import ABC
-from functools import partial
 import wikipedia
 import pandas as pd
-from agent.settings import DEBUG
 from agent.component.base import ComponentBase, ComponentParamBase
+from api.utils.log_utils import logger
 
 
 class WikipediaParam(ComponentParamBase):
@@ -65,5 +63,5 @@ class Wikipedia(ComponentBase, ABC):
             return Wikipedia.be_output("")
 
         df = pd.DataFrame(wiki_res)
-
+        logger.debug(f"df: {df}")
         return df
agent/component/yahoofinance.py  CHANGED

@@ -17,6 +17,7 @@ from abc import ABC
 import pandas as pd
 from agent.component.base import ComponentBase, ComponentParamBase
 import yfinance as yf
+from api.utils.log_utils import logger
 
 
 class YahooFinanceParam(ComponentParamBase):
@@ -74,8 +75,8 @@ class YahooFinance(ComponentBase, ABC):
                     {"content": "quarterly cash flow statement:\n" + msft.quarterly_cashflow.to_markdown() + "\n"})
             if self._param.news:
                 yohoo_res.append({"content": "news:\n" + pd.DataFrame(msft.news).to_markdown() + "\n"})
-        except Exception
-
+        except Exception:
+            logger.exception("YahooFinance got exception")
 
         if not yohoo_res:
             return YahooFinance.be_output("")
agent/settings.py  CHANGED

@@ -13,22 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-# Logger
-import os
 
-from api.utils.file_utils import get_project_base_directory
-from api.utils.log_utils import LoggerFactory, getLogger
-
-DEBUG = 0
-LoggerFactory.set_directory(
-    os.path.join(
-        get_project_base_directory(),
-        "logs",
-        "flow"))
-# {CRITICAL: 50, FATAL:50, ERROR:40, WARNING:30, WARN:30, INFO:20, DEBUG:10, NOTSET:0}
-LoggerFactory.LEVEL = 30
-
-flow_logger = getLogger("flow")
-database_logger = getLogger("database")
 FLOAT_ZERO = 1e-8
 PARAM_MAXDEPTH = 5
api/apps/__init__.py  CHANGED

@@ -13,7 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import logging
 import os
 import sys
 from importlib.util import module_from_spec, spec_from_file_location
@@ -30,18 +29,14 @@ from api.utils import CustomJSONEncoder, commands
 
 from flask_session import Session
 from flask_login import LoginManager
-from api.settings import SECRET_KEY
-from api.settings import API_VERSION
+from api.settings import SECRET_KEY
+from api.settings import API_VERSION
 from api.utils.api_utils import server_error_response
+from api.utils.log_utils import logger
 from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
 
 __all__ = ["app"]
 
-
-logger = logging.getLogger("flask.app")
-for h in access_logger.handlers:
-    logger.addHandler(h)
-
 Request.json = property(lambda self: self.get_json(force=True, silent=True))
 
 app = Flask(__name__)
@@ -158,8 +153,8 @@ def load_user(web_request):
                 return user[0]
             else:
                 return None
-        except Exception
-
+        except Exception:
+            logger.exception("load_user got exception")
             return None
     else:
         return None
api/apps/canvas_app.py  CHANGED

@@ -23,6 +23,7 @@ from api.utils import get_uuid
 from api.utils.api_utils import get_json_result, server_error_response, validate_request, get_data_error_result
 from agent.canvas import Canvas
 from peewee import MySQLDatabase, PostgresqlDatabase
+from api.utils.log_utils import logger
 
 
 @manager.route('/templates', methods=['GET'])
@@ -114,7 +115,7 @@ def run():
             pass
         canvas.add_user_input(req["message"])
         answer = canvas.run(stream=stream)
-
+        logger.info(canvas)
    except Exception as e:
        return server_error_response(e)
 
api/apps/llm_app.py  CHANGED

@@ -25,6 +25,7 @@ from api.db.db_models import TenantLLM
 from api.utils.api_utils import get_json_result
 from rag.llm import EmbeddingModel, ChatModel, RerankModel, CvModel, TTSModel
 import requests
+from api.utils.log_utils import logger
 
 
 @manager.route('/factories', methods=['GET'])
@@ -89,7 +90,7 @@ def set_api_key():
             if len(arr) == 0 or tc == 0:
                 raise Exception("Fail")
             rerank_passed = True
-
+            logger.info(f'passed model rerank {llm.llm_name}')
         except Exception as e:
             msg += f"\nFail to access model({llm.llm_name}) using this api key." + str(
                 e)
api/apps/sdk/dataset.py  CHANGED

@@ -526,4 +526,4 @@ def list(tenant_id):
             new_key = key_mapping.get(key, key)
             renamed_data[new_key] = value
         renamed_list.append(renamed_data)
-    return get_result(data=renamed_list)
+    return get_result(data=renamed_list)
api/apps/user_app.py  CHANGED

@@ -53,8 +53,8 @@ from api.settings import (
 )
 from api.db.services.user_service import UserService, TenantService, UserTenantService
 from api.db.services.file_service import FileService
-from api.settings import stat_logger
 from api.utils.api_utils import get_json_result, construct_response
+from api.utils.log_utils import logger
 
 
 @manager.route("/login", methods=["POST", "GET"])
@@ -177,7 +177,7 @@ def github_callback():
         try:
             avatar = download_img(user_info["avatar_url"])
         except Exception as e:
-
+            logger.exception(e)
             avatar = ""
         users = user_register(
             user_id,
@@ -202,7 +202,7 @@
             return redirect("/?auth=%s" % user.get_id())
         except Exception as e:
             rollback_user_registration(user_id)
-
+            logger.exception(e)
             return redirect("/?error=%s" % str(e))
 
     # User has already registered, try to log in
@@ -279,7 +279,7 @@ def feishu_callback():
         try:
             avatar = download_img(user_info["avatar_url"])
         except Exception as e:
-
+            logger.exception(e)
             avatar = ""
         users = user_register(
             user_id,
@@ -304,7 +304,7 @@
             return redirect("/?auth=%s" % user.get_id())
         except Exception as e:
             rollback_user_registration(user_id)
-
+            logger.exception(e)
             return redirect("/?error=%s" % str(e))
 
     # User has already registered, try to log in
@@ -436,7 +436,7 @@ def setting_user():
         UserService.update_by_id(current_user.id, update_dict)
         return get_json_result(data=True)
     except Exception as e:
-
+        logger.exception(e)
         return get_json_result(
             data=False, message="Update failure!", code=RetCode.EXCEPTION_ERROR
         )
@@ -621,7 +621,7 @@ def user_add():
         )
     except Exception as e:
         rollback_user_registration(user_id)
-
+        logger.exception(e)
         return get_json_result(
             data=False,
             message=f"User registration failure, error: {str(e)}",
api/db/db_models.py  CHANGED

@@ -30,12 +30,9 @@ from peewee import (
 )
 from playhouse.pool import PooledMySQLDatabase, PooledPostgresqlDatabase
 from api.db import SerializedType, ParserType
-from api.settings import DATABASE,
-from api.utils.log_utils import getLogger
+from api.settings import DATABASE, SECRET_KEY, DATABASE_TYPE
 from api import utils
-
-LOGGER = getLogger()
-
+from api.utils.log_utils import logger
 
 def singleton(cls, *args, **kw):
     instances = {}
@@ -288,7 +285,7 @@ class BaseDataBase:
         database_config = DATABASE.copy()
         db_name = database_config.pop("name")
         self.database_connection = PooledDatabase[DATABASE_TYPE.upper()].value(db_name, **database_config)
-
+        logger.info('init database on cluster mode successfully')
 
 class PostgresDatabaseLock:
     def __init__(self, lock_name, timeout=10, db=None):
@@ -396,7 +393,7 @@ def close_connection():
         if DB:
             DB.close_stale(age=30)
     except Exception as e:
-
+        logger.exception(e)
 
 
 class DataBaseModel(BaseModel):
@@ -412,15 +409,15 @@ def init_database_tables(alter_fields=[]):
     for name, obj in members:
         if obj != DataBaseModel and issubclass(obj, DataBaseModel):
             table_objs.append(obj)
-
+            logger.info(f"start create table {obj.__name__}")
             try:
                 obj.create_table()
-
+                logger.info(f"create table success: {obj.__name__}")
             except Exception as e:
-
+                logger.exception(e)
                 create_failed_list.append(obj.__name__)
     if create_failed_list:
-
+        logger.info(f"create tables failed: {create_failed_list}")
         raise Exception(f"create tables failed: {create_failed_list}")
     migrate_db()
 
api/db/db_utils.py  CHANGED

@@ -22,12 +22,6 @@ from playhouse.pool import PooledMySQLDatabase
 from api.utils import current_timestamp, timestamp_to_date
 
 from api.db.db_models import DB, DataBaseModel
-from api.db.runtime_config import RuntimeConfig
-from api.utils.log_utils import getLogger
-from enum import Enum
-
-
-LOGGER = getLogger()
 
 
 @DB.connection_context()
api/db/init_data.py  CHANGED

@@ -30,6 +30,7 @@ from api.db.services.llm_service import LLMFactoriesService, LLMService, TenantL
 from api.db.services.user_service import TenantService, UserTenantService
 from api.settings import CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS, LLM_FACTORY, API_KEY, LLM_BASE_URL
 from api.utils.file_utils import get_project_base_directory
+from api.utils.log_utils import logger
 
 
 def encode_to_base64(input_string):
@@ -69,36 +70,34 @@ def init_superuser():
                        "api_key": API_KEY, "api_base": LLM_BASE_URL})
 
     if not UserService.save(**user_info):
-
+        logger.info("can't init admin.")
         return
     TenantService.insert(**tenant)
     UserTenantService.insert(**usr_tenant)
    TenantLLMService.insert_many(tenant_llm)
-
-        "
+    logger.info(
+        "Super user initialized. email: [email protected], password: admin. Changing the password after logining is strongly recomanded.")
 
     chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"])
     msg = chat_mdl.chat(system="", history=[
         {"role": "user", "content": "Hello!"}], gen_conf={})
     if msg.find("ERROR: ") == 0:
-
-        "\33[91m【ERROR】\33[0m: ",
+        logger.error(
             "'{}' dosen't work. {}".format(
                 tenant["llm_id"],
                 msg))
     embd_mdl = LLMBundle(tenant["id"], LLMType.EMBEDDING, tenant["embd_id"])
     v, c = embd_mdl.encode(["Hello!"])
     if c == 0:
-
-        "
-        " '{}' dosen't work!".format(
+        logger.error(
+            "'{}' dosen't work!".format(
                 tenant["embd_id"]))
 
 
 def init_llm_factory():
     try:
         LLMService.filter_delete([(LLM.fid == "MiniMax" or LLM.fid == "Minimax")])
-    except Exception
+    except Exception:
         pass
 
     factory_llm_infos = json.load(
@@ -111,14 +110,14 @@ def init_llm_factory():
         llm_infos = factory_llm_info.pop("llm")
         try:
             LLMFactoriesService.save(**factory_llm_info)
-        except Exception
+        except Exception:
             pass
         LLMService.filter_delete([LLM.fid == factory_llm_info["name"]])
         for llm_info in llm_infos:
             llm_info["fid"] = factory_llm_info["name"]
             try:
                 LLMService.save(**llm_info)
-            except Exception
+            except Exception:
                 pass
 
     LLMFactoriesService.filter_delete([LLMFactories.name == "Local"])
@@ -145,7 +144,7 @@ def init_llm_factory():
                 row = deepcopy(row)
                 row["llm_name"] = "text-embedding-3-large"
                 TenantLLMService.save(**row)
-            except Exception
+            except Exception:
                 pass
             break
     for kb_id in KnowledgebaseService.get_all_ids():
@@ -169,9 +168,8 @@ def add_graph_templates():
                 CanvasTemplateService.save(**cnvs)
             except:
                 CanvasTemplateService.update_by_id(cnvs["id"], cnvs)
-    except Exception
-
-        print("------------", flush=True)
+    except Exception:
+        logger.exception("Add graph templates error: ")
 
 
 def init_web_data():
@@ -182,7 +180,7 @@ def init_web_data():
     # init_superuser()
 
     add_graph_templates()
-
+    logger.info("init web data success:{}".format(time.time() - start_time))
 
 
 if __name__ == '__main__':
api/db/operatioins.py  DELETED

@@ -1,21 +0,0 @@
-#
-#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-
-import operator
-import time
-import typing
-from api.utils.log_utils import sql_logger
-import peewee
CHANGED
|
@@ -26,11 +26,12 @@ from api.db.db_models import Dialog, Conversation,DB
|
|
| 26 |
from api.db.services.common_service import CommonService
|
| 27 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
| 28 |
from api.db.services.llm_service import LLMService, TenantLLMService, LLMBundle
|
| 29 |
-
from api.settings import
|
| 30 |
from rag.app.resume import forbidden_select_fields4resume
|
| 31 |
from rag.nlp.search import index_name
|
| 32 |
from rag.utils import rmSpace, num_tokens_from_string, encoder
|
| 33 |
from api.utils.file_utils import get_project_base_directory
|
|
|
|
| 34 |
|
| 35 |
|
| 36 |
class DialogService(CommonService):
|
|
@@ -177,7 +178,7 @@ def chat(dialog, messages, stream=True, **kwargs):
|
|
| 177 |
tts_mdl = LLMBundle(dialog.tenant_id, LLMType.TTS)
|
| 178 |
# try to use sql if field mapping is good to go
|
| 179 |
if field_map:
|
| 180 |
-
|
| 181 |
ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl, prompt_config.get("quote", True))
|
| 182 |
if ans:
|
| 183 |
yield ans
|
|
@@ -219,7 +220,7 @@ def chat(dialog, messages, stream=True, **kwargs):
|
|
| 219 |
doc_ids=attachments,
|
| 220 |
top=dialog.top_k, aggs=False, rerank_mdl=rerank_mdl)
|
| 221 |
knowledges = [ck["content_with_weight"] for ck in kbinfos["chunks"]]
|
| 222 |
-
|
| 223 |
"{}->{}".format(" ".join(questions), "\n->".join(knowledges)))
|
| 224 |
retrieval_tm = timer()
|
| 225 |
|
|
@@ -291,7 +292,7 @@ def chat(dialog, messages, stream=True, **kwargs):
|
|
| 291 |
yield decorate_answer(answer)
|
| 292 |
else:
|
| 293 |
answer = chat_mdl.chat(prompt, msg[1:], gen_conf)
|
| 294 |
-
|
| 295 |
msg[-1]["content"], answer))
|
| 296 |
res = decorate_answer(answer)
|
| 297 |
res["audio_binary"] = tts(tts_mdl, answer)
|
|
@@ -319,8 +320,7 @@ def use_sql(question, field_map, tenant_id, chat_mdl, quota=True):
|
|
| 319 |
nonlocal sys_prompt, user_promt, question, tried_times
|
| 320 |
sql = chat_mdl.chat(sys_prompt, [{"role": "user", "content": user_promt}], {
|
| 321 |
"temperature": 0.06})
|
| 322 |
-
|
| 323 |
-
chat_logger.info(f"“{question}”==>{user_promt} get SQL: {sql}")
|
| 324 |
sql = re.sub(r"[\r\n]+", " ", sql.lower())
|
| 325 |
sql = re.sub(r".*select ", "select ", sql.lower())
|
| 326 |
sql = re.sub(r" +", " ", sql)
|
|
@@ -340,9 +340,7 @@ def use_sql(question, field_map, tenant_id, chat_mdl, quota=True):
|
|
| 340 |
flds.append(k)
|
| 341 |
sql = "select doc_id,docnm_kwd," + ",".join(flds) + sql[8:]
|
| 342 |
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
chat_logger.info(f"“{question}” get SQL(refined): {sql}")
|
| 346 |
tried_times += 1
|
| 347 |
return retrievaler.sql_retrieval(sql, format="json"), sql
|
| 348 |
|
|
@@ -371,10 +369,9 @@ def use_sql(question, field_map, tenant_id, chat_mdl, quota=True):
|
|
| 371 |
question, sql, tbl["error"]
|
| 372 |
)
|
| 373 |
tbl, sql = get_table()
|
| 374 |
-
|
| 375 |
|
| 376 |
-
|
| 377 |
-
print(tbl)
|
| 378 |
if tbl.get("error") or len(tbl["rows"]) == 0:
|
| 379 |
return None
|
| 380 |
|
|
@@ -404,7 +401,7 @@ def use_sql(question, field_map, tenant_id, chat_mdl, quota=True):
|
|
| 404 |
rows = re.sub(r"T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+Z)?\|", "|", rows)
|
| 405 |
|
| 406 |
if not docid_idx or not docnm_idx:
|
| 407 |
-
|
| 408 |
return {
|
| 409 |
"answer": "\n".join([clmns, line, rows]),
|
| 410 |
"reference": {"chunks": [], "doc_aggs": []},
|
|
|
|
| 26 |
from api.db.services.common_service import CommonService
|
| 27 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
| 28 |
from api.db.services.llm_service import LLMService, TenantLLMService, LLMBundle
|
| 29 |
+
from api.settings import retrievaler, kg_retrievaler
|
| 30 |
from rag.app.resume import forbidden_select_fields4resume
|
| 31 |
from rag.nlp.search import index_name
|
| 32 |
from rag.utils import rmSpace, num_tokens_from_string, encoder
|
| 33 |
from api.utils.file_utils import get_project_base_directory
|
| 34 |
+
from api.utils.log_utils import logger
|
| 35 |
|
| 36 |
|
| 37 |
class DialogService(CommonService):
|
|
|
|
| 178 |
tts_mdl = LLMBundle(dialog.tenant_id, LLMType.TTS)
|
| 179 |
# try to use sql if field mapping is good to go
|
| 180 |
if field_map:
|
| 181 |
+
logger.info("Use SQL to retrieval:{}".format(questions[-1]))
|
| 182 |
ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl, prompt_config.get("quote", True))
|
| 183 |
if ans:
|
| 184 |
yield ans
|
|
|
|
| 220 |
doc_ids=attachments,
|
| 221 |
top=dialog.top_k, aggs=False, rerank_mdl=rerank_mdl)
|
| 222 |
knowledges = [ck["content_with_weight"] for ck in kbinfos["chunks"]]
|
| 223 |
+
logger.info(
|
| 224 |
"{}->{}".format(" ".join(questions), "\n->".join(knowledges)))
|
| 225 |
retrieval_tm = timer()
|
| 226 |
|
|
|
|
| 292 |
yield decorate_answer(answer)
|
| 293 |
else:
|
| 294 |
answer = chat_mdl.chat(prompt, msg[1:], gen_conf)
|
| 295 |
+
logger.info("User: {}|Assistant: {}".format(
|
| 296 |
msg[-1]["content"], answer))
|
| 297 |
res = decorate_answer(answer)
|
| 298 |
res["audio_binary"] = tts(tts_mdl, answer)
|
|
|
|
| 320 |
nonlocal sys_prompt, user_promt, question, tried_times
|
| 321 |
sql = chat_mdl.chat(sys_prompt, [{"role": "user", "content": user_promt}], {
|
| 322 |
"temperature": 0.06})
|
| 323 |
+
logger.info(f"{question} ==> {user_promt} get SQL: {sql}")
|
|
|
|
| 324 |
sql = re.sub(r"[\r\n]+", " ", sql.lower())
|
| 325 |
sql = re.sub(r".*select ", "select ", sql.lower())
|
| 326 |
sql = re.sub(r" +", " ", sql)
|
|
|
|
| 340 |
flds.append(k)
|
| 341 |
sql = "select doc_id,docnm_kwd," + ",".join(flds) + sql[8:]
|
| 342 |
|
| 343 |
+
logger.info(f"{question} get SQL(refined): {sql}")
|
|
|
|
|
|
|
| 344 |
tried_times += 1
|
| 345 |
return retrievaler.sql_retrieval(sql, format="json"), sql
|
| 346 |
|
|
|
|
| 369 |
question, sql, tbl["error"]
|
| 370 |
)
|
| 371 |
tbl, sql = get_table()
|
| 372 |
+
logger.info("TRY it again: {}".format(sql))
|
| 373 |
|
| 374 |
+
logger.info("GET table: {}".format(tbl))
|
|
|
|
| 375 |
if tbl.get("error") or len(tbl["rows"]) == 0:
|
| 376 |
return None
|
| 377 |
|
|
|
|
| 401 |
rows = re.sub(r"T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+Z)?\|", "|", rows)
|
| 402 |
|
| 403 |
if not docid_idx or not docnm_idx:
|
| 404 |
+
logger.warning("SQL missing field: " + sql)
|
| 405 |
return {
|
| 406 |
"answer": "\n".join([clmns, line, rows]),
|
| 407 |
"reference": {"chunks": [], "doc_aggs": []},
|
api/db/services/document_service.py  CHANGED

@@ -17,7 +17,6 @@ import hashlib
 import json
 import random
 import re
-import traceback
 from concurrent.futures import ThreadPoolExecutor
 from copy import deepcopy
 from datetime import datetime
@@ -26,7 +25,7 @@ from io import BytesIO
 from peewee import fn
 
 from api.db.db_utils import bulk_insert_into_db
-from api.settings import
+from api.settings import docStoreConn
 from api.utils import current_timestamp, get_format_time, get_uuid
 from graphrag.mind_map_extractor import MindMapExtractor
 from rag.settings import SVR_QUEUE_NAME
@@ -40,6 +39,7 @@ from api.db.services.common_service import CommonService
 from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db import StatusEnum
 from rag.utils.redis_conn import REDIS_CONN
+from api.utils.log_utils import logger
 
 
 class DocumentService(CommonService):
@@ -387,7 +387,7 @@ class DocumentService(CommonService):
                     cls.update_by_id(d["id"], info)
             except Exception as e:
                 if str(e).find("'0'") < 0:
-
+                    logger.exception("fetch task exception")
 
     @classmethod
     @DB.connection_context()
@@ -544,7 +544,7 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
                     "knowledge_graph_kwd": "mind_map"
                 })
             except Exception as e:
-
+                logger.exception("Mind map generation error")
 
         vects = embedding(doc_id, [c["content_with_weight"] for c in cks])
         assert len(cks) == len(vects)
api/db/services/file_service.py
CHANGED
@@ -28,6 +28,7 @@ from api.db.services.file2document_service import File2DocumentService
 from api.utils import get_uuid
 from api.utils.file_utils import filename_type, thumbnail_img
 from rag.utils.storage_factory import STORAGE_IMPL
+from api.utils.log_utils import logger


 class FileService(CommonService):
@@ -272,8 +273,8 @@
 cls.delete_folder_by_pf_id(user_id, file.id)
 return cls.model.delete().where((cls.model.tenant_id == user_id)
 & (cls.model.id == folder_id)).execute(),
-except Exception
-
+except Exception:
+logger.exception("delete_folder_by_pf_id")
 raise RuntimeError("Database error (File retrieval)!")

 @classmethod
@@ -321,8 +322,8 @@
 def move_file(cls, file_ids, folder_id):
 try:
 cls.filter_update((cls.model.id << file_ids, ), { 'parent_id': folder_id })
-except Exception
-
+except Exception:
+logger.exception("move_file")
 raise RuntimeError("Database error (File move)!")

 @classmethod
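
Note on the pattern above: the diff replaces silent exception handlers with logger.exception followed by the original RuntimeError, so the traceback lands in the unified log while callers still see the same error. A minimal, self-contained sketch of that pattern (the helper name and callable argument are hypothetical, not part of the PR):

from api.utils.log_utils import logger

def delete_with_logging(delete_fn, user_id, folder_id):
    try:
        return delete_fn(user_id, folder_id)
    except Exception:
        # full traceback goes to the unified ragflow log; callers still get a generic error
        logger.exception("delete_folder_by_pf_id")
        raise RuntimeError("Database error (File retrieval)!")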
api/db/services/llm_service.py
CHANGED
@@ -14,12 +14,12 @@
 # limitations under the License.
 #
 from api.db.services.user_service import TenantService
-from api.settings import database_logger
 from rag.llm import EmbeddingModel, CvModel, ChatModel, RerankModel, Seq2txtModel, TTSModel
 from api.db import LLMType
 from api.db.db_models import DB
 from api.db.db_models import LLMFactories, LLM, TenantLLM
 from api.db.services.common_service import CommonService
+from api.utils.log_utils import logger


 class LLMFactoriesService(CommonService):
@@ -209,40 +209,40 @@ class LLMBundle(object):
 emd, used_tokens = self.mdl.encode(texts, batch_size)
 if not TenantLLMService.increase_usage(
 self.tenant_id, self.llm_type, used_tokens):
-
-"
+logger.error(
+"LLMBundle.encode can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
 return emd, used_tokens

 def encode_queries(self, query: str):
 emd, used_tokens = self.mdl.encode_queries(query)
 if not TenantLLMService.increase_usage(
 self.tenant_id, self.llm_type, used_tokens):
-
-"
+logger.error(
+"LLMBundle.encode_queries can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
 return emd, used_tokens

 def similarity(self, query: str, texts: list):
 sim, used_tokens = self.mdl.similarity(query, texts)
 if not TenantLLMService.increase_usage(
 self.tenant_id, self.llm_type, used_tokens):
-
-"
+logger.error(
+"LLMBundle.similarity can't update token usage for {}/RERANK used_tokens: {}".format(self.tenant_id, used_tokens))
 return sim, used_tokens

 def describe(self, image, max_tokens=300):
 txt, used_tokens = self.mdl.describe(image, max_tokens)
 if not TenantLLMService.increase_usage(
 self.tenant_id, self.llm_type, used_tokens):
-
-"
+logger.error(
+"LLMBundle.describe can't update token usage for {}/IMAGE2TEXT used_tokens: {}".format(self.tenant_id, used_tokens))
 return txt

 def transcription(self, audio):
 txt, used_tokens = self.mdl.transcription(audio)
 if not TenantLLMService.increase_usage(
 self.tenant_id, self.llm_type, used_tokens):
-
-"
+logger.error(
+"LLMBundle.transcription can't update token usage for {}/SEQUENCE2TXT used_tokens: {}".format(self.tenant_id, used_tokens))
 return txt

 def tts(self, text):
@@ -250,8 +250,8 @@
 if isinstance(chunk,int):
 if not TenantLLMService.increase_usage(
 self.tenant_id, self.llm_type, chunk, self.llm_name):
-
-"
+logger.error(
+"LLMBundle.tts can't update token usage for {}/TTS".format(self.tenant_id))
 return
 yield chunk

@@ -259,8 +259,8 @@
 txt, used_tokens = self.mdl.chat(system, history, gen_conf)
 if isinstance(txt, int) and not TenantLLMService.increase_usage(
 self.tenant_id, self.llm_type, used_tokens, self.llm_name):
-
-"
+logger.error(
+"LLMBundle.chat can't update token usage for {}/CHAT llm_name: {}, used_tokens: {}".format(self.tenant_id, self.llm_name, used_tokens))
 return txt

 def chat_streamly(self, system, history, gen_conf):
@@ -268,7 +268,7 @@
 if isinstance(txt, int):
 if not TenantLLMService.increase_usage(
 self.tenant_id, self.llm_type, txt, self.llm_name):
-
-"
+logger.error(
+"LLMBundle.chat_streamly can't update token usage for {}/CHAT llm_name: {}, content: {}".format(self.tenant_id, self.llm_name, txt))
 return
 yield txt
api/ragflow_server.py
CHANGED
@@ -27,13 +27,10 @@ from api.apps import app
 from api.db.runtime_config import RuntimeConfig
 from api.db.services.document_service import DocumentService
 from api.settings import (
-HOST,
-HTTP_PORT,
-access_logger,
-database_logger,
-stat_logger,
+HOST, HTTP_PORT
 )
 from api import utils
+from api.utils.log_utils import logger

 from api.db.db_models import init_database_tables as init_web_db
 from api.db.init_data import init_web_data
@@ -45,23 +42,22 @@ def update_progress():
 time.sleep(3)
 try:
 DocumentService.update_progress()
-except Exception
-
+except Exception:
+logger.exception("update_progress exception")


-if __name__ ==
-
-r"""
+if __name__ == '__main__':
+logger.info(r"""
 ____ ___ ______ ______ __
 / __ \ / | / ____// ____// /____ _ __
 / /_/ // /| | / / __ / /_ / // __ \| | /| / /
 / _, _// ___ |/ /_/ // __/ / // /_/ /| |/ |/ /
 /_/ |_|/_/ |_|\____//_/ /_/ \____/ |__/|__/

-"""
-
+""")
+logger.info(
+f'project base: {utils.file_utils.get_project_base_directory()}'
 )
-stat_logger.info(f"project base: {utils.file_utils.get_project_base_directory()}")

 # init db
 init_web_db()
@@ -83,7 +79,7 @@ if __name__ == "__main__":

 RuntimeConfig.DEBUG = args.debug
 if RuntimeConfig.DEBUG:
-
+logger.info("run on debug mode")

 RuntimeConfig.init_env()
 RuntimeConfig.init_config(JOB_SERVER_HOST=HOST, HTTP_PORT=HTTP_PORT)
@@ -91,17 +87,17 @@
 peewee_logger = logging.getLogger("peewee")
 peewee_logger.propagate = False
 # rag_arch.common.log.ROpenHandler
-peewee_logger.addHandler(
-peewee_logger.setLevel(
+peewee_logger.addHandler(logger.handlers[0])
+peewee_logger.setLevel(logger.handlers[0].level)

 thr = ThreadPoolExecutor(max_workers=1)
 thr.submit(update_progress)

 # start http server
 try:
-
+logger.info("RAG Flow http server start...")
 werkzeug_logger = logging.getLogger("werkzeug")
-for h in
+for h in logger.handlers:
 werkzeug_logger.addHandler(h)
 run_simple(
 hostname=HOST,
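
The server wiring above routes third-party loggers into the unified logger's handlers instead of the removed per-purpose loggers. A minimal sketch of that hand-off, assuming only that logger is the module-level logger from api/utils/log_utils.py:

import logging
from api.utils.log_utils import logger

peewee_logger = logging.getLogger("peewee")
peewee_logger.propagate = False
# reuse the first attached handler (the rotating file handler) so peewee output
# lands in the same ragflow_<pid>.log file at the same level
peewee_logger.addHandler(logger.handlers[0])
peewee_logger.setLevel(logger.handlers[0].level)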
api/settings.py
CHANGED
@@ -17,24 +17,9 @@ import os
 from datetime import date
 from enum import IntEnum, Enum
 from api.utils.file_utils import get_project_base_directory
-from api.utils.log_utils import LoggerFactory, getLogger
 import rag.utils.es_conn
 import rag.utils.infinity_conn

-# Logger
-LoggerFactory.set_directory(
-os.path.join(
-get_project_base_directory(),
-"logs",
-"api"))
-# {CRITICAL: 50, FATAL:50, ERROR:40, WARNING:30, WARN:30, INFO:20, DEBUG:10, NOTSET:0}
-LoggerFactory.LEVEL = 30
-
-stat_logger = getLogger("stat")
-access_logger = getLogger("access")
-database_logger = getLogger("database")
-chat_logger = getLogger("chat")
-
 import rag.utils
 from rag.nlp import search
 from graphrag import search as kg_search
@@ -47,8 +32,6 @@ TEMP_DIRECTORY = os.path.join(get_project_base_directory(), "temp")
 RAG_FLOW_CONF_PATH = os.path.join(get_project_base_directory(), "conf")
 LIGHTEN = int(os.environ.get('LIGHTEN', "0"))

-SUBPROCESS_STD_LOG_NAME = "std.log"
-
 ERROR_REPORT = True
 ERROR_REPORT_WITH_PATH = False
api/utils/api_utils.py
CHANGED
@@ -35,11 +35,12 @@ from werkzeug.http import HTTP_STATUS_CODES
 from api.db.db_models import APIToken
 from api.settings import (
 REQUEST_MAX_WAIT_SEC, REQUEST_WAIT_SEC,
-
+CLIENT_AUTHENTICATION, HTTP_APP_KEY, SECRET_KEY
 )
 from api.settings import RetCode
 from api.utils import CustomJSONEncoder, get_uuid
 from api.utils import json_dumps
+from api.utils.log_utils import logger

 requests.models.complexjson.dumps = functools.partial(
 json.dumps, cls=CustomJSONEncoder)
@@ -117,7 +118,7 @@ def get_data_error_result(code=RetCode.DATA_ERROR,


 def server_error_response(e):
-
+logger.exception(e)
 try:
 if e.code == 401:
 return get_json_result(code=401, message=repr(e))
@@ -258,7 +259,7 @@ def construct_json_result(code=RetCode.SUCCESS, message='success', data=None):


 def construct_error_response(e):
-
+logger.exception(e)
 try:
 if e.code == 401:
 return construct_json_result(code=RetCode.UNAUTHORIZED, message=repr(e))
api/utils/log_utils.py
CHANGED
@@ -14,300 +14,38 @@
 # limitations under the License.
 #
 import os
-import typing
-import traceback
 import logging
-import
-from logging.handlers import TimedRotatingFileHandler
-from threading import RLock
+from logging.handlers import RotatingFileHandler

-from api.utils import
+from api.utils.file_utils import get_project_base_directory

-
-
-
-logging.basicConfig(format=LOG_FORMAT)
-LEVEL = logging.DEBUG
-logger_dict = {}
-global_handler_dict = {}
-
-LOG_DIR = None
-PARENT_LOG_DIR = None
-log_share = True
-
-append_to_parent_log = None
-
-lock = RLock()
-# CRITICAL = 50
-# FATAL = CRITICAL
-# ERROR = 40
-# WARNING = 30
-# WARN = WARNING
-# INFO = 20
-# DEBUG = 10
-# NOTSET = 0
-levels = (10, 20, 30, 40)
-schedule_logger_dict = {}
-
-@staticmethod
-def set_directory(directory=None, parent_log_dir=None,
-append_to_parent_log=None, force=False):
-if parent_log_dir:
-LoggerFactory.PARENT_LOG_DIR = parent_log_dir
-if append_to_parent_log:
-LoggerFactory.append_to_parent_log = append_to_parent_log
-with LoggerFactory.lock:
-if not directory:
-directory = file_utils.get_project_base_directory("logs")
-if not LoggerFactory.LOG_DIR or force:
-LoggerFactory.LOG_DIR = directory
-if LoggerFactory.log_share:
-oldmask = os.umask(000)
-os.makedirs(LoggerFactory.LOG_DIR, exist_ok=True)
-os.umask(oldmask)
-else:
-os.makedirs(LoggerFactory.LOG_DIR, exist_ok=True)
-for loggerName, ghandler in LoggerFactory.global_handler_dict.items():
-for className, (logger,
-handler) in LoggerFactory.logger_dict.items():
-logger.removeHandler(ghandler)
-ghandler.close()
-LoggerFactory.global_handler_dict = {}
-for className, (logger,
-handler) in LoggerFactory.logger_dict.items():
-logger.removeHandler(handler)
-_handler = None
-if handler:
-handler.close()
-if className != "default":
-_handler = LoggerFactory.get_handler(className)
-logger.addHandler(_handler)
-LoggerFactory.assemble_global_handler(logger)
-LoggerFactory.logger_dict[className] = logger, _handler
-
-@staticmethod
-def new_logger(name):
-logger = logging.getLogger(name)
-logger.propagate = False
-logger.setLevel(LoggerFactory.LEVEL)
+LOG_LEVEL = logging.INFO
+LOG_FILE = os.path.abspath(os.path.join(get_project_base_directory(), "logs", f"ragflow_{os.getpid()}.log"))
+LOG_FORMAT = "%(asctime)-15s %(levelname)-8s %(process)d %(message)s"
+logger = None
+
+def getLogger():
+global logger
+if logger is not None:
 return logger

-
-
-
-
-logger, handler = LoggerFactory.logger_dict[class_name]
-if not logger:
-logger, handler = LoggerFactory.init_logger(class_name)
-else:
-logger, handler = LoggerFactory.init_logger(class_name)
-return logger
-
-@staticmethod
-def get_global_handler(logger_name, level=None, log_dir=None):
-if not LoggerFactory.LOG_DIR:
-return logging.StreamHandler()
-if log_dir:
-logger_name_key = logger_name + "_" + log_dir
-else:
-logger_name_key = logger_name + "_" + LoggerFactory.LOG_DIR
-# if loggerName not in LoggerFactory.globalHandlerDict:
-if logger_name_key not in LoggerFactory.global_handler_dict:
-with LoggerFactory.lock:
-if logger_name_key not in LoggerFactory.global_handler_dict:
-handler = LoggerFactory.get_handler(
-logger_name, level, log_dir)
-LoggerFactory.global_handler_dict[logger_name_key] = handler
-return LoggerFactory.global_handler_dict[logger_name_key]
-
-@staticmethod
-def get_handler(class_name, level=None, log_dir=None,
-log_type=None, job_id=None):
-if not log_type:
-if not LoggerFactory.LOG_DIR or not class_name:
-return logging.StreamHandler()
-# return Diy_StreamHandler()
-
-if not log_dir:
-log_file = os.path.join(
-LoggerFactory.LOG_DIR,
-"{}.log".format(class_name))
-else:
-log_file = os.path.join(log_dir, "{}.log".format(class_name))
-else:
-log_file = os.path.join(log_dir, "rag_flow_{}.log".format(
-log_type) if level == LoggerFactory.LEVEL else 'rag_flow_{}_error.log'.format(log_type))
-
-os.makedirs(os.path.dirname(log_file), exist_ok=True)
-if LoggerFactory.log_share:
-handler = ROpenHandler(log_file,
-when='D',
-interval=1,
-backupCount=14,
-delay=True)
-else:
-handler = TimedRotatingFileHandler(log_file,
-when='D',
-interval=1,
-backupCount=14,
-delay=True)
-if level:
-handler.level = level
-
-return handler
-
-@staticmethod
-def init_logger(class_name):
-with LoggerFactory.lock:
-logger = LoggerFactory.new_logger(class_name)
-handler = None
-if class_name:
-handler = LoggerFactory.get_handler(class_name)
-logger.addHandler(handler)
-LoggerFactory.logger_dict[class_name] = logger, handler
-
-else:
-LoggerFactory.logger_dict["default"] = logger, handler
-
-LoggerFactory.assemble_global_handler(logger)
-return logger, handler
-
-@staticmethod
-def assemble_global_handler(logger):
-if LoggerFactory.LOG_DIR:
-for level in LoggerFactory.levels:
-if level >= LoggerFactory.LEVEL:
-level_logger_name = logging._levelToName[level]
-logger.addHandler(
-LoggerFactory.get_global_handler(
-level_logger_name, level))
-if LoggerFactory.append_to_parent_log and LoggerFactory.PARENT_LOG_DIR:
-for level in LoggerFactory.levels:
-if level >= LoggerFactory.LEVEL:
-level_logger_name = logging._levelToName[level]
-logger.addHandler(
-LoggerFactory.get_global_handler(level_logger_name, level, LoggerFactory.PARENT_LOG_DIR))
-
-
-def setDirectory(directory=None):
-LoggerFactory.set_directory(directory)
-
-
-def setLevel(level):
-LoggerFactory.LEVEL = level
-
-
-def getLogger(className=None, useLevelFile=False):
-if className is None:
-frame = inspect.stack()[1]
-module = inspect.getmodule(frame[0])
-className = 'stat'
-return LoggerFactory.get_logger(className)
-

-
-
-class ROpenHandler(TimedRotatingFileHandler):
-def _open(self):
-prevumask = os.umask(000)
-rtv = TimedRotatingFileHandler._open(self)
-os.umask(prevumask)
-return rtv
-
-
-def sql_logger(job_id='', log_type='sql'):
-key = job_id + log_type
-if key in LoggerFactory.schedule_logger_dict.keys():
-return LoggerFactory.schedule_logger_dict[key]
-return get_job_logger(job_id=job_id, log_type=log_type)
-
-
-def ready_log(msg, job=None, task=None, role=None, party_id=None, detail=None):
-prefix, suffix = base_msg(job, task, role, party_id, detail)
-return f"{prefix}{msg} ready{suffix}"
-
-
-def start_log(msg, job=None, task=None, role=None, party_id=None, detail=None):
-prefix, suffix = base_msg(job, task, role, party_id, detail)
-return f"{prefix}start to {msg}{suffix}"
-
-
-def successful_log(msg, job=None, task=None, role=None,
-party_id=None, detail=None):
-prefix, suffix = base_msg(job, task, role, party_id, detail)
-return f"{prefix}{msg} successfully{suffix}"
-
-
-def warning_log(msg, job=None, task=None, role=None,
-party_id=None, detail=None):
-prefix, suffix = base_msg(job, task, role, party_id, detail)
-return f"{prefix}{msg} is not effective{suffix}"
-
-
-def failed_log(msg, job=None, task=None, role=None,
-party_id=None, detail=None):
-prefix, suffix = base_msg(job, task, role, party_id, detail)
-return f"{prefix}failed to {msg}{suffix}"
-
-
-def base_msg(job=None, task=None, role: str = None,
-party_id: typing.Union[str, int] = None, detail=None):
-if detail:
-detail_msg = f" detail: \n{detail}"
-else:
-detail_msg = ""
-if task is not None:
-return f"task {task.f_task_id} {task.f_task_version} ", f" on {task.f_role} {task.f_party_id}{detail_msg}"
-elif job is not None:
-return "", f" on {job.f_role} {job.f_party_id}{detail_msg}"
-elif role and party_id:
-return "", f" on {role} {party_id}{detail_msg}"
-else:
-return "", f"{detail_msg}"
-
-
-def exception_to_trace_string(ex):
-return "".join(traceback.TracebackException.from_exception(ex).format())
-
-
-def get_logger_base_dir():
-job_log_dir = file_utils.get_rag_flow_directory('logs')
-return job_log_dir
-
-
-def get_job_logger(job_id, log_type):
-rag_flow_log_dir = file_utils.get_rag_flow_directory('logs', 'rag_flow')
-job_log_dir = file_utils.get_rag_flow_directory('logs', job_id)
-if not job_id:
-log_dirs = [rag_flow_log_dir]
-else:
-if log_type == 'audit':
-log_dirs = [job_log_dir, rag_flow_log_dir]
-else:
-log_dirs = [job_log_dir]
-if LoggerFactory.log_share:
-oldmask = os.umask(000)
-os.makedirs(job_log_dir, exist_ok=True)
-os.makedirs(rag_flow_log_dir, exist_ok=True)
-os.umask(oldmask)
-else:
-os.makedirs(job_log_dir, exist_ok=True)
-os.makedirs(rag_flow_log_dir, exist_ok=True)
-logger = LoggerFactory.new_logger(f"{job_id}_{log_type}")
-for job_log_dir in log_dirs:
-handler = LoggerFactory.get_handler(class_name=None, level=LoggerFactory.LEVEL,
-log_dir=job_log_dir, log_type=log_type, job_id=job_id)
-error_handler = LoggerFactory.get_handler(
-class_name=None,
-level=logging.ERROR,
-log_dir=job_log_dir,
-log_type=log_type,
-job_id=job_id)
-logger.addHandler(handler)
-logger.addHandler(error_handler)
-with LoggerFactory.lock:
-LoggerFactory.schedule_logger_dict[job_id + log_type] = logger
+print(f"log file path: {LOG_FILE}")
+os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
+logger = logging.getLogger("ragflow")
+logger.setLevel(LOG_LEVEL)
+
+handler1 = RotatingFileHandler(LOG_FILE, maxBytes=10*1024*1024, backupCount=5)
+handler1.setLevel(LOG_LEVEL)
+formatter1 = logging.Formatter(LOG_FORMAT)
+handler1.setFormatter(formatter1)
+logger.addHandler(handler1)
+
+handler2 = logging.StreamHandler()
+handler2.setLevel(LOG_LEVEL)
+formatter2 = logging.Formatter(LOG_FORMAT)
+handler2.setFormatter(formatter2)
+logger.addHandler(handler2)
+
 return logger
+
+logger = getLogger()
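
With the rework above, api/utils/log_utils.py exposes a single module-level logger that writes to both logs/ragflow_<pid>.log and the console. A minimal usage sketch, assuming the project layout shown above (the parse_document function below is hypothetical, not part of the PR):

from api.utils.log_utils import logger

def parse_document(path):
    logger.info("start parsing %s", path)
    try:
        return open(path, "rb").read()
    except Exception:
        # logger.exception logs at ERROR level and appends the current traceback
        logger.exception("parse_document failed")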
deepdoc/parser/pdf_parser.py
CHANGED
@@ -19,13 +19,14 @@ from io import BytesIO
 import re
 import pdfplumber
 import logging
-from PIL import Image
+from PIL import Image
 import numpy as np
 from timeit import default_timer as timer
 from pypdf import PdfReader as pdf2_read

 from api.settings import LIGHTEN
 from api.utils.file_utils import get_project_base_directory
+from api.utils.log_utils import logger
 from deepdoc.vision import OCR, Recognizer, LayoutRecognizer, TableStructureRecognizer
 from rag.nlp import rag_tokenizer
 from copy import deepcopy
@@ -49,15 +50,15 @@ class RAGFlowPdfParser:
 import torch
 if torch.cuda.is_available():
 self.updown_cnt_mdl.set_param({"device": "cuda"})
-except Exception
-
+except Exception:
+logger.exception("RAGFlowPdfParser __init__")
 try:
 model_dir = os.path.join(
 get_project_base_directory(),
 "rag/res/deepdoc")
 self.updown_cnt_mdl.load_model(os.path.join(
 model_dir, "updown_concat_xgb.model"))
-except Exception
+except Exception:
 model_dir = snapshot_download(
 repo_id="InfiniFlow/text_concat_xgb_v1.0",
 local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"),
@@ -187,7 +188,7 @@
 return True

 def _table_transformer_job(self, ZM):
-
+logger.info("Table processing...")
 imgs, pos = [], []
 tbcnt = [0]
 MARGIN = 10
@@ -425,12 +426,12 @@
 detach_feats = [b["x1"] < b_["x0"],
 b["x0"] > b_["x1"]]
 if (any(feats) and not any(concatting_feats)) or any(detach_feats):
-
+logger.info("{} {} {} {}".format(
 b["text"],
 b_["text"],
 any(feats),
 any(concatting_feats),
-
+))
 i += 1
 continue
 # merge up and down
@@ -726,14 +727,14 @@
 # continue
 if tv < fv and tk:
 tables[tk].insert(0, c)
-
+logger.debug(
 "TABLE:" +
 self.boxes[i]["text"] +
 "; Cap: " +
 tk)
 elif fk:
 figures[fk].insert(0, c)
-
+logger.debug(
 "FIGURE:" +
 self.boxes[i]["text"] +
 "; Cap: " +
@@ -760,7 +761,7 @@
 if ii is not None:
 b = louts[ii]
 else:
-
+logger.warn(
 f"Missing layout match: {pn + 1},%s" %
 (bxs[0].get(
 "layoutno", "")))
@@ -918,8 +919,8 @@
 if usefull(boxes[0]):
 dfs(boxes[0], 0)
 else:
-
-except Exception
+logger.debug("WASTE: " + boxes[0]["text"])
+except Exception:
 pass
 boxes.pop(0)
 mw = np.mean(widths)
@@ -927,7 +928,7 @@
 res.append(
 "\n".join([c["text"] + self._line_tag(c, ZM) for c in lines]))
 else:
-
+logger.debug("REMOVED: " +
 "<<".join([c["text"] for c in lines]))

 return "\n\n".join(res)
@@ -938,8 +939,8 @@
 pdf = pdfplumber.open(
 fnm) if not binary else pdfplumber.open(BytesIO(binary))
 return len(pdf.pages)
-except Exception
-
+except Exception:
+logger.exception("total_page_number")

 def __images__(self, fnm, zoomin=3, page_from=0,
 page_to=299, callback=None):
@@ -962,8 +963,8 @@
 self.page_chars = [[{**c, 'top': c['top'], 'bottom': c['bottom']} for c in page.dedupe_chars().chars if self._has_color(c)] for page in
 self.pdf.pages[page_from:page_to]]
 self.total_page = len(self.pdf.pages)
-except Exception
-
+except Exception:
+logger.exception("RAGFlowPdfParser __images__")

 self.outlines = []
 try:
@@ -979,11 +980,11 @@

 dfs(outlines, 0)
 except Exception as e:
-
+logger.warning(f"Outlines exception: {e}")
 if not self.outlines:
-
+logger.warning("Miss outlines")

-
+logger.info("Images converted.")
 self.is_english = [re.search(r"[a-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}", "".join(
 random.choices([c["text"] for c in self.page_chars[i]], k=min(100, len(self.page_chars[i]))))) for i in
 range(len(self.page_chars))]
@@ -1023,7 +1024,7 @@
 self.is_english = re.search(r"[\na-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}",
 "".join([b["text"] for b in random.choices(bxes, k=min(30, len(bxes)))]))

-
+logger.info("Is it English:", self.is_english)

 self.page_cum_height = np.cumsum(self.page_cum_height)
 assert len(self.page_cum_height) == len(self.page_images) + 1
@@ -1162,10 +1163,10 @@ class PlainParser(object):
 dfs(a, depth + 1)

 dfs(outlines, 0)
-except Exception
-
+except Exception:
+logger.exception("Outlines exception")
 if not self.outlines:
-
+logger.warning("Miss outlines")

 return [(l, "") for l in lines], []
deepdoc/parser/resume/entities/corporations.py
CHANGED
@@ -11,10 +11,15 @@
 # limitations under the License.
 #

-import re
+import re
+import json
+import os
 import pandas as pd
 from rag.nlp import rag_tokenizer
 from . import regions
+from api.utils.log_utils import logger
+
+
 current_file_path = os.path.dirname(os.path.abspath(__file__))
 GOODS = pd.read_csv(os.path.join(current_file_path, "res/corp_baike_len.csv"), sep="\t", header=0).fillna(0)
 GOODS["cid"] = GOODS["cid"].astype(str)
@@ -27,7 +32,7 @@ def baike(cid, default_v=0):
 global GOODS
 try:
 return GOODS.loc[str(cid), "len"]
-except Exception
+except Exception:
 pass
 return default_v

@@ -65,7 +70,8 @@ def rmNoise(n):
 GOOD_CORP = set([corpNorm(rmNoise(c), False) for c in GOOD_CORP])
 for c,v in CORP_TAG.items():
 cc = corpNorm(rmNoise(c), False)
-if not cc:
+if not cc:
+logger.info(c)
 CORP_TAG = {corpNorm(rmNoise(c), False):v for c,v in CORP_TAG.items()}

 def is_good(nm):
deepdoc/parser/resume/step_two.py
CHANGED
@@ -11,13 +11,19 @@
 # limitations under the License.
 #

-import re
-
+import re
+import copy
+import time
+import datetime
+import demjson3
+import traceback
+import signal
 import numpy as np
 from deepdoc.parser.resume.entities import degrees, schools, corporations
 from rag.nlp import rag_tokenizer, surname
 from xpinyin import Pinyin
 from contextlib import contextmanager
+from api.utils.log_utils import logger


 class TimeoutException(Exception): pass
@@ -79,7 +85,7 @@ def forEdu(cv):
 y, m, d = getYMD(dt)
 st_dt.append(str(y))
 e["start_dt_kwd"] = str(y)
-except Exception
+except Exception:
 pass

 r = schools.select(n.get("school_name", ""))
@@ -158,7 +164,7 @@ def forEdu(cv):
 y, m, d = getYMD(edu_end_dt)
 cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
 except Exception as e:
-
+logger.exception("forEdu {} {} {}".format(e, edu_end_dt, cv.get("work_exp_flt")))
 if sch:
 cv["school_name_kwd"] = sch
 if (len(cv.get("degree_kwd", [])) >= 1 and "本科" in cv["degree_kwd"]) \
@@ -233,7 +239,7 @@ def forWork(cv):
 if type(n) == type(""):
 try:
 n = json_loads(n)
-except Exception
+except Exception:
 continue

 if n.get("start_time") and (not work_st_tm or n["start_time"] < work_st_tm): work_st_tm = n["start_time"]
@@ -269,8 +275,8 @@ def forWork(cv):

 try:
 duas.append((datetime.datetime.strptime(ed, "%Y-%m-%d") - datetime.datetime.strptime(st, "%Y-%m-%d")).days)
-except Exception
-
+except Exception:
+logger.exception("forWork {} {}".format(n.get("start_time"), n.get("end_time")))

 if n.get("scale"):
 r = re.search(r"^([0-9]+)", str(n["scale"]))
@@ -327,7 +333,7 @@ def forWork(cv):
 y, m, d = getYMD(work_st_tm)
 cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
 except Exception as e:
-
+logger.exception("forWork {} {} {}".format(e, work_st_tm, cv.get("work_exp_flt")))

 cv["job_num_int"] = 0
 if duas:
@@ -457,8 +463,8 @@ def parse(cv):
 t = k[:-4]
 cv[f"{t}_kwd"] = nms
 cv[f"{t}_tks"] = rag_tokenizer.tokenize(" ".join(nms))
-except Exception
-
+except Exception:
+logger.exception("parse {} {}".format(str(traceback.format_exc()), cv[k]))
 cv[k] = []

 # tokenize fields
@@ -524,7 +530,7 @@ def parse(cv):
 if not y: y = "2012"
 if not m: m = "01"
 if not d: d = "01"
-cv["updated_at_dt"] =
+cv["updated_at_dt"] = "%s-%02d-%02d 00:00:00" % (y, int(m), int(d))
 # long text tokenize

 if cv.get("responsibilities"): cv["responsibilities_ltks"] = rag_tokenizer.tokenize(rmHtmlTag(cv["responsibilities"]))
@@ -556,10 +562,10 @@ def parse(cv):
 cv["work_exp_flt"] = (time.time() - int(int(cv["work_start_time"]) / 1000)) / 3600. / 24. / 365.
 elif re.match(r"[0-9]{4}[^0-9]", str(cv["work_start_time"])):
 y, m, d = getYMD(str(cv["work_start_time"]))
-cv["work_start_dt"] =
+cv["work_start_dt"] = "%s-%02d-%02d 00:00:00" % (y, int(m), int(d))
 cv["work_exp_flt"] = int(str(datetime.date.today())[0:4]) - int(y)
 except Exception as e:
-
+logger.exception("parse {} ==> {}".format(e, cv.get("work_start_time")))
 if "work_exp_flt" not in cv and cv.get("work_experience", 0): cv["work_exp_flt"] = int(cv["work_experience"]) / 12.

 keys = list(cv.keys())
@@ -574,7 +580,7 @@ def parse(cv):

 cv["tob_resume_id"] = str(cv["tob_resume_id"])
 cv["id"] = cv["tob_resume_id"]
-
+logger.info("CCCCCCCCCCCCCCC")

 return dealWithInt64(cv)

@@ -589,4 +595,3 @@ def dealWithInt64(d):

 if isinstance(d, np.integer): d = int(d)
 return d
-
deepdoc/vision/operators.py
CHANGED
@@ -20,6 +20,7 @@ import cv2
 import numpy as np
 import math
 from PIL import Image
+from api.utils.log_utils import logger


 class DecodeImage(object):
@@ -402,7 +403,7 @@ class DetResizeForTest(object):
 return None, (None, None)
 img = cv2.resize(img, (int(resize_w), int(resize_h)))
 except BaseException:
-
+logger.exception("{} {} {}".format(img.shape, resize_w, resize_h))
 sys.exit(0)
 ratio_h = resize_h / float(h)
 ratio_w = resize_w / float(w)
@@ -452,7 +453,6 @@ class E2EResizeForTest(object):
 return data

 def resize_image_for_totaltext(self, im, max_side_len=512):
-
 h, w, _ = im.shape
 resize_w = w
 resize_h = h
deepdoc/vision/recognizer.py
CHANGED
@@ -19,6 +19,7 @@ from huggingface_hub import snapshot_download

 from api.utils.file_utils import get_project_base_directory
 from .operators import *
+from api.utils.log_utils import logger


 class Recognizer(object):
@@ -439,7 +440,7 @@ class Recognizer(object):
 end_index = min((i + 1) * batch_size, len(imgs))
 batch_image_list = imgs[start_index:end_index]
 inputs = self.preprocess(batch_image_list)
-
+logger.info("preprocess")
 for ins in inputs:
 bb = self.postprocess(self.ort_sess.run(None, {k:v for k,v in ins.items() if k in self.input_names})[0], ins, thr)
 res.append(bb)
deepdoc/vision/seeit.py
CHANGED
@@ -14,6 +14,7 @@
 import os
 import PIL
 from PIL import ImageDraw
+from api.utils.log_utils import logger


 def save_results(image_list, results, labels, output_dir='output/', threshold=0.5):
@@ -24,7 +25,7 @@ def save_results(image_list, results, labels, output_dir='output/', threshold=0.

 out_path = os.path.join(output_dir, f"{idx}.jpg")
 im.save(out_path, quality=95)
-
+logger.info("save result to: " + out_path)


 def draw_box(im, result, lables, threshold=0.5):
deepdoc/vision/t_recognizer.py
CHANGED
@@ -10,7 +10,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import os
+import os
+import sys
+from api.utils.log_utils import logger
+
 sys.path.insert(
 0,
 os.path.abspath(
@@ -56,7 +59,7 @@ def main(args):
 } for t in lyt]
 img = draw_box(images[i], lyt, labels, float(args.threshold))
 img.save(outputs[i], quality=95)
-
+logger.info("save result to: " + outputs[i])


 def get_table_html(img, tb_cpns, ocr):
graphrag/claim_extractor.py
CHANGED
@@ -7,7 +7,6 @@ Reference:

import argparse
import json
-import logging
import re
import traceback
from dataclasses import dataclass
@@ -18,12 +17,12 @@ import tiktoken
from graphrag.claim_prompt import CLAIM_EXTRACTION_PROMPT, CONTINUE_PROMPT, LOOP_PROMPT
from rag.llm.chat_model import Base as CompletionLLM
from graphrag.utils import ErrorHandlerFn, perform_variable_replacements
+from api.utils.log_utils import logger

DEFAULT_TUPLE_DELIMITER = "<|>"
DEFAULT_RECORD_DELIMITER = "##"
DEFAULT_COMPLETION_DELIMITER = "<|COMPLETE|>"
CLAIM_MAX_GLEANINGS = 1
-log = logging.getLogger(__name__)


@dataclass
@@ -127,7 +126,7 @@ class ClaimExtractor:
]
source_doc_map[document_id] = text
except Exception as e:
-
+logger.exception("error extracting claim")
self._on_error(
e,
traceback.format_exc(),
@@ -266,4 +265,4 @@ if __name__ == "__main__":
"claim_description": ""
}
claim = ex(info)
-
+logger.info(json.dumps(claim.output, ensure_ascii=False, indent=2))
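The graphrag modules all follow the two-step change shown above: drop the per-module `log = logging.getLogger(__name__)` and report failures through the shared logger with `logger.exception(...)`, which appends the active traceback to the record automatically. A reduced sketch of the resulting shape (the callback below is a stand-in, not the real `ErrorHandlerFn`):

    import traceback
    from api.utils.log_utils import logger  # shared logger introduced by this PR

    def on_error(exc, tb, data):
        # stand-in for the extractor's error callback
        pass

    def extract_one(document_id, extract):
        try:
            return extract(document_id)
        except Exception as e:
            # logger.exception() logs at ERROR level and includes the traceback
            logger.exception("error extracting claim")
            on_error(e, traceback.format_exc(), None)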
graphrag/community_reports_extractor.py
CHANGED
@@ -6,11 +6,10 @@ Reference:
"""

import json
-import logging
import re
import traceback
from dataclasses import dataclass
-from typing import
+from typing import List, Callable
import networkx as nx
import pandas as pd
from graphrag import leiden
@@ -20,8 +19,7 @@ from rag.llm.chat_model import Base as CompletionLLM
from graphrag.utils import ErrorHandlerFn, perform_variable_replacements, dict_has_keys_with_types
from rag.utils import num_tokens_from_string
from timeit import default_timer as timer
-
-log = logging.getLogger(__name__)
+from api.utils.log_utils import logger


@dataclass
@@ -82,7 +80,7 @@ class CommunityReportsExtractor:
response = re.sub(r"[^\}]*$", "", response)
response = re.sub(r"\{\{", "{", response)
response = re.sub(r"\}\}", "}", response)
-
+logger.info(response)
response = json.loads(response)
if not dict_has_keys_with_types(response, [
("title", str),
@@ -94,7 +92,7 @@
response["weight"] = weight
response["entities"] = ents
except Exception as e:
-
+logger.exception("CommunityReportsExtractor got exception")
self._on_error(e, traceback.format_exc(), None)
continue

@@ -127,5 +125,4 @@
report_sections = "\n\n".join(
f"## {finding_summary(f)}\n\n{finding_explanation(f)}" for f in findings
)
-
return f"# {title}\n\n{summary}\n\n{report_sections}"
graphrag/index.py
CHANGED
@@ -28,6 +28,7 @@ from graphrag.graph_extractor import GraphExtractor, DEFAULT_ENTITY_TYPES
from graphrag.mind_map_extractor import MindMapExtractor
from rag.nlp import rag_tokenizer
from rag.utils import num_tokens_from_string
+from api.utils.log_utils import logger


def graph_merge(g1, g2):
@@ -94,7 +95,7 @@ def build_knowledge_graph_chunks(tenant_id: str, chunks: List[str], callback, en
chunks = []
for n, attr in graph.nodes(data=True):
if attr.get("rank", 0) == 0:
-
+logger.info(f"Ignore entity: {n}")
continue
chunk = {
"name_kwd": n,
@@ -136,7 +137,7 @@ def build_knowledge_graph_chunks(tenant_id: str, chunks: List[str], callback, en
mg = mindmap(_chunks).output
if not len(mg.keys()): return chunks

-
+logger.info(json.dumps(mg, ensure_ascii=False, indent=2))
chunks.append(
{
"content_with_weight": json.dumps(mg, ensure_ascii=False, indent=2),
graphrag/mind_map_extractor.py
CHANGED
@@ -18,7 +18,6 @@ import collections
import logging
import os
import re
-import logging
import traceback
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
@@ -30,6 +29,7 @@ from rag.llm.chat_model import Base as CompletionLLM
import markdown_to_json
from functools import reduce
from rag.utils import num_tokens_from_string
+from api.utils.log_utils import logger


@dataclass
@@ -193,6 +193,6 @@ class MindMapExtractor:
gen_conf = {"temperature": 0.5}
response = self._llm.chat(text, [{"role": "user", "content": "Output:"}], gen_conf)
response = re.sub(r"```[^\n]*", "", response)
-
-
+logger.info(response)
+logger.info(self._todict(markdown_to_json.dictify(response)))
return self._todict(markdown_to_json.dictify(response))
intergrations/chatgpt-on-wechat/plugins/ragflow_chat.py
CHANGED
@@ -2,7 +2,7 @@ import requests
from bridge.context import ContextType # Import Context, ContextType
from bridge.reply import Reply, ReplyType # Import Reply, ReplyType
from bridge import *
-from
+from api.utils.log_utils import logger
from plugins import Plugin, register # Import Plugin and register
from plugins.event import Event, EventContext, EventAction # Import event-related classes

@@ -76,7 +76,7 @@
logger.error(f"[RAGFlowChat] HTTP error when creating conversation: {response.status_code}")
return f"Sorry, unable to connect to RAGFlow API (create conversation). HTTP status code: {response.status_code}"
except Exception as e:
-logger.exception(
+logger.exception("[RAGFlowChat] Exception when creating conversation")
return f"Sorry, an internal error occurred: {str(e)}"

# Step 2: Send the message and get a reply
@@ -108,5 +108,5 @@
logger.error(f"[RAGFlowChat] HTTP error when getting answer: {response.status_code}")
return f"Sorry, unable to connect to RAGFlow API (get reply). HTTP status code: {response.status_code}"
except Exception as e:
-logger.exception(
+logger.exception("[RAGFlowChat] Exception when getting answer")
return f"Sorry, an internal error occurred: {str(e)}"
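The plugin above distinguishes the two failure modes: HTTP-level failures are recorded with `logger.error(...)` (the status code is enough), while unexpected exceptions go through `logger.exception(...)` so the traceback lands in the unified log. A reduced sketch of that handler shape (endpoint, headers and return values are illustrative only, not the plugin's real request logic):

    import requests
    from api.utils.log_utils import logger  # shared logger introduced by this PR

    def create_conversation(api_url, headers):
        # illustrative only; the real plugin builds the request from its config
        try:
            response = requests.get(f"{api_url}/new_conversation", headers=headers, timeout=30)
            if response.status_code != 200:
                # expected failure: log the status code, no traceback needed
                logger.error(f"[RAGFlowChat] HTTP error when creating conversation: {response.status_code}")
                return None
            return response.json()
        except Exception as e:
            # unexpected failure: logger.exception() also records the traceback
            logger.exception("[RAGFlowChat] Exception when creating conversation")
            return f"Sorry, an internal error occurred: {str(e)}"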
rag/app/book.py
CHANGED
@@ -20,6 +20,7 @@ from rag.nlp import bullets_category, is_english,remove_contents_table, \
tokenize_chunks
from rag.nlp import rag_tokenizer
from deepdoc.parser import PdfParser, DocxParser, PlainParser, HtmlParser
+from api.utils.log_utils import logger


class Pdf(PdfParser):
@@ -38,7 +39,7 @@ class Pdf(PdfParser):
start = timer()
self._layouts_rec(zoomin)
callback(0.67, "Layout analysis finished")
-
+logger.info("layouts: {}".format(timer() - start))
self._table_transformer_job(zoomin)
callback(0.68, "Table analysis finished")
self._text_merge()
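book.py above (and laws.py further down) pair a `timer()` call before layout analysis with a `logger.info(...)` afterwards, so per-stage timings go to the unified log instead of stdout. In isolation the pattern is roughly the following (parser internals stubbed out; the function and argument names are placeholders, not the actual parser API):

    from timeit import default_timer as timer
    from api.utils.log_utils import logger  # shared logger introduced by this PR

    def layout_stage(run_layout_analysis, callback):
        # stub of the parser's layout step; only the timing/logging pattern matters
        start = timer()
        run_layout_analysis()
        callback(0.67, "Layout analysis finished")
        logger.info("layouts: {}".format(timer() - start))  # elapsed seconds for this stage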
rag/app/email.py
CHANGED
@@ -18,7 +18,7 @@ import re
from rag.nlp import rag_tokenizer, naive_merge, tokenize_chunks
from deepdoc.parser import HtmlParser, TxtParser
from timeit import default_timer as timer
-from
+from api.utils.log_utils import logger
import io


@@ -86,7 +86,7 @@ def chunk(
)

main_res.extend(tokenize_chunks(chunks, doc, eng, None))
-
+logger.info("naive_merge({}): {}".format(filename, timer() - st))
# get the attachment info
for part in msg.iter_attachments():
content_disposition = part.get("Content-Disposition")
rag/app/laws.py
CHANGED
@@ -21,7 +21,7 @@ from rag.nlp import bullets_category, remove_contents_table, hierarchical_merge,
make_colon_as_title, tokenize_chunks, docx_question_level
from rag.nlp import rag_tokenizer
from deepdoc.parser import PdfParser, DocxParser, PlainParser, HtmlParser
-from
+from api.utils.log_utils import logger


class Docx(DocxParser):
@@ -122,8 +122,8 @@ class Pdf(PdfParser):
start = timer()
self._layouts_rec(zoomin)
callback(0.67, "Layout analysis finished")
-
-
+logger.info("layouts:".format(
+))
self._naive_vertical_merge()

callback(0.8, "Text extraction finished")
|