Introduced beartype (#3460)
### What problem does this PR solve?
Introduced [beartype](https://github.com/beartype/beartype) for runtime
type-checking.
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
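
For context, beartype wraps annotated callables and validates arguments and return values at call time. A minimal sketch of the behavior this PR turns on (the function below is illustrative, not from the RAGFlow codebase):

```python
from beartype import beartype
from beartype.roar import BeartypeCallHintParamViolation

@beartype
def chunk_text(text: str, max_size: int = 2000) -> list[str]:
    # Toy splitter, standing in for real parsing code.
    return [text[i:i + max_size] for i in range(0, len(text), max_size)]

chunk_text("hello world", max_size=5)   # passes: arguments match the hints
try:
    chunk_text(42)                      # int where str is annotated
except BeartypeCallHintParamViolation as err:
    print(err)                          # rejected before the body even runs
```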
- agent/component/base.py +1 -2
- api/db/db_models.py +3 -4
- api/db/db_utils.py +3 -4
- api/ragflow_server.py +3 -0
- api/utils/log_utils.py +3 -4
- api/versions.py +2 -4
- deepdoc/parser/json_parser.py +12 -13
- docs/references/python_api_reference.md +1 -1
- graphrag/community_reports_extractor.py +4 -4
- graphrag/entity_embedding.py +0 -1
- graphrag/graph_extractor.py +1 -1
- graphrag/index.py +1 -2
- graphrag/leiden.py +2 -2
- graphrag/mind_map_extractor.py +1 -1
- graphrag/search.py +1 -2
- graphrag/utils.py +1 -2
- poetry.lock +14 -24
- pyproject.toml +1 -0
- rag/llm/embedding_model.py +3 -4
- rag/nlp/search.py +7 -8
- rag/raptor.py +1 -2
- rag/svr/task_executor.py +3 -0
- rag/utils/doc_store_conn.py +8 -13
- rag/utils/es_conn.py +2 -3
- rag/utils/infinity_conn.py +2 -3
- sdk/python/poetry.lock +25 -7
- sdk/python/pyproject.toml +1 -0
- sdk/python/ragflow_sdk/__init__.py +3 -0
- sdk/python/ragflow_sdk/modules/chat.py +2 -3
- sdk/python/ragflow_sdk/modules/dataset.py +3 -5
- sdk/python/ragflow_sdk/modules/document.py +2 -3
- sdk/python/ragflow_sdk/ragflow.py +9 -12
agent/component/base.py CHANGED

```diff
@@ -19,7 +19,6 @@ import builtins
 import json
 import os
 from functools import partial
-from typing import Tuple, Union
 
 import pandas as pd
 
@@ -417,7 +416,7 @@ class ComponentBase(ABC):
     def _run(self, history, **kwargs):
        raise NotImplementedError()
 
-    def output(self, allow_partial=True) -> Tuple[str, Union[pd.DataFrame, partial]]:
+    def output(self, allow_partial=True) -> tuple[str, pd.DataFrame | partial]:
        o = getattr(self._param, self._param.output_var_name)
        if not isinstance(o, partial) and not isinstance(o, pd.DataFrame):
            if not isinstance(o, list): o = [o]
```
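
This pattern repeats throughout the PR: `typing.Tuple`, `typing.Union`, and `typing.Optional` spellings are swapped for PEP 585 built-in generics and PEP 604 unions, which Python 3.10+ evaluates natively and beartype checks without the `typing` aliases. A small equivalence sketch (the alias names are illustrative):

```python
from functools import partial
from typing import Optional, Tuple, Union

import pandas as pd

# Legacy spellings, as removed by this PR:
OldOutput = Tuple[str, Union[pd.DataFrame, partial]]
OldSize = Optional[int]

# Modern spellings, as adopted -- identical runtime meaning on 3.10+:
NewOutput = tuple[str, pd.DataFrame | partial]
NewSize = int | None
```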
api/db/db_models.py CHANGED

```diff
@@ -17,7 +17,6 @@ import logging
 import inspect
 import os
 import sys
-import typing
 import operator
 from enum import Enum
 from functools import wraps
@@ -121,13 +120,13 @@ class SerializedField(LongTextField):
                 f"the serialized type {self._serialized_type} is not supported")
 
 
-def is_continuous_field(cls: typing.Type) -> bool:
+def is_continuous_field(cls: type) -> bool:
     if cls in CONTINUOUS_FIELD_TYPE:
         return True
     for p in cls.__bases__:
         if p in CONTINUOUS_FIELD_TYPE:
             return True
-        elif p != Field and p != object:
+        elif p is not Field and p is not object:
             if is_continuous_field(p):
                 return True
     else:
@@ -159,7 +158,7 @@ class BaseModel(Model):
     def to_dict(self):
         return self.__dict__['__data__']
 
-    def to_human_model_dict(self, only_primary_with: list = None):
+    def to_human_model_dict(self, only_primary_with: list | None = None):
         model_dict = self.__dict__['__data__']
 
         if not only_primary_with:
```
api/db/db_utils.py CHANGED

```diff
@@ -15,7 +15,6 @@
 #
 import operator
 from functools import reduce
-from typing import Dict, Type, Union
 
 from playhouse.pool import PooledMySQLDatabase
 
@@ -87,7 +86,7 @@ supported_operators = {
 
 
 def query_dict2expression(
-        model: Type[DataBaseModel], query: Dict[str, Union[bool, int, str, list, tuple]]):
+        model: type[DataBaseModel], query: dict[str, bool | int | str | list | tuple]):
     expression = []
 
     for field, value in query.items():
@@ -105,8 +104,8 @@ def query_dict2expression(
     return reduce(operator.iand, expression)
 
 
-def query_db(model: Type[DataBaseModel], limit: int = 0, offset: int = 0,
-             query: dict = None, order_by: Union[str, list, tuple] = None):
+def query_db(model: type[DataBaseModel], limit: int = 0, offset: int = 0,
+             query: dict = None, order_by: str | list | tuple | None = None):
     data = model.select()
     if query:
         data = data.where(query_dict2expression(model, query))
```
api/ragflow_server.py CHANGED

```diff
@@ -14,6 +14,9 @@
 # limitations under the License.
 #
 
+from beartype.claw import beartype_packages
+beartype_packages(["agent", "api", "deepdoc", "plugins", "rag", "ragflow_sdk"])  # <-- raise exceptions in your code
+
 import logging
 from api.utils.log_utils import initRootLogger
 initRootLogger("ragflow_server")
```
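
`beartype_packages` registers an import hook, which is why the call sits above every other project import: modules from the listed packages that are imported afterwards get `@beartype` applied to their annotated callables automatically, while modules already imported are left untouched. A hedged sketch of the ordering requirement (the single package name is illustrative):

```python
from beartype.claw import beartype_packages

# Hook first: only imports that happen *after* this call are instrumented.
beartype_packages(["rag"])

# From here on, importing any rag.* module yields functions whose annotated
# parameters and returns raise beartype violations on badly typed calls.
import rag.nlp.search  # noqa: E402  (deliberately after the hook)
```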
api/utils/log_utils.py CHANGED

```diff
@@ -28,13 +28,12 @@ def get_project_base_directory():
     )
     return PROJECT_BASE
 
-def initRootLogger(script_path: str, log_level: int = logging.INFO, log_format: str = "%(asctime)-15s %(levelname)-8s %(process)d %(message)s"):
+def initRootLogger(logfile_basename: str, log_level: int = logging.INFO, log_format: str = "%(asctime)-15s %(levelname)-8s %(process)d %(message)s"):
    logger = logging.getLogger()
    if logger.hasHandlers():
        return
 
-    script_name = os.path.basename(script_path)
-    log_path = os.path.abspath(os.path.join(get_project_base_directory(), "logs", f"{os.path.splitext(script_name)[0]}.log"))
+    log_path = os.path.abspath(os.path.join(get_project_base_directory(), "logs", f"{logfile_basename}.log"))
 
    os.makedirs(os.path.dirname(log_path), exist_ok=True)
    logger.setLevel(log_level)
@@ -50,5 +49,5 @@ def initRootLogger(script_path: str, log_level: int = logging.INFO, log_format:
    handler2.setFormatter(formatter)
    logger.addHandler(handler2)
 
-    msg = f"{script_name} log path: {log_path}"
+    msg = f"{logfile_basename} log path: {log_path}"
    logger.info(msg)
```
api/versions.py CHANGED

```diff
@@ -13,11 +13,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import dotenv
-import typing
 import subprocess
 
-def get_ragflow_version() -> typing.Optional[str]:
+def get_ragflow_version() -> str:
     return RAGFLOW_VERSION_INFO
 
 
@@ -42,7 +40,7 @@ def get_closest_tag_and_count():
             return closest_tag
         else:
             return f"{commit_id}({closest_tag}~{commits_count})"
-    except Exception as e:
+    except Exception:
         return 'unknown'
 
 
```
deepdoc/parser/json_parser.py CHANGED

```diff
@@ -3,12 +3,11 @@
 # from https://github.com/langchain-ai/langchain/blob/master/libs/text-splitters/langchain_text_splitters/json.py
 
 import json
-from typing import Any, Dict, List, Optional
+from typing import Any
 from rag.nlp import find_codec
-
 class RAGFlowJsonParser:
     def __init__(
-        self, max_chunk_size: int = 2000, min_chunk_size: Optional[int] = None
+        self, max_chunk_size: int = 2000, min_chunk_size: int | None = None
     ):
         super().__init__()
         self.max_chunk_size = max_chunk_size * 2
@@ -27,12 +26,12 @@ class RAGFlowJsonParser:
         return sections
 
     @staticmethod
-    def _json_size(data: Dict) -> int:
+    def _json_size(data: dict) -> int:
         """Calculate the size of the serialized JSON object."""
         return len(json.dumps(data, ensure_ascii=False))
 
     @staticmethod
-    def _set_nested_dict(d: Dict, path: List[str], value: Any) -> None:
+    def _set_nested_dict(d: dict, path: list[str], value: Any) -> None:
         """Set a value in a nested dictionary based on the given path."""
         for key in path[:-1]:
             d = d.setdefault(key, {})
@@ -54,10 +53,10 @@ class RAGFlowJsonParser:
 
     def _json_split(
         self,
-        data: Dict[str, Any],
-        current_path: Optional[List[str]],
-        chunks: Optional[List[Dict]],
-    ) -> List[Dict]:
+        data: dict[str, Any],
+        current_path: list[str] | None,
+        chunks: list[dict] | None,
+    ) -> list[dict]:
         """
         Split json into maximum size dictionaries while preserving structure.
         """
@@ -87,9 +86,9 @@ class RAGFlowJsonParser:
 
     def split_json(
         self,
-        json_data: Dict[str, Any],
+        json_data: dict[str, Any],
         convert_lists: bool = False,
-    ) -> List[Dict]:
+    ) -> list[dict]:
         """Splits JSON into a list of JSON chunks"""
 
         if convert_lists:
@@ -104,10 +103,10 @@ class RAGFlowJsonParser:
 
     def split_text(
         self,
-        json_data: Dict[str, Any],
+        json_data: dict[str, Any],
         convert_lists: bool = False,
         ensure_ascii: bool = True,
-    ) -> List[str]:
+    ) -> list[str]:
         """Splits JSON into a list of JSON formatted strings"""
 
         chunks = self.split_json(json_data=json_data, convert_lists=convert_lists)
```
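
The modernized signatures above are exactly what beartype will now enforce at runtime. A usage sketch based only on the signatures shown in this diff (the sample data is made up):

```python
from deepdoc.parser.json_parser import RAGFlowJsonParser

parser = RAGFlowJsonParser(max_chunk_size=200)
data = {"title": "RAGFlow", "sections": {"intro": "short", "body": "longer text"}}

# Returns JSON-formatted string chunks; passing a list here would now raise
# a beartype violation, since json_data is annotated as dict[str, Any].
chunks: list[str] = parser.split_text(data, convert_lists=False, ensure_ascii=False)
print(len(chunks))
```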
docs/references/python_api_reference.md CHANGED

```diff
@@ -1059,7 +1059,7 @@ Deletes chat assistants by ID.
 
 #### ids: `list[str]`
 
-The IDs of the chat assistants to delete. Defaults to `None`. If it is not specified, all chat assistants in the system will be deleted.
+The IDs of the chat assistants to delete. Defaults to `None`. If it is empty or not specified, all chat assistants in the system will be deleted.
 
 ### Returns
 
```
graphrag/community_reports_extractor.py CHANGED

```diff
@@ -9,8 +9,8 @@ import logging
 import json
 import re
 import traceback
+from typing import Callable
 from dataclasses import dataclass
-from typing import List, Callable
 import networkx as nx
 import pandas as pd
 from graphrag import leiden
@@ -26,8 +26,8 @@ from timeit import default_timer as timer
 class CommunityReportsResult:
     """Community reports result class definition."""
 
-    output: List[str]
-    structured_output: List[dict]
+    output: list[str]
+    structured_output: list[dict]
 
 
 class CommunityReportsExtractor:
@@ -53,7 +53,7 @@ class CommunityReportsExtractor:
         self._max_report_length = max_report_length or 1500
 
     def __call__(self, graph: nx.Graph, callback: Callable | None = None):
-        communities: dict[str, dict[str, List]] = leiden.run(graph, {})
+        communities: dict[str, dict[str, list]] = leiden.run(graph, {})
         total = sum([len(comm.items()) for _, comm in communities.items()])
         relations_df = pd.DataFrame([{"source":s, "target": t, **attr} for s, t, attr in graph.edges(data=True)])
         res_str = []
```
graphrag/entity_embedding.py CHANGED

```diff
@@ -6,7 +6,6 @@ Reference:
 """
 
 from typing import Any
-
 import numpy as np
 import networkx as nx
 from graphrag.leiden import stable_largest_connected_component
```
graphrag/graph_extractor.py CHANGED

```diff
@@ -9,8 +9,8 @@ import logging
 import numbers
 import re
 import traceback
+from typing import Any, Callable
 from dataclasses import dataclass
-from typing import Any, Mapping, Callable
 import tiktoken
 from graphrag.graph_prompt import GRAPH_EXTRACTION_PROMPT, CONTINUE_PROMPT, LOOP_PROMPT
 from graphrag.utils import ErrorHandlerFn, perform_variable_replacements, clean_str
```
graphrag/index.py CHANGED

```diff
@@ -18,7 +18,6 @@ import os
 from concurrent.futures import ThreadPoolExecutor
 import json
 from functools import reduce
-from typing import List
 import networkx as nx
 from api.db import LLMType
 from api.db.services.llm_service import LLMBundle
@@ -53,7 +52,7 @@ def graph_merge(g1, g2):
     return g
 
 
-def build_knowledge_graph_chunks(tenant_id: str, chunks: List[str], callback, entity_types=DEFAULT_ENTITY_TYPES):
+def build_knowledge_graph_chunks(tenant_id: str, chunks: list[str], callback, entity_types=DEFAULT_ENTITY_TYPES):
    _, tenant = TenantService.get_by_id(tenant_id)
    llm_bdl = LLMBundle(tenant_id, LLMType.CHAT, tenant.llm_id)
    ext = GraphExtractor(llm_bdl)
```
graphrag/leiden.py CHANGED

```diff
@@ -6,8 +6,8 @@ Reference:
 """
 
 import logging
-from typing import Any, cast, List
 import html
+from typing import Any
 from graspologic.partition import hierarchical_leiden
 from graspologic.utils import largest_connected_component
 
@@ -132,7 +132,7 @@ def run(graph: nx.Graph, args: dict[str, Any]) -> dict[int, dict[str, dict]]:
     return results_by_level
 
 
-def add_community_info2graph(graph: nx.Graph, nodes: List[str], community_title):
+def add_community_info2graph(graph: nx.Graph, nodes: list[str], community_title):
     for n in nodes:
         if "communities" not in graph.nodes[n]:
             graph.nodes[n]["communities"] = []
```
graphrag/mind_map_extractor.py CHANGED

```diff
@@ -19,9 +19,9 @@ import collections
 import os
 import re
 import traceback
+from typing import Any
 from concurrent.futures import ThreadPoolExecutor
 from dataclasses import dataclass
-from typing import Any
 
 from graphrag.mind_map_prompt import MIND_MAP_EXTRACTION_PROMPT
 from graphrag.utils import ErrorHandlerFn, perform_variable_replacements
```
graphrag/search.py CHANGED

```diff
@@ -15,7 +15,6 @@
 #
 import json
 from copy import deepcopy
-from typing import Dict
 
 import pandas as pd
 from rag.utils.doc_store_conn import OrderByExpr, FusionExpr
@@ -25,7 +24,7 @@ from rag.nlp.search import Dealer
 
 class KGSearch(Dealer):
     def search(self, req, idxnm, kb_ids, emb_mdl, highlight=False):
-        def merge_into_first(sres, title="") -> Dict[str, str]:
+        def merge_into_first(sres, title="") -> dict[str, str]:
             if not sres:
                 return {}
             content_with_weight = ""
```
graphrag/utils.py CHANGED

```diff
@@ -7,8 +7,7 @@ Reference:
 
 import html
 import re
-from collections.abc import Callable
-from typing import Any
+from typing import Any, Callable
 
 ErrorHandlerFn = Callable[[BaseException | None, str | None, dict | None], None]
 
```
poetry.lock CHANGED

```diff
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.
+# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
 
 [[package]]
 name = "accelerate"
@@ -413,7 +413,7 @@ name = "aspose-slides"
 version = "24.11.0"
 description = "Aspose.Slides for Python via .NET is a presentation file formats processing library for working with Microsoft PowerPoint files without using Microsoft PowerPoint."
 optional = false
-python-versions = "
+python-versions = ">=3.5,<3.14"
 files = [
     {file = "Aspose.Slides-24.11.0-py3-none-macosx_10_14_x86_64.whl", hash = "sha256:b4819364497f9e075e00e63ee8fba8745dda4c910e199d5201e4abeebdcdec89"},
     {file = "Aspose.Slides-24.11.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:bbeb5f0b14901f29f209beeac694a183f8d36c9475556ddeed3b2edb8107536a"},
@@ -565,7 +565,7 @@ name = "bce-python-sdk"
 version = "0.9.23"
 description = "BCE SDK for python"
 optional = false
-python-versions = "!=3.0
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, <4"
 files = [
     {file = "bce_python_sdk-0.9.23-py3-none-any.whl", hash = "sha256:8debe21a040e00060f6044877d594765ed7b18bc765c6bf16b878bca864140a3"},
     {file = "bce_python_sdk-0.9.23.tar.gz", hash = "sha256:19739fed5cd0725356fc5ffa2acbdd8fb23f2a81edb91db21a03174551d0cf41"},
@@ -1706,7 +1706,7 @@ name = "deprecated"
 version = "1.2.15"
 description = "Python @deprecated decorator to deprecate old python classes, functions or methods."
 optional = false
-python-versions = "!=3.0
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
 files = [
     {file = "Deprecated-1.2.15-py2.py3-none-any.whl", hash = "sha256:353bc4a8ac4bfc96800ddab349d89c25dec1079f65fd53acdcc1e0b975b21320"},
     {file = "deprecated-1.2.15.tar.gz", hash = "sha256:683e561a90de76239796e6b6feac66b99030d2dd3fcf61ef996330f14bbb9b0d"},
@@ -2023,7 +2023,7 @@ name = "fastembed"
 version = "0.3.6"
 description = "Fast, light, accurate library built for retrieval embedding generation"
 optional = false
-python-versions = "
+python-versions = ">=3.8.0,<3.13"
 files = [
     {file = "fastembed-0.3.6-py3-none-any.whl", hash = "sha256:2bf70edae28bb4ccd9e01617098c2075b0ba35b88025a3d22b0e1e85b2c488ce"},
     {file = "fastembed-0.3.6.tar.gz", hash = "sha256:c93c8ec99b8c008c2d192d6297866b8d70ec7ac8f5696b34eb5ea91f85efd15f"},
@@ -2940,7 +2940,7 @@ name = "graspologic"
 version = "3.4.1"
 description = "A set of Python modules for graph statistics"
 optional = false
-python-versions = "
+python-versions = ">=3.9,<3.13"
 files = [
     {file = "graspologic-3.4.1-py3-none-any.whl", hash = "sha256:c6563e087eda599bad1de831d4b7321c0daa7a82f4e85a7d7737ff67e07cdda2"},
     {file = "graspologic-3.4.1.tar.gz", hash = "sha256:7561f0b852a2bccd351bff77e8db07d9892f9dfa35a420fdec01690e4fdc8075"},
@@ -3625,7 +3625,7 @@ name = "infinity-emb"
 version = "0.0.66"
 description = "Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip."
 optional = false
-python-versions = "
+python-versions = ">=3.9,<4"
 files = [
     {file = "infinity_emb-0.0.66-py3-none-any.whl", hash = "sha256:1dc6ed9fa48e6cbe83650a7583dbbb4bc393900c39c326bb0aff2ddc090ac018"},
     {file = "infinity_emb-0.0.66.tar.gz", hash = "sha256:9c9a361ccebf8e8f626c1f685286518d03d0c35e7d14179ae7c2500b4fc68b98"},
@@ -4070,7 +4070,7 @@ name = "litellm"
 version = "1.48.0"
 description = "Library to easily interface with LLM API providers"
 optional = false
-python-versions = "!=2.7
+python-versions = ">=3.8, !=2.7.*, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*, !=3.7.*"
 files = [
     {file = "litellm-1.48.0-py3-none-any.whl", hash = "sha256:7765e8a92069778f5fc66aacfabd0e2f8ec8d74fb117f5e475567d89b0d376b9"},
     {file = "litellm-1.48.0.tar.gz", hash = "sha256:31a9b8a25a9daf44c24ddc08bf74298da920f2c5cea44135e5061278d0aa6fc9"},
@@ -6197,7 +6197,7 @@ name = "psutil"
 version = "6.1.0"
 description = "Cross-platform lib for process and system monitoring in Python."
 optional = false
-python-versions = "!=3.0
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
 files = [
     {file = "psutil-6.1.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ff34df86226c0227c52f38b919213157588a678d049688eded74c76c8ba4a5d0"},
     {file = "psutil-6.1.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:c0e0c00aa18ca2d3b2b991643b799a15fc8f0563d2ebb6040f64ce8dc027b942"},
@@ -6219,8 +6219,8 @@ files = [
 ]
 
 [package.extras]
-dev = ["black", "check-manifest", "coverage", "packaging", "pylint", "pyperf", "pypinfo", "pytest-cov", "requests", "rstcheck", "ruff", "sphinx", "
-test = ["pytest", "pytest-xdist", "setuptools"]
+dev = ["black", "check-manifest", "coverage", "packaging", "pylint", "pyperf", "pypinfo", "pytest-cov", "requests", "rstcheck", "ruff", "sphinx", "sphinx-rtd-theme", "toml-sort", "twine", "virtualenv", "wheel"]
+test = ["enum34", "futures", "ipaddress", "mock (==1.0.1)", "pytest (==4.6.11)", "pytest-xdist", "setuptools", "unittest2"]
 
 [[package]]
 name = "psycopg2-binary"
@@ -7690,40 +7690,30 @@ files = [
     {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:a606ef75a60ecf3d924613892cc603b154178ee25abb3055db5062da811fd969"},
     {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd5415dded15c3822597455bc02bcd66e81ef8b7a48cb71a33628fc9fdde39df"},
     {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f66efbc1caa63c088dead1c4170d148eabc9b80d95fb75b6c92ac0aad2437d76"},
-    {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:22353049ba4181685023b25b5b51a574bce33e7f51c759371a7422dcae5402a6"},
-    {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:932205970b9f9991b34f55136be327501903f7c66830e9760a8ffb15b07f05cd"},
     {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-win32.whl", hash = "sha256:3eac5a91891ceb88138c113f9db04f3cebdae277f5d44eaa3651a4f573e6a5da"},
     {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-win_amd64.whl", hash = "sha256:ab007f2f5a87bd08ab1499bdf96f3d5c6ad4dcfa364884cb4549aa0154b13a28"},
     {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:4a6679521a58256a90b0d89e03992c15144c5f3858f40d7c18886023d7943db6"},
     {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:d84318609196d6bd6da0edfa25cedfbabd8dbde5140a0a23af29ad4b8f91fb1e"},
     {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb43a269eb827806502c7c8efb7ae7e9e9d0573257a46e8e952f4d4caba4f31e"},
     {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:811ea1594b8a0fb466172c384267a4e5e367298af6b228931f273b111f17ef52"},
-    {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cf12567a7b565cbf65d438dec6cfbe2917d3c1bdddfce84a9930b7d35ea59642"},
-    {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7dd5adc8b930b12c8fc5b99e2d535a09889941aa0d0bd06f4749e9a9397c71d2"},
     {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-win32.whl", hash = "sha256:bd0a08f0bab19093c54e18a14a10b4322e1eacc5217056f3c063bd2f59853ce4"},
     {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-win_amd64.whl", hash = "sha256:a274fb2cb086c7a3dea4322ec27f4cb5cc4b6298adb583ab0e211a4682f241eb"},
     {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:20b0f8dc160ba83b6dcc0e256846e1a02d044e13f7ea74a3d1d56ede4e48c632"},
     {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:943f32bc9dedb3abff9879edc134901df92cfce2c3d5c9348f172f62eb2d771d"},
     {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95c3829bb364fdb8e0332c9931ecf57d9be3519241323c5274bd82f709cebc0c"},
     {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:749c16fcc4a2b09f28843cda5a193e0283e47454b63ec4b81eaa2242f50e4ccd"},
-    {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bf165fef1f223beae7333275156ab2022cffe255dcc51c27f066b4370da81e31"},
-    {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:32621c177bbf782ca5a18ba4d7af0f1082a3f6e517ac2a18b3974d4edf349680"},
     {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-win32.whl", hash = "sha256:e8c4ebfcfd57177b572e2040777b8abc537cdef58a2120e830124946aa9b42c5"},
     {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-win_amd64.whl", hash = "sha256:0467c5965282c62203273b838ae77c0d29d7638c8a4e3a1c8bdd3602c10904e4"},
     {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:4c8c5d82f50bb53986a5e02d1b3092b03622c02c2eb78e29bec33fd9593bae1a"},
     {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux2014_aarch64.whl", hash = "sha256:e7e3736715fbf53e9be2a79eb4db68e4ed857017344d697e8b9749444ae57475"},
     {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b7e75b4965e1d4690e93021adfcecccbca7d61c7bddd8e22406ef2ff20d74ef"},
     {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96777d473c05ee3e5e3c3e999f5d23c6f4ec5b0c38c098b3a5229085f74236c6"},
-    {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:3bc2a80e6420ca8b7d3590791e2dfc709c88ab9152c00eeb511c9875ce5778bf"},
-    {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e188d2699864c11c36cdfdada94d781fd5d6b0071cd9c427bceb08ad3d7c70e1"},
     {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-win32.whl", hash = "sha256:6442cb36270b3afb1b4951f060eccca1ce49f3d087ca1ca4563a6eb479cb3de6"},
     {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-win_amd64.whl", hash = "sha256:e5b8daf27af0b90da7bb903a876477a9e6d7270be6146906b276605997c7e9a3"},
     {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:fc4b630cd3fa2cf7fce38afa91d7cfe844a9f75d7f0f36393fa98815e911d987"},
     {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:bc5f1e1c28e966d61d2519f2a3d451ba989f9ea0f2307de7bc45baa526de9e45"},
     {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a0e060aace4c24dcaf71023bbd7d42674e3b230f7e7b97317baf1e953e5b519"},
     {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2f1c3765db32be59d18ab3953f43ab62a761327aafc1594a2a1fbe038b8b8a7"},
-    {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d85252669dc32f98ebcd5d36768f5d4faeaeaa2d655ac0473be490ecdae3c285"},
-    {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e143ada795c341b56de9418c58d028989093ee611aa27ffb9b7f609c00d813ed"},
     {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-win32.whl", hash = "sha256:beffaed67936fbbeffd10966a4eb53c402fafd3d6833770516bf7314bc6ffa12"},
     {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-win_amd64.whl", hash = "sha256:040ae85536960525ea62868b642bdb0c2cc6021c9f9d507810c0c604e66f5a7b"},
     {file = "ruamel.yaml.clib-0.2.12.tar.gz", hash = "sha256:6c8fbb13ec503f99a91901ab46e0b07ae7941cd527393187039aec586fdfd36f"},
@@ -7734,7 +7724,7 @@ name = "s3transfer"
 version = "0.10.3"
 description = "An Amazon S3 Transfer Manager"
 optional = false
-python-versions = ">=3.8"
+python-versions = ">= 3.8"
 files = [
     {file = "s3transfer-0.10.3-py3-none-any.whl", hash = "sha256:263ed587a5803c6c708d3ce44dc4dfedaab4c1a32e8329bab818933d79ddcf5d"},
     {file = "s3transfer-0.10.3.tar.gz", hash = "sha256:4f50ed74ab84d474ce614475e0b8d5047ff080810aac5d01ea25231cfc944b0c"},
@@ -8196,7 +8186,7 @@ name = "smart-open"
 version = "7.0.5"
 description = "Utils for streaming large files (S3, HDFS, GCS, Azure Blob Storage, gzip, bz2...)"
 optional = false
-python-versions = "
+python-versions = ">=3.7,<4.0"
 files = [
     {file = "smart_open-7.0.5-py3-none-any.whl", hash = "sha256:8523ed805c12dff3eaa50e9c903a6cb0ae78800626631c5fe7ea073439847b89"},
     {file = "smart_open-7.0.5.tar.gz", hash = "sha256:d3672003b1dbc85e2013e4983b88eb9a5ccfd389b0d4e5015f39a9ee5620ec18"},
@@ -9967,4 +9957,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.11,<3.13"
-content-hash = "
+content-hash = "dcf6c6a1d7fc52f982ef717bc48b13837e693b53a4fc5d9c06cefd253227259f"
```
pyproject.toml CHANGED

```diff
@@ -17,6 +17,7 @@ azure-storage-file-datalake = "12.16.0"
 anthropic = "=0.34.1"
 arxiv = "2.1.3"
 aspose-slides = { version = "^24.9.0", markers = "platform_machine == 'x86_64'" }
+beartype = "^0.18.5"
 bio = "1.7.1"
 boto3 = "1.34.140"
 botocore = "1.34.140"
```
rag/llm/embedding_model.py CHANGED

```diff
@@ -15,7 +15,6 @@
 #
 import logging
 import re
-from typing import Optional
 import threading
 import requests
 from huggingface_hub import snapshot_download
@@ -242,10 +241,10 @@ class FastEmbed(Base):
 
     def __init__(
         self,
-        key: Optional[str] = None,
+        key: str | None = None,
         model_name: str = "BAAI/bge-small-en-v1.5",
-        cache_dir: Optional[str] = None,
-        threads: Optional[int] = None,
+        cache_dir: str | None = None,
+        threads: int | None = None,
         **kwargs,
     ):
         if not settings.LIGHTEN and not FastEmbed._model:
```
rag/nlp/search.py CHANGED

```diff
@@ -17,7 +17,6 @@
 import logging
 import re
 import json
-from typing import List, Optional, Dict, Union
 from dataclasses import dataclass
 
 from rag.utils import rmSpace
@@ -37,13 +36,13 @@ class Dealer:
     @dataclass
     class SearchResult:
         total: int
-        ids: List[str]
-        query_vector: Optional[List[float]] = None
-        field: Optional[Dict] = None
-        highlight: Optional[Dict] = None
-        aggregation: Optional[Union[List, Dict]] = None
-        keywords: Optional[List[str]] = None
-        group_docs: Optional[List[List]] = None
+        ids: list[str]
+        query_vector: list[float] | None = None
+        field: dict | None = None
+        highlight: dict | None = None
+        aggregation: list | dict | None = None
+        keywords: list[str] | None = None
+        group_docs: list[list] | None = None
 
     def get_vector(self, txt, emb_mdl, topk=10, similarity=0.1):
         qv, _ = emb_mdl.encode_queries(txt)
```
rag/raptor.py CHANGED

```diff
@@ -17,7 +17,6 @@ import logging
 import re
 from concurrent.futures import ThreadPoolExecutor, ALL_COMPLETED, wait
 from threading import Lock
-from typing import Tuple
 import umap
 import numpy as np
 from sklearn.mixture import GaussianMixture
@@ -45,7 +44,7 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
         optimal_clusters = n_clusters[np.argmin(bics)]
         return optimal_clusters
 
-    def __call__(self, chunks: Tuple[str, np.ndarray], random_state, callback=None):
+    def __call__(self, chunks: tuple[str, np.ndarray], random_state, callback=None):
         layers = [(0, len(chunks))]
         start, end = 0, len(chunks)
         if len(chunks) <= 1: return
```
rag/svr/task_executor.py CHANGED

```diff
@@ -13,6 +13,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+from beartype.claw import beartype_packages
+beartype_packages(["agent", "api", "deepdoc", "plugins", "rag", "ragflow_sdk"])  # <-- raise exceptions in your code
+
 import logging
 import sys
 from api.utils.log_utils import initRootLogger
```
rag/utils/doc_store_conn.py CHANGED

```diff
@@ -1,19 +1,17 @@
 from abc import ABC, abstractmethod
-from typing import Optional, Union
 from dataclasses import dataclass
 import numpy as np
 import polars as pl
-from typing import List, Dict
 
 DEFAULT_MATCH_VECTOR_TOPN = 10
 DEFAULT_MATCH_SPARSE_TOPN = 10
-VEC = Union[list, np.ndarray]
+VEC = list | np.ndarray
 
 
 @dataclass
 class SparseVector:
     indices: list[int]
-    values: Optional[Union[List[float], List[int]]] = None
+    values: list[float] | list[int] | None = None
 
     def __post_init__(self):
         assert (self.values is None) or (len(self.indices) == len(self.values))
@@ -82,7 +80,7 @@ class MatchSparseExpr(ABC):
         sparse_data: SparseVector | dict,
         distance_type: str,
         topn: int,
-        opt_params: Optional[dict] = None,
+        opt_params: dict | None = None,
     ):
         self.vector_column_name = vector_column_name
         self.sparse_data = sparse_data
@@ -98,7 +96,7 @@ class MatchTensorExpr(ABC):
         query_data: VEC,
         query_data_type: str,
         topn: int,
-        extra_option: Optional[dict] = None,
+        extra_option: dict | None = None,
     ):
         self.column_name = column_name
         self.query_data = query_data
@@ -108,16 +106,13 @@ class MatchTensorExpr(ABC):
 
 
 class FusionExpr(ABC):
-    def __init__(self, method: str, topn: int, fusion_params: Optional[dict] = None):
+    def __init__(self, method: str, topn: int, fusion_params: dict | None = None):
         self.method = method
         self.topn = topn
         self.fusion_params = fusion_params
 
 
-MatchExpr = Union[
-    MatchTextExpr, MatchDenseExpr, MatchSparseExpr, MatchTensorExpr, FusionExpr
-]
-
+MatchExpr = MatchTextExpr | MatchDenseExpr | MatchSparseExpr | MatchTensorExpr | FusionExpr
 
 class OrderByExpr(ABC):
     def __init__(self):
@@ -229,11 +224,11 @@ class DocStoreConnection(ABC):
         raise NotImplementedError("Not implemented")
 
     @abstractmethod
-    def getFields(self, res, fields: List[str]) -> Dict[str, dict]:
+    def getFields(self, res, fields: list[str]) -> dict[str, dict]:
         raise NotImplementedError("Not implemented")
 
     @abstractmethod
-    def getHighlight(self, res, keywords: List[str], fieldnm: str):
+    def getHighlight(self, res, keywords: list[str], fieldnm: str):
         raise NotImplementedError("Not implemented")
 
     @abstractmethod
```
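
Rewriting `MatchExpr` as a PEP 604 union is more than cosmetic: a `types.UnionType` built with `|` works directly in `isinstance()` checks, which `typing.Union[...]` rejects with a `TypeError` on the Python versions this project targets (3.11–3.12). A stand-in sketch (the two classes below are illustrative, not the real expression types):

```python
from typing import Union

class TextExpr: ...
class FusionExpr: ...

OldAlias = Union[TextExpr, FusionExpr]   # typing.Union special form
NewAlias = TextExpr | FusionExpr         # types.UnionType (PEP 604)

e = TextExpr()
print(isinstance(e, NewAlias))           # True: | unions support isinstance
try:
    isinstance(e, OldAlias)              # typing.Union does not
except TypeError as err:
    print(err)
```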
rag/utils/es_conn.py CHANGED

```diff
@@ -3,7 +3,6 @@ import re
 import json
 import time
 import os
-from typing import List, Dict
 
 import copy
 from elasticsearch import Elasticsearch
@@ -363,7 +362,7 @@ class ESConnection(DocStoreConnection):
             rr.append(d["_source"])
         return rr
 
-    def getFields(self, res, fields: List[str]) -> Dict[str, dict]:
+    def getFields(self, res, fields: list[str]) -> dict[str, dict]:
         res_fields = {}
         if not fields:
             return {}
@@ -382,7 +381,7 @@ class ESConnection(DocStoreConnection):
             res_fields[d["id"]] = m
         return res_fields
 
-    def getHighlight(self, res, keywords: List[str], fieldnm: str):
+    def getHighlight(self, res, keywords: list[str], fieldnm: str):
         ans = {}
         for d in res["hits"]["hits"]:
             hlts = d.get("highlight")
```
rag/utils/infinity_conn.py CHANGED

```diff
@@ -3,7 +3,6 @@ import os
 import re
 import json
 import time
-from typing import List, Dict
 import infinity
 from infinity.common import ConflictType, InfinityException
 from infinity.index import IndexInfo, IndexType
@@ -384,7 +383,7 @@ class InfinityConnection(DocStoreConnection):
     def getChunkIds(self, res):
         return list(res["id"])
 
-    def getFields(self, res, fields: List[str]) -> List[str, dict]:
+    def getFields(self, res, fields: list[str]) -> list[str, dict]:
         res_fields = {}
         if not fields:
             return {}
@@ -412,7 +411,7 @@ class InfinityConnection(DocStoreConnection):
             res_fields[id] = m
         return res_fields
 
-    def getHighlight(self, res, keywords: List[str], fieldnm: str):
+    def getHighlight(self, res, keywords: list[str], fieldnm: str):
         ans = {}
         num_rows = len(res)
         column_id = res["id"]
```
sdk/python/poetry.lock
CHANGED
@@ -1,5 +1,23 @@
 # This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
 
+[[package]]
+name = "beartype"
+version = "0.18.5"
+description = "Unbearably fast runtime type checking in pure Python."
+optional = false
+python-versions = ">=3.8.0"
+files = [
+    {file = "beartype-0.18.5-py3-none-any.whl", hash = "sha256:5301a14f2a9a5540fe47ec6d34d758e9cd8331d36c4760fc7a5499ab86310089"},
+    {file = "beartype-0.18.5.tar.gz", hash = "sha256:264ddc2f1da9ec94ff639141fbe33d22e12a9f75aa863b83b7046ffff1381927"},
+]
+
+[package.extras]
+all = ["typing-extensions (>=3.10.0.0)"]
+dev = ["autoapi (>=0.9.0)", "coverage (>=5.5)", "equinox", "mypy (>=0.800)", "numpy", "pandera", "pydata-sphinx-theme (<=0.7.2)", "pytest (>=4.0.0)", "sphinx", "sphinx (>=4.2.0,<6.0.0)", "sphinxext-opengraph (>=0.7.5)", "tox (>=3.20.1)", "typing-extensions (>=3.10.0.0)"]
+doc-rtd = ["autoapi (>=0.9.0)", "pydata-sphinx-theme (<=0.7.2)", "sphinx (>=4.2.0,<6.0.0)", "sphinxext-opengraph (>=0.7.5)"]
+test-tox = ["equinox", "mypy (>=0.800)", "numpy", "pandera", "pytest (>=4.0.0)", "sphinx", "typing-extensions (>=3.10.0.0)"]
+test-tox-coverage = ["coverage (>=5.5)"]
+
 [[package]]
 name = "certifi"
 version = "2024.8.30"
@@ -177,13 +195,13 @@ files = [
 
 [[package]]
 name = "packaging"
-version = "24.
+version = "24.2"
 description = "Core utilities for Python packages"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "packaging-24.
-    {file = "packaging-24.
+    {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"},
+    {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"},
 ]
 
 [[package]]
@@ -246,13 +264,13 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
 
 [[package]]
 name = "tomli"
-version = "2.0
+version = "2.1.0"
 description = "A lil' TOML parser"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "tomli-2.0
-    {file = "tomli-2.0.
+    {file = "tomli-2.1.0-py3-none-any.whl", hash = "sha256:a5c57c3d1c56f5ccdf89f6523458f60ef716e210fc47c4cfb188c5ba473e0391"},
+    {file = "tomli-2.1.0.tar.gz", hash = "sha256:3f646cae2aec94e17d04973e4249548320197cfabdf130015d023de4b74d8ab8"},
 ]
 
 [[package]]
@@ -275,4 +293,4 @@ zstd = ["zstandard (>=0.18.0)"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "
+content-hash = "19565d31d822b0573f505662c664d735194134a505f43bbd1657c033f87bb82d"
sdk/python/pyproject.toml
CHANGED
@@ -10,6 +10,7 @@ package-mode = true
 [tool.poetry.dependencies]
 python = "^3.10"
 requests = "^2.30.0"
+beartype = "^0.18.5"
 pytest = "^8.0.0"
sdk/python/ragflow_sdk/__init__.py
CHANGED
@@ -1,3 +1,6 @@
+from beartype.claw import beartype_this_package
+beartype_this_package()  # <-- raise exceptions in your code
+
 import importlib.metadata
 
 __version__ = importlib.metadata.version("ragflow_sdk")
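Because `beartype_this_package()` runs before the imports that follow it, every annotated callable in `ragflow_sdk` gets wrapped at import time, and type errors surface at the call site instead of deep inside a request. An illustrative session (the API key and URL are placeholders):

```python
from beartype.roar import BeartypeCallHintParamViolation
from ragflow_sdk import RAGFlow

rag = RAGFlow(api_key="<YOUR_API_KEY>", base_url="http://localhost:9380")
try:
    rag.delete_datasets(ids="dataset-1")   # annotated as list[str] | None, not str
except BeartypeCallHintParamViolation as err:
    print(err)   # names the offending parameter and the expected type
```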
sdk/python/ragflow_sdk/modules/chat.py
CHANGED
@@ -1,4 +1,3 @@
-from typing import List
 
 from .base import Base
 from .session import Session
@@ -58,7 +57,7 @@ class Chat(Base):
         raise Exception(res["message"])
 
     def list_sessions(self,page: int = 1, page_size: int = 30, orderby: str = "create_time", desc: bool = True,
-                      id: str = None, name: str = None) -> List[Session]:
+                      id: str | None = None, name: str | None = None) -> list[Session]:
         res = self.get(f'/chats/{self.id}/sessions',{"page": page, "page_size": page_size, "orderby": orderby, "desc": desc, "id": id, "name": name} )
         res = res.json()
         if res.get("code") == 0:
@@ -68,7 +67,7 @@ class Chat(Base):
             return result_list
         raise Exception(res["message"])
 
-    def delete_sessions(self,ids: List[str] = None):
+    def delete_sessions(self,ids: list[str] | None = None):
         res = self.rm(f"/chats/{self.id}/sessions", {"ids": ids})
         res = res.json()
         if res.get("code") != 0:
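A short usage sketch for the two methods above, assuming a `Chat` instance `chat` obtained elsewhere from the SDK and sessions that expose an `id` attribute:

```python
sessions = chat.list_sessions(page=1, page_size=10)      # -> list[Session]
chat.delete_sessions(ids=[s.id for s in sessions[:1]])   # delete the first one returned
```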
sdk/python/ragflow_sdk/modules/dataset.py
CHANGED
@@ -1,5 +1,3 @@
-from typing import List
-
 from .document import Document
 
 from .base import Base
@@ -35,7 +33,7 @@ class DataSet(Base):
         if res.get("code") != 0:
             raise Exception(res["message"])
 
-    def upload_documents(self,document_list: List[dict]):
+    def upload_documents(self,document_list: list[dict]):
         url = f"/datasets/{self.id}/documents"
         files = [("file",(ele["displayed_name"],ele["blob"])) for ele in document_list]
         res = self.post(path=url,json=None,files=files)
@@ -48,7 +46,7 @@ class DataSet(Base):
             return doc_list
         raise Exception(res.get("message"))
 
-    def list_documents(self, id: str = None, keywords: str = None, page: int =1, page_size: int = 30, orderby: str = "create_time", desc: bool = True):
+    def list_documents(self, id: str | None = None, keywords: str | None = None, page: int = 1, page_size: int = 30, orderby: str = "create_time", desc: bool = True):
         res = self.get(f"/datasets/{self.id}/documents",params={"id": id,"keywords": keywords,"page": page,"page_size": page_size,"orderby": orderby,"desc": desc})
         res = res.json()
         documents = []
@@ -58,7 +56,7 @@ class DataSet(Base):
             return documents
         raise Exception(res["message"])
 
-    def delete_documents(self,ids: List[str] = None):
+    def delete_documents(self,ids: list[str] | None = None):
         res = self.rm(f"/datasets/{self.id}/documents",{"ids":ids})
         res = res.json()
         if res.get("code") != 0:
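As the list comprehension above shows, `upload_documents` expects each entry to carry `displayed_name` and `blob` keys. A minimal sketch, assuming a `DataSet` instance `dataset` (the file name is a placeholder):

```python
with open("manual.pdf", "rb") as f:
    dataset.upload_documents([{"displayed_name": "manual.pdf", "blob": f.read()}])
```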
sdk/python/ragflow_sdk/modules/document.py
CHANGED
@@ -1,7 +1,6 @@
 import json
 from .base import Base
 from .chunk import Chunk
-from typing import List
 
 
 class Document(Base):
@@ -63,14 +62,14 @@ class Document(Base):
         raise Exception(res.get("message"))
 
 
-    def add_chunk(self, content: str,important_keywords: List[str] = []):
+    def add_chunk(self, content: str,important_keywords: list[str] = []):
         res = self.post(f'/datasets/{self.dataset_id}/documents/{self.id}/chunks', {"content":content,"important_keywords":important_keywords})
         res = res.json()
         if res.get("code") == 0:
             return Chunk(self.rag,res["data"].get("chunk"))
         raise Exception(res.get("message"))
 
-    def delete_chunks(self,ids: List[str] = None):
+    def delete_chunks(self,ids: list[str] | None = None):
         res = self.rm(f"/datasets/{self.dataset_id}/documents/{self.id}/chunks",{"chunk_ids":ids})
         res = res.json()
         if res.get("code")!=0:
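And a matching sketch for the chunk methods above, assuming a `Document` instance `doc` and that returned chunks expose an `id` attribute:

```python
chunk = doc.add_chunk(content="Beartype validates arguments at call time.",
                      important_keywords=["beartype"])
doc.delete_chunks(ids=[chunk.id])
```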
sdk/python/ragflow_sdk/ragflow.py
CHANGED
@@ -13,14 +13,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import List
-
 import requests
 
 from .modules.chat import Chat
 from .modules.chunk import Chunk
 from .modules.dataset import DataSet
-from .modules.document import Document
 
 
 class RAGFlow:
@@ -64,7 +61,7 @@ class RAGFlow:
             return DataSet(self, res["data"])
         raise Exception(res["message"])
 
-    def delete_datasets(self, ids: List[str] = None):
+    def delete_datasets(self, ids: list[str] | None = None):
         res = self.delete("/datasets",{"ids": ids})
         res=res.json()
         if res.get("code") != 0:
@@ -77,8 +74,8 @@ class RAGFlow:
         raise Exception("Dataset %s not found" % name)
 
     def list_datasets(self, page: int = 1, page_size: int = 30, orderby: str = "create_time", desc: bool = True,
-                      id: str = None, name: str = None) -> \
-        List[DataSet]:
+                      id: str | None = None, name: str | None = None) -> \
+        list[DataSet]:
         res = self.get("/datasets",
                        {"page": page, "page_size": page_size, "orderby": orderby, "desc": desc, "id": id, "name": name})
         res = res.json()
@@ -89,8 +86,8 @@ class RAGFlow:
             return result_list
         raise Exception(res["message"])
 
-    def create_chat(self, name: str, avatar: str = "", dataset_ids: List[str] = [],
-                    llm: Chat.LLM = None, prompt: Chat.Prompt = None) -> Chat:
+    def create_chat(self, name: str, avatar: str = "", dataset_ids: list[str] = [],
+                    llm: Chat.LLM | None = None, prompt: Chat.Prompt | None = None) -> Chat:
         dataset_list = []
         for id in dataset_ids:
             dataset_list.append(id)
@@ -135,7 +132,7 @@ class RAGFlow:
             return Chat(self, res["data"])
         raise Exception(res["message"])
 
-    def delete_chats(self,ids: List[str] = None):
+    def delete_chats(self,ids: list[str] | None = None):
         res = self.delete('/chats',
                           {"ids":ids})
         res = res.json()
@@ -143,7 +140,7 @@ class RAGFlow:
         raise Exception(res["message"])
 
     def list_chats(self, page: int = 1, page_size: int = 30, orderby: str = "create_time", desc: bool = True,
-                   id: str = None, name: str = None) -> List[Chat]:
+                   id: str | None = None, name: str | None = None) -> list[Chat]:
         res = self.get("/chats",{"page": page, "page_size": page_size, "orderby": orderby, "desc": desc, "id": id, "name": name})
         res = res.json()
         result_list = []
@@ -154,7 +151,7 @@ class RAGFlow:
         raise Exception(res["message"])
 
 
-    def retrieve(self, dataset_ids, document_ids=None, question="", page=1, page_size=30, similarity_threshold=0.2, vector_similarity_weight=0.3, top_k=1024, rerank_id:str=None, keyword:bool=False, ):
+    def retrieve(self, dataset_ids, document_ids=None, question="", page=1, page_size=30, similarity_threshold=0.2, vector_similarity_weight=0.3, top_k=1024, rerank_id: str | None = None, keyword: bool = False):
         if document_ids is None:
             document_ids = []
         data_json ={
@@ -170,7 +167,7 @@ class RAGFlow:
             "documents": document_ids
         }
         # Send a POST request to the backend service (using requests library as an example, actual implementation may vary)
-        res = self.post(
+        res = self.post('/retrieval',json=data_json)
         res = res.json()
         if res.get("code") ==0:
             chunks=[]
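Finally, the retrieval fix pins the request to the `/retrieval` endpoint with the assembled JSON payload, so SDK-side retrieval works again. Illustrative usage (the dataset ID is a placeholder, and the `content` attribute is assumed from the SDK's `Chunk` module):

```python
chunks = rag.retrieve(dataset_ids=["<DATASET_ID>"],
                      question="What does beartype add to the SDK?")
for c in chunks:
    print(c.content)
```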