# Added doc for switching Elasticsearch to Infinity (#3370)
### What problem does this PR solve?

Added documentation for switching the doc engine from Elasticsearch to [Infinity](https://github.com/infiniflow/infinity), together with the compose profiles, environment plumbing, CI coverage, and connection-retry changes that the switch depends on.
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- [x] Documentation Update
Files changed:

- `.github/workflows/tests.yml` (+20, −1)
- `README.md` (+25, −4)
- `README_id.md` (+1, −1)
- `README_ko.md` (+1, −1)
- `README_zh.md` (+1, −1)
- `api/settings.py` (+6, −2)
- `conf/service_conf.yaml` (+2, −2)
- `docker/.env` (+11, −0)
- `docker/docker-compose-base.yml` (+29, −25)
- `docker/docker-compose.yml` (+0, −2)
- `docker/service_conf.yaml.template` (+1, −1)
- `docs/guides/develop/build_docker_image.mdx` (+17, −0)
- `rag/utils/es_conn.py` (+16, −12)
- `rag/utils/infinity_conn.py` (+25, −8)
- `sdk/python/test/t_chunk.py` (+2, −2)
#### `.github/workflows/tests.yml`

The test job now runs twice, once per doc engine: the existing step is renamed "Run tests against Elasticsearch", and a second pass restarts the stack with `DOC_ENGINE=infinity` and runs the same SDK test suite against Infinity.

```diff
@@ -70,7 +70,7 @@ jobs:
           echo "RAGFLOW_IMAGE=infiniflow/ragflow:dev" >> docker/.env
           sudo docker compose -f docker/docker-compose.yml up -d

-      - name: Run tests
+      - name: Run tests against Elasticsearch
        run: |
          export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""
          export HOST_ADDRESS=http://host.docker.internal:9380
@@ -84,3 +84,22 @@ jobs:
         if: always()  # always run this step even if previous steps failed
         run: |
           sudo docker compose -f docker/docker-compose.yml down -v
+
+      - name: Start ragflow:dev
+        run: |
+          sudo DOC_ENGINE=infinity docker compose -f docker/docker-compose.yml up -d
+
+      - name: Run tests against Infinity
+        run: |
+          export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""
+          export HOST_ADDRESS=http://host.docker.internal:9380
+          until sudo docker exec ragflow-server curl -s --connect-timeout 5 ${HOST_ADDRESS} > /dev/null; do
+            echo "Waiting for service to be available..."
+            sleep 5
+          done
+          cd sdk/python && poetry install && source .venv/bin/activate && cd test && pytest --tb=short t_dataset.py t_chat.py t_session.py t_document.py t_chunk.py
+
+      - name: Stop ragflow:dev
+        if: always()  # always run this step even if previous steps failed
+        run: |
+          sudo DOC_ENGINE=infinity docker compose -f docker/docker-compose.yml down -v
```
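To debug the same readiness gate outside CI, here is a minimal Python sketch of the `until ... curl` loop above; the URL and 5-second cadence come from the workflow, while the function name and attempt cap are assumptions:

```python
# Poll the ragflow-server HTTP port until it answers, as the workflow's
# `until ... curl` loop does. The 60-attempt cap is an assumption.
import time
import urllib.error
import urllib.request

def wait_ready(url: str = "http://host.docker.internal:9380", attempts: int = 60) -> None:
    for _ in range(attempts):
        try:
            urllib.request.urlopen(url, timeout=5)
            return  # got an HTTP response; the server is up
        except urllib.error.HTTPError:
            return  # any HTTP status still proves the server answered
        except OSError:
            print("Waiting for service to be available...")
            time.sleep(5)
    raise TimeoutError(f"{url} did not become ready")
```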
#### `README.md`

Completes the truncated `RAGFLOW_IMAGE` notes, finishes the `network anormal` tip, and adds the section this PR is about, "Switch doc engine from Elasticsearch to Infinity":

````diff
@@ -175,14 +175,14 @@ releases! 🌟
   $ docker compose -f docker-compose.yml up -d
   ```

-> - To download a RAGFlow slim Docker image of a specific version, update the `
+> - To download a RAGFlow slim Docker image of a specific version, update the `RAGFLOW_IMAGE` variable in
    **docker/.env** to your desired version. For example, `RAGFLOW_IMAGE=infiniflow/ragflow:v0.13.0-slim`. After
    making this change, rerun the command above to initiate the download.
 > - To download the dev version of RAGFlow Docker image *including* embedding models and Python libraries, update the
-   `
+   `RAGFLOW_IMAGE` variable in **docker/.env** to `RAGFLOW_IMAGE=infiniflow/ragflow:dev`. After making this change,
    rerun the command above to initiate the download.
 > - To download a specific version of RAGFlow Docker image *including* embedding models and Python libraries, update
-   the `
+   the `RAGFLOW_IMAGE` variable in **docker/.env** to your desired version. For example,
    `RAGFLOW_IMAGE=infiniflow/ragflow:v0.13.0`. After making this change, rerun the command above to initiate the
    download.
@@ -210,7 +210,7 @@ releases! 🌟
      * Running on http://x.x.x.x:9380
      INFO:werkzeug:Press CTRL+C to quit
      ```
-   > If you skip this confirmation step and directly log in to RAGFlow, your browser may prompt a `network
+   > If you skip this confirmation step and directly log in to RAGFlow, your browser may prompt a `network anormal`
     error because, at that moment, your RAGFlow may not be fully initialized.

 5. In your web browser, enter the IP address of your server and log in to RAGFlow.
@@ -244,6 +244,27 @@ Updates to the above configurations require a reboot of all containers to take effect:
 > $ docker compose -f docker/docker-compose.yml up -d
 > ```

+### Switch doc engine from Elasticsearch to Infinity
+
+RAGFlow uses Elasticsearch by default for storing full text and vectors. To switch to [Infinity](https://github.com/infiniflow/infinity/), follow these steps:
+
+1. Stop all running containers:
+
+   ```bash
+   $ docker compose -f docker/docker-compose.yml down -v
+   ```
+
+2. Set `DOC_ENGINE` in **docker/.env** to `infinity`.
+
+3. Start the containers:
+
+   ```bash
+   $ docker compose -f docker/docker-compose.yml up -d
+   ```
+
+> [!WARNING]
+> Switching to Infinity on a Linux/arm64 machine is not yet officially supported.
+
 ## 🔧 Build a Docker image without embedding models

 This image is approximately 1 GB in size and relies on external LLM and embedding services.
````
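One way to confirm that Infinity is actually serving after the switch is to query the same endpoint the compose healthcheck uses (see `docker/docker-compose-base.yml` below). A minimal sketch, assuming `INFINITY_HTTP_PORT` maps 23820 onto localhost and that `requests` is installed:

```python
# Minimal health probe against Infinity's admin endpoint; the path comes from
# the compose healthcheck in docker-compose-base.yml. The port is an assumption.
import requests

resp = requests.get("http://localhost:23820/admin/node/current", timeout=5)
resp.raise_for_status()  # non-2xx means the node is not healthy yet
print(resp.text)         # node info is returned only when Infinity is up
```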
#### `README_id.md`

The Indonesian README gets the completed tip (in English: skipping the confirmation step and logging in directly may surface a `network anormal` error because RAGFlow may not be fully ready):

````diff
@@ -202,7 +202,7 @@ Coba demo kami di [https://demo.ragflow.io](https://demo.ragflow.io).
      * Running on http://x.x.x.x:9380
      INFO:werkzeug:Press CTRL+C to quit
      ```
-   > Jika Anda melewatkan langkah ini dan langsung login ke RAGFlow, browser Anda mungkin menampilkan error `network
+   > Jika Anda melewatkan langkah ini dan langsung login ke RAGFlow, browser Anda mungkin menampilkan error `network anormal`
     karena RAGFlow mungkin belum sepenuhnya siap.

 5. Buka browser web Anda, masukkan alamat IP server Anda, dan login ke RAGFlow.
````
#### `README_ko.md`

Same fix in the Korean README (in English: if you skip the confirmation step and log in directly, the browser may raise a `network anormal` error because RAGFlow is not fully initialized):

````diff
@@ -179,7 +179,7 @@
      * Running on http://x.x.x.x:9380
      INFO:werkzeug:Press CTRL+C to quit
      ```
-   > 만약 확인 단계를 건너뛰고 바로 RAGFlow에 로그인하면, RAGFlow가 완전히 초기화되지 않았기 때문에 브라우저에서 `network
+   > 만약 확인 단계를 건너뛰고 바로 RAGFlow에 로그인하면, RAGFlow가 완전히 초기화되지 않았기 때문에 브라우저에서 `network anormal` 오류가 발생할 수 있습니다.

 5. 웹 브라우저에 서버의 IP 주소를 입력하고 RAGFlow에 로그인하세요.
    > 기본 설정을 사용할 경우, `http://IP_OF_YOUR_MACHINE`만 입력하면 됩니다 (포트 번호는 제외). 기본 HTTP 서비스 포트 `80`은 기본 구성으로 사용할 때 생략할 수 있습니다.
````
#### `README_zh.md`

Same fix in the Chinese README (in English: skipping the confirmation step may yield a `network anormal` / "network error" prompt because RAGFlow may not have fully started):

````diff
@@ -174,7 +174,7 @@
      * Running on http://x.x.x.x:9380
      INFO:werkzeug:Press CTRL+C to quit
      ```
-   > 如果您跳过这一步系统确认步骤就登录 RAGFlow，你的浏览器有可能会提示 `network
+   > 如果您跳过这一步系统确认步骤就登录 RAGFlow，你的浏览器有可能会提示 `network anormal` 或 `网络异常`，因为 RAGFlow 可能并未完全启动成功。

 5. 在你的浏览器中输入你的服务器对应的 IP 地址并登录 RAGFlow。
    > 上面这个例子中，您只需输入 http://IP_OF_YOUR_MACHINE 即可：未改动过配置则无需输入端口（默认的 HTTP 服务端口 80）。
````
#### `api/settings.py`

The doc store is now selected from the `DOC_ENGINE` environment variable, defaulting to Elasticsearch, and an unknown value fails fast instead of silently picking a store:

```diff
@@ -191,10 +191,14 @@ AUTHENTICATION_DEFAULT_TIMEOUT = 7 * 24 * 60 * 60  # s
 PRIVILEGE_COMMAND_WHITELIST = []
 CHECK_NODES_IDENTITY = False

+DOC_ENGINE = os.environ.get('DOC_ENGINE', "elasticsearch")
+if DOC_ENGINE == "elasticsearch":
     docStoreConn = rag.utils.es_conn.ESConnection()
+elif DOC_ENGINE == "infinity":
     docStoreConn = rag.utils.infinity_conn.InfinityConnection()
+else:
+    raise Exception(f"Not supported doc engine: {DOC_ENGINE}")
+
 retrievaler = search.Dealer(docStoreConn)
 kg_retrievaler = kg_search.KGSearch(docStoreConn)
```
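Because `docStoreConn` is created when `api.settings` is imported, the engine choice must be in the environment before that first import. A hypothetical usage sketch, not part of the PR, assuming the chosen store is reachable (the constructors below block until the engine is healthy):

```python
# Select the doc engine per process; this must run before api.settings is
# imported, because docStoreConn is built at module import time.
import os

os.environ["DOC_ENGINE"] = "infinity"  # or "elasticsearch" (the default)

from api import settings  # triggers the health-checked connection

print(type(settings.docStoreConn).__name__)  # -> InfinityConnection
```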
#### `conf/service_conf.yaml`

The defaults now point at the ports the compose file publishes on the host:

```diff
@@ -6,7 +6,7 @@ mysql:
   user: 'root'
   password: 'infini_rag_flow'
   host: 'mysql'
-  port:
+  port: 5455
   max_connections: 100
   stale_timeout: 30
 minio:
@@ -14,7 +14,7 @@ minio:
   password: 'infini_rag_flow'
   host: 'minio:9000'
 es:
-  hosts: 'http://es01:
+  hosts: 'http://es01:1200'
   username: 'elastic'
   password: 'infini_rag_flow'
 redis:
```
#### `docker/.env`

A `DOC_ENGINE` switch is introduced and fed into `COMPOSE_PROFILES`, so the chosen engine also determines which compose profile (and therefore which store container) starts:

```diff
@@ -1,3 +1,14 @@
+# The type of doc engine to use.
+# Supported values are `elasticsearch`, `infinity`.
+DOC_ENGINE=${DOC_ENGINE:-elasticsearch}
+
+# ------------------------------
+# docker env var for specifying vector db type at startup
+# (based on the vector db type, the corresponding docker
+# compose profile will be used)
+# ------------------------------
+COMPOSE_PROFILES=${DOC_ENGINE}
+
 # The version of Elasticsearch.
 STACK_VERSION=8.11.3
```
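Since `COMPOSE_PROFILES` is derived from `DOC_ENGINE`, one variable both selects the connector in `api/settings.py` and picks the compose profile. A sketch of how the `${DOC_ENGINE:-elasticsearch}` line resolves; the parsing here is illustrative only, not compose's actual interpolation code:

```python
# Illustrative only: resolve the DOC_ENGINE value the way docker compose
# interpolates ${VAR:-default}, to predict which profile will be activated.
import os
import re

def doc_engine(env_path: str = "docker/.env") -> str:
    raw = "elasticsearch"
    with open(env_path) as fh:
        for line in fh:
            line = line.strip()
            if line.startswith("DOC_ENGINE="):
                raw = line.split("=", 1)[1]
    m = re.fullmatch(r"\$\{(\w+):-([^}]*)\}", raw)
    if m:  # e.g. ${DOC_ENGINE:-elasticsearch}
        raw = os.environ.get(m.group(1)) or m.group(2)
    return raw

print(doc_engine())  # also the COMPOSE_PROFILES value, per the .env above
```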
#### `docker/docker-compose-base.yml`

`es01` is gated behind a new `elasticsearch` profile, and a profile-gated `infinity` service is defined with ports taken from **docker/.env** and an HTTP healthcheck:

```diff
@@ -1,6 +1,8 @@
 services:
   es01:
     container_name: ragflow-es-01
+    profiles:
+      - elasticsearch
     image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
     volumes:
       - esdata01:/usr/share/elasticsearch/data
@@ -30,31 +32,33 @@ services:
       - ragflow
     restart: on-failure

+  infinity:
+    container_name: ragflow-infinity
+    profiles:
+      - infinity
+    image: infiniflow/infinity:v0.5.0-dev2
+    volumes:
+      - infinity_data:/var/infinity
+    ports:
+      - ${INFINITY_THRIFT_PORT}:23817
+      - ${INFINITY_HTTP_PORT}:23820
+      - ${INFINITY_PSQL_PORT}:5432
+    env_file: .env
+    environment:
+      - TZ=${TIMEZONE}
+    mem_limit: ${MEM_LIMIT}
+    ulimits:
+      nofile:
+        soft: 500000
+        hard: 500000
+    networks:
+      - ragflow
+    healthcheck:
+      test: ["CMD", "curl", "http://localhost:23820/admin/node/current"]
+      interval: 10s
+      timeout: 10s
+      retries: 120
+    restart: on-failure

   mysql:
```
#### `docker/docker-compose.yml`

`ragflow-server` no longer hard-depends on `es01`; with the profile mechanism the Elasticsearch container may not exist at all, so the dependency would break `DOC_ENGINE=infinity` startups:

```diff
@@ -6,8 +6,6 @@ services:
     depends_on:
       mysql:
         condition: service_healthy
-      es01:
-        condition: service_healthy
     image: ${RAGFLOW_IMAGE}
     container_name: ragflow-server
     ports:
```
#### `docker/service_conf.yaml.template`

The ES password now interpolates `ELASTIC_PASSWORD`, the variable **docker/.env** actually defines:

```diff
@@ -16,7 +16,7 @@ minio:
 es:
   hosts: 'http://${ES_HOST:-es01}:9200'
   username: '${ES_USER:-elastic}'
-  password: '${
+  password: '${ELASTIC_PASSWORD:-infini_rag_flow}'
 redis:
   db: 1
   password: '${REDIS_PASSWORD:-infini_rag_flow}'
```
#### `docs/guides/develop/build_docker_image.mdx`

Adds a linux/arm64 tab explaining that official arm64 images are not published and how to build one locally:

````diff
@@ -61,4 +61,21 @@ docker build -f Dockerfile -t infiniflow/ragflow:dev .
 ```

 </TabItem>
+<TabItem value="linux/arm64">
+
+## 🔧 Build a Docker image for linux arm64
+
+We are currently unable to regularly build multi-arch images with CI and have no plans to publish arm64 images in the near future.
+However, you can build an image yourself on a linux/arm64 host machine:
+
+```bash
+git clone https://github.com/infiniflow/ragflow.git
+cd ragflow/
+pip3 install huggingface-hub nltk
+python3 download_deps.py
+docker build --build-arg ARCH=arm64 -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
+docker build --build-arg ARCH=arm64 -f Dockerfile -t infiniflow/ragflow:dev .
+```
+</TabItem>
+
 </Tabs>
````
#### `rag/utils/es_conn.py`

The unused top-level `import elasticsearch` and its version log go away; the constructor now logs which engine is configured, retries the connection 24 times at 5-second intervals, and raises explicit errors for an unreachable cluster, a version below 8, or a missing mapping file:

```diff
@@ -4,7 +4,6 @@ import time
 import os
 from typing import List, Dict

-import elasticsearch
 import copy
 from elasticsearch import Elasticsearch
 from elasticsearch_dsl import UpdateByQuery, Q, Search, Index
@@ -17,14 +16,13 @@ import polars as pl
 from rag.utils.doc_store_conn import DocStoreConnection, MatchExpr, OrderByExpr, MatchTextExpr, MatchDenseExpr, FusionExpr
 from rag.nlp import is_english, rag_tokenizer

-logger.info("Elasticsearch sdk version: "+str(elasticsearch.__version__))

 @singleton
 class ESConnection(DocStoreConnection):
     def __init__(self):
         self.info = {}
+        logger.info(f"Use Elasticsearch {settings.ES['hosts']} as the doc engine.")
+        for _ in range(24):
             try:
                 self.es = Elasticsearch(
                     settings.ES["hosts"].split(","),
@@ -34,21 +32,27 @@ class ESConnection(DocStoreConnection):
                 )
                 if self.es:
                     self.info = self.es.info()
-                    logger.info("Connect to es.")
                     break
-            except Exception:
-                logger.
-                time.sleep(
+            except Exception as e:
+                logger.warn(f"{str(e)}. Waiting Elasticsearch {settings.ES['hosts']} to be healthy.")
+                time.sleep(5)
         if not self.es.ping():
+            msg = f"Elasticsearch {settings.ES['hosts']} didn't become healthy in 120s."
+            logger.error(msg)
+            raise Exception(msg)
+        v = self.info.get("version", {"number": "8.11.3"})
         v = v["number"].split(".")[0]
         if int(v) < 8:
+            msg = f"Elasticsearch version must be greater than or equal to 8, current version: {v}"
+            logger.error(msg)
+            raise Exception(msg)
         fp_mapping = os.path.join(get_project_base_directory(), "conf", "mapping.json")
         if not os.path.exists(fp_mapping):
+            msg = f"Elasticsearch mapping file not found at {fp_mapping}"
+            logger.error(msg)
+            raise Exception(msg)
         self.mapping = json.load(open(fp_mapping, "r"))
+        logger.info(f"Elasticsearch {settings.ES['hosts']} is healthy.")

 """
 Database operations
```
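The retry pattern here (24 attempts, 5 s apart, i.e. the 120 s quoted in the error message) is the same one the Infinity connector below uses. A hypothetical shared helper, not part of the PR:

```python
# Hypothetical consolidation of the retry loop used by both connectors:
# 24 attempts x 5 s = the 120 s budget quoted in the error messages.
import time
from typing import Callable, Optional, TypeVar

T = TypeVar("T")

def wait_healthy(connect: Callable[[], T], attempts: int = 24, delay: float = 5.0) -> T:
    last_exc: Optional[Exception] = None
    for _ in range(attempts):
        try:
            return connect()  # succeeds once the engine answers
        except Exception as e:
            last_exc = e
            time.sleep(delay)
    raise Exception(f"didn't become healthy in {int(attempts * delay)}s") from last_exc
```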
#### `rag/utils/infinity_conn.py`

Mirrors the Elasticsearch changes: the constructor waits up to 120 s (24 × 5 s) for Infinity to answer `show_current_node()`, `indexExist` now logs the exception it swallows, the commented-out fulltext debug line is completed, and inserts log the written ids:

```diff
@@ -1,13 +1,14 @@
 import os
 import re
 import json
+import time
 from typing import List, Dict
 import infinity
 from infinity.common import ConflictType, InfinityException
 from infinity.index import IndexInfo, IndexType
 from infinity.connection_pool import ConnectionPool
-from rag import settings
 from api.utils.log_utils import logger
+from rag import settings
 from rag.utils import singleton
 import polars as pl
 from polars.series.series import Series
@@ -54,8 +55,24 @@ class InfinityConnection(DocStoreConnection):
         if ":" in infinity_uri:
             host, port = infinity_uri.split(":")
             infinity_uri = infinity.common.NetworkAddress(host, int(port))
-        self.connPool =
-        logger.info(f"
+        self.connPool = None
+        logger.info(f"Use Infinity {infinity_uri} as the doc engine.")
+        for _ in range(24):
+            try:
+                connPool = ConnectionPool(infinity_uri)
+                inf_conn = connPool.get_conn()
+                _ = inf_conn.show_current_node()
+                connPool.release_conn(inf_conn)
+                self.connPool = connPool
+                break
+            except Exception as e:
+                logger.warn(f"{str(e)}. Waiting Infinity {infinity_uri} to be healthy.")
+                time.sleep(5)
+        if self.connPool is None:
+            msg = f"Infinity {infinity_uri} didn't become healthy in 120s."
+            logger.error(msg)
+            raise Exception(msg)
+        logger.info(f"Infinity {infinity_uri} is healthy.")

 """
 Database operations
@@ -151,8 +168,8 @@ class InfinityConnection(DocStoreConnection):
             _ = db_instance.get_table(table_name)
             self.connPool.release_conn(inf_conn)
             return True
-        except Exception:
-            logger.
+        except Exception as e:
+            logger.warn(f"INFINITY indexExist {str(e)}")
         return False

 """
@@ -199,7 +216,7 @@ class InfinityConnection(DocStoreConnection):
             )
         if len(filter_cond) != 0:
             filter_fulltext = f"({filter_cond}) AND {filter_fulltext}"
-        #
+        # logger.info(f"filter_fulltext: {filter_fulltext}")
         minimum_should_match = "0%"
         if "minimum_should_match" in matchExpr.extra_options:
             minimum_should_match = (
@@ -312,7 +329,7 @@ class InfinityConnection(DocStoreConnection):
         for k, v in d.items():
             if k.endswith("_kwd") and isinstance(v, list):
                 d[k] = " ".join(v)
-        ids = [
+        ids = ["'{}'".format(d["id"]) for d in documents]
         str_ids = ", ".join(ids)
         str_filter = f"id IN ({str_ids})"
         table_instance.delete(str_filter)
@@ -321,7 +338,7 @@ class InfinityConnection(DocStoreConnection):
         # logger.info(f"InfinityConnection.insert {json.dumps(documents)}")
         table_instance.insert(documents)
         self.connPool.release_conn(inf_conn)
-
+        logger.info(f"inserted into {table_name} {str_ids}.")
         return []

     def update(
```
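The new `ids` line is what turns document ids into the SQL-style filter that `table_instance.delete` expects before the re-insert. A standalone sketch of the string it builds (the ids are made up):

```python
# Builds the same delete filter as the insert path above: quote each id,
# join with commas, wrap in an IN clause.
documents = [{"id": "chunk_1"}, {"id": "chunk_2"}]
ids = ["'{}'".format(d["id"]) for d in documents]
str_filter = f"id IN ({', '.join(ids)})"
print(str_filter)  # id IN ('chunk_1', 'chunk_2')
```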
#### `sdk/python/test/t_chunk.py`

The bare `# For` comments are completed to explain the `sleep(3)` between adding and updating a chunk:

```diff
@@ -147,7 +147,7 @@ def test_update_chunk_content(get_api_key_fixture):
     docs = ds.upload_documents(documents)
     doc = docs[0]
     chunk = doc.add_chunk(content="This is a chunk addition test")
-    # For
+    # For Elasticsearch, the chunk is not searchable in shot time (~2s).
     sleep(3)
     chunk.update({"content":"This is a updated content"})

@@ -168,7 +168,7 @@ def test_update_chunk_available(get_api_key_fixture):
     docs = ds.upload_documents(documents)
     doc = docs[0]
     chunk = doc.add_chunk(content="This is a chunk addition test")
-    # For
+    # For Elasticsearch, the chunk is not searchable in shot time (~2s).
     sleep(3)
     chunk.update({"available":0})
```
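The fixed `sleep(3)` covers Elasticsearch's near-real-time refresh. If these tests ever flake again, a poll-based wait is one alternative; this is a sketch only, and `is_searchable` is a hypothetical predicate, not an SDK call:

```python
# Hypothetical poll-based replacement for the fixed sleep(3): retry until the
# chunk becomes searchable or the timeout expires.
import time
from typing import Callable

def wait_searchable(is_searchable: Callable[[], bool],
                    timeout: float = 10.0, step: float = 0.5) -> bool:
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if is_searchable():
            return True
        time.sleep(step)
    return False
```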