KevinHuSh committed · Commit 0cfb2df · 1 Parent(s): 1356753

fix docker compose issue (#238)
### What problem does this PR solve?

Issue link: [#226](https://github.com/infiniflow/ragflow/issues/226)

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

### Files changed
- api/apps/document_app.py +5 -0
- api/apps/user_app.py +4 -2
- api/db/init_data.py +3 -5
- api/settings.py +2 -0
- docker/docker-compose-CN.yml +5 -107
- docker/docker-compose-base.yml +110 -0
- docker/docker-compose.yml +5 -107
- docker/entrypoint.sh +1 -1
- docker/service_conf.yaml +1 -0
- rag/app/naive.py +49 -4
- rag/app/table.py +2 -2
- rag/llm/chat_model.py +1 -0
- rag/nlp/__init__.py +3 -2
- rag/nlp/search.py +1 -1
#### api/apps/document_app.py (CHANGED)

```diff
@@ -65,6 +65,11 @@ def upload():
         DocumentService.query,
         name=file.filename,
         kb_id=kb.id)
+    filetype = filename_type(filename)
+    if not filetype:
+        return get_data_error_result(
+            retmsg="This type of file has not been supported yet!")
+
     location = filename
     while MINIO.obj_exist(kb_id, location):
         location += "_"
```
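
The new guard rejects a file whose type cannot be determined before anything is written to MinIO. A minimal sketch of the idea, using a simplified, hypothetical `filename_type` (the real helper in the API's file utilities recognizes more formats):

```python
# Hypothetical, simplified stand-in for filename_type(); shown only to
# illustrate how the new guard behaves. The real helper covers more extensions.
SUPPORTED = {".pdf": "pdf", ".doc": "doc", ".docx": "doc",
             ".txt": "txt", ".xlsx": "excel", ".csv": "excel"}

def filename_type(filename):
    name = filename.lower()
    for ext, ftype in SUPPORTED.items():
        if name.endswith(ext):
            return ftype
    return None  # falsy -> upload() now returns a data error instead of storing the file

assert filename_type("report.PDF") == "pdf"
assert filename_type("backup.tar.gz") is None
```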
#### api/apps/user_app.py (CHANGED)

```diff
@@ -25,7 +25,7 @@ from api.utils.api_utils import server_error_response, validate_request
 from api.utils import get_uuid, get_format_time, decrypt, download_img
 from api.db import UserTenantRole, LLMType
 from api.settings import RetCode, GITHUB_OAUTH, CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS, API_KEY, \
-    LLM_FACTORY
+    LLM_FACTORY, LLM_BASE_URL
 from api.db.services.user_service import UserService, TenantService, UserTenantService
 from api.settings import stat_logger
 from api.utils.api_utils import get_json_result, cors_reponse
@@ -220,7 +220,9 @@ def user_register(user_id, user):
             "llm_factory": LLM_FACTORY,
             "llm_name": llm.llm_name,
             "model_type": llm.model_type,
-            "api_key": API_KEY})
+            "api_key": API_KEY,
+            "base_url": LLM_BASE_URL
+        })
 
     if not UserService.save(**user):
         return
```
#### api/db/init_data.py (CHANGED)

```diff
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import os
 import time
 import uuid
 
@@ -21,7 +22,7 @@ from api.db.db_models import init_database_tables as init_web_db
 from api.db.services import UserService
 from api.db.services.llm_service import LLMFactoriesService, LLMService, TenantLLMService, LLMBundle
 from api.db.services.user_service import TenantService, UserTenantService
-from api.settings import CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS, LLM_FACTORY, API_KEY
+from api.settings import CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS, LLM_FACTORY, API_KEY, LLM_BASE_URL
 
 
 def init_superuser():
@@ -53,7 +54,7 @@ def init_superuser():
     for llm in LLMService.query(fid=LLM_FACTORY):
         tenant_llm.append(
             {"tenant_id": user_info["id"], "llm_factory": LLM_FACTORY, "llm_name": llm.llm_name, "model_type": llm.model_type,
-             "api_key": API_KEY})
+             "api_key": API_KEY, "base_url": LLM_BASE_URL})
 
     if not UserService.save(**user_info):
         print("\033[93m【ERROR】\033[0mcan't init admin.")
@@ -282,11 +283,8 @@ def init_llm_factory():
     pass
 
     """
-    modify service_config
     drop table llm;
     drop table llm_factories;
-    update tenant_llm set llm_factory='Tongyi-Qianwen' where llm_factory='通义千问';
-    update tenant_llm set llm_factory='ZHIPU-AI' where llm_factory='智谱AI';
     update tenant set parser_ids='naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One';
     alter table knowledgebase modify avatar longtext;
     alter table user modify avatar longtext;
```
#### api/settings.py (CHANGED)

```diff
@@ -91,6 +91,8 @@ default_llm = {
 }
 LLM = get_base_config("user_default_llm", {})
 LLM_FACTORY = LLM.get("factory", "Tongyi-Qianwen")
+LLM_BASE_URL = LLM.get("base_url")
+
 if LLM_FACTORY not in default_llm:
     print(
         "\33[91m【ERROR】\33[0m:",
```
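
How the new setting flows through: `base_url` is read from the `user_default_llm` section of `service_conf.yaml` and later copied into each tenant's LLM records (see `user_app.py` and `init_data.py` above), so an OpenAI-compatible endpoint behind a custom URL can be configured. A rough sketch, assuming `get_base_config` simply returns the named YAML section as a dict (the real loader does more):

```python
# Rough sketch only: assumes get_base_config() just returns the requested
# section of conf/service_conf.yaml as a dict.
import yaml

def get_base_config(key, default=None):
    with open("conf/service_conf.yaml") as f:
        return yaml.safe_load(f).get(key, default) or default

LLM = get_base_config("user_default_llm", {})
LLM_FACTORY = LLM.get("factory", "Tongyi-Qianwen")
LLM_BASE_URL = LLM.get("base_url")   # '' with the sample conf below, None if the key is absent
```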
#### docker/docker-compose-CN.yml (CHANGED)

```diff
@@ -1,99 +1,12 @@
 version: '2.2'
-services:
-  es01:
-    container_name: ragflow-es-01
-    image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
-    volumes:
-      - esdata01:/usr/share/elasticsearch/data
-    ports:
-      - ${ES_PORT}:9200
-    environment:
-      - node.name=es01
-      - cluster.name=${CLUSTER_NAME}
-      - cluster.initial_master_nodes=es01
-      - ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
-      - bootstrap.memory_lock=false
-      - xpack.security.enabled=false
-      - TZ=${TIMEZONE}
-    mem_limit: ${MEM_LIMIT}
-    ulimits:
-      memlock:
-        soft: -1
-        hard: -1
-    healthcheck:
-      test: ["CMD-SHELL", "curl http://localhost:9200"]
-      interval: 10s
-      timeout: 10s
-      retries: 120
-    networks:
-      - ragflow
-    restart: always
-
-  kibana:
-    depends_on:
-      es01:
-        condition: service_healthy
-    image: docker.elastic.co/kibana/kibana:${STACK_VERSION}
-    container_name: ragflow-kibana
-    volumes:
-      - kibanadata:/usr/share/kibana/data
-    ports:
-      - ${KIBANA_PORT}:5601
-    environment:
-      - SERVERNAME=kibana
-      - ELASTICSEARCH_HOSTS=http://es01:9200
-      - TZ=${TIMEZONE}
-    mem_limit: ${MEM_LIMIT}
-    networks:
-      - ragflow
-
-  mysql:
-    image: mysql:5.7.18
-    container_name: ragflow-mysql
-    environment:
-      - MYSQL_ROOT_PASSWORD=${MYSQL_PASSWORD}
-      - TZ=${TIMEZONE}
-    command:
-      --max_connections=1000
-      --character-set-server=utf8mb4
-      --collation-server=utf8mb4_general_ci
-      --default-authentication-plugin=mysql_native_password
-      --tls_version="TLSv1.2,TLSv1.3"
-      --init-file /data/application/init.sql
-    ports:
-      - ${MYSQL_PORT}:3306
-    volumes:
-      - mysql_data:/var/lib/mysql
-      - ./init.sql:/data/application/init.sql
-    networks:
-      - ragflow
-    healthcheck:
-      test: ["CMD", "mysqladmin" ,"ping", "-uroot", "-p${MYSQL_PASSWORD}"]
-      interval: 10s
-      timeout: 10s
-      retries: 3
-    restart: always
-
 
-  minio:
-    image: quay.io/minio/minio:RELEASE.2023-12-20T01-00-02Z
-    container_name: ragflow-minio
-    command: server --console-address ":9001" /data
-    ports:
-      - 9000:9000
-      - 9001:9001
-    environment:
-      - MINIO_ROOT_USER=${MINIO_USER}
-      - MINIO_ROOT_PASSWORD=${MINIO_PASSWORD}
-      - TZ=${TIMEZONE}
-    volumes:
-      - minio_data:/data
-    networks:
-      - ragflow
-    restart: always
 
+include:
+  - path: ./docker-compose-base.yml
+    env_file: ./.env
 
-
+services:
+  ragflow:
     depends_on:
       mysql:
         condition: service_healthy
@@ -116,18 +29,3 @@ services:
     networks:
       - ragflow
     restart: always
-
-
-volumes:
-  esdata01:
-    driver: local
-  kibanadata:
-    driver: local
-  mysql_data:
-    driver: local
-  minio_data:
-    driver: local
-
-networks:
-  ragflow:
-    driver: bridge
```
#### docker/docker-compose-base.yml (ADDED)

New file (110 lines):

```yaml
version: '2.2'

services:
  es01:
    container_name: ragflow-es-01
    image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
    volumes:
      - esdata01:/usr/share/elasticsearch/data
    ports:
      - ${ES_PORT}:9200
    environment:
      - node.name=es01
      - cluster.name=${CLUSTER_NAME}
      - cluster.initial_master_nodes=es01
      - ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
      - bootstrap.memory_lock=false
      - xpack.security.enabled=false
      - cluster.max_shards_per_node=4096
      - TZ=${TIMEZONE}
    mem_limit: ${MEM_LIMIT}
    ulimits:
      memlock:
        soft: -1
        hard: -1
    healthcheck:
      test: ["CMD-SHELL", "curl http://localhost:9200"]
      interval: 10s
      timeout: 10s
      retries: 120
    networks:
      - ragflow
    restart: always

  kibana:
    depends_on:
      es01:
        condition: service_healthy
    image: docker.elastic.co/kibana/kibana:${STACK_VERSION}
    container_name: ragflow-kibana
    volumes:
      - kibanadata:/usr/share/kibana/data
    ports:
      - ${KIBANA_PORT}:5601
    environment:
      - SERVERNAME=kibana
      - ELASTICSEARCH_HOSTS=http://es01:9200
      - TZ=${TIMEZONE}
    mem_limit: ${MEM_LIMIT}
    networks:
      - ragflow

  mysql:
    image: mysql:5.7.18
    container_name: ragflow-mysql
    environment:
      - MYSQL_ROOT_PASSWORD=${MYSQL_PASSWORD}
      - TZ=${TIMEZONE}
    command:
      --max_connections=1000
      --character-set-server=utf8mb4
      --collation-server=utf8mb4_general_ci
      --default-authentication-plugin=mysql_native_password
      --tls_version="TLSv1.2,TLSv1.3"
      --init-file /data/application/init.sql
    ports:
      - ${MYSQL_PORT}:3306
    volumes:
      - mysql_data:/var/lib/mysql
      - ./init.sql:/data/application/init.sql
    networks:
      - ragflow
    healthcheck:
      test: ["CMD", "mysqladmin" ,"ping", "-uroot", "-p${MYSQL_PASSWORD}"]
      interval: 10s
      timeout: 10s
      retries: 3
    restart: always


  minio:
    image: quay.io/minio/minio:RELEASE.2023-12-20T01-00-02Z
    container_name: ragflow-minio
    command: server --console-address ":9001" /data
    ports:
      - 9000:9000
      - 9001:9001
    environment:
      - MINIO_ROOT_USER=${MINIO_USER}
      - MINIO_ROOT_PASSWORD=${MINIO_PASSWORD}
      - TZ=${TIMEZONE}
    volumes:
      - minio_data:/data
    networks:
      - ragflow
    restart: always


volumes:
  esdata01:
    driver: local
  kibanadata:
    driver: local
  mysql_data:
    driver: local
  minio_data:
    driver: local

networks:
  ragflow:
    driver: bridge
```
#### docker/docker-compose.yml (CHANGED)

```diff
@@ -1,98 +1,10 @@
 version: '2.2'
-services:
-  es01:
-    container_name: ragflow-es-01
-    image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
-    volumes:
-      - esdata01:/usr/share/elasticsearch/data
-    ports:
-      - ${ES_PORT}:9200
-    environment:
-      - node.name=es01
-      - cluster.name=${CLUSTER_NAME}
-      - cluster.initial_master_nodes=es01
-      - ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
-      - bootstrap.memory_lock=false
-      - xpack.security.enabled=false
-      - TZ=${TIMEZONE}
-    mem_limit: ${MEM_LIMIT}
-    ulimits:
-      memlock:
-        soft: -1
-        hard: -1
-    healthcheck:
-      test: ["CMD-SHELL", "curl http://localhost:9200"]
-      interval: 10s
-      timeout: 10s
-      retries: 120
-    networks:
-      - ragflow
-    restart: always
-
-  kibana:
-    depends_on:
-      es01:
-        condition: service_healthy
-    image: docker.elastic.co/kibana/kibana:${STACK_VERSION}
-    container_name: ragflow-kibana
-    volumes:
-      - kibanadata:/usr/share/kibana/data
-    ports:
-      - ${KIBANA_PORT}:5601
-    environment:
-      - SERVERNAME=kibana
-      - ELASTICSEARCH_HOSTS=http://es01:9200
-      - TZ=${TIMEZONE}
-    mem_limit: ${MEM_LIMIT}
-    networks:
-      - ragflow
-
-  mysql:
-    image: mysql:5.7.18
-    container_name: ragflow-mysql
-    environment:
-      - MYSQL_ROOT_PASSWORD=${MYSQL_PASSWORD}
-      - TZ=${TIMEZONE}
-    command:
-      --max_connections=1000
-      --character-set-server=utf8mb4
-      --collation-server=utf8mb4_general_ci
-      --default-authentication-plugin=mysql_native_password
-      --tls_version="TLSv1.2,TLSv1.3"
-      --init-file /data/application/init.sql
-    ports:
-      - ${MYSQL_PORT}:3306
-    volumes:
-      - mysql_data:/var/lib/mysql
-      - ./init.sql:/data/application/init.sql
-    networks:
-      - ragflow
-    healthcheck:
-      test: ["CMD", "mysqladmin" ,"ping", "-uroot", "-p${MYSQL_PASSWORD}"]
-      interval: 10s
-      timeout: 10s
-      retries: 3
-    restart: always
-
-
-  minio:
-    image: quay.io/minio/minio:RELEASE.2023-12-20T01-00-02Z
-    container_name: ragflow-minio
-    command: server --console-address ":9001" /data
-    ports:
-      - 9000:9000
-      - 9001:9001
-    environment:
-      - MINIO_ROOT_USER=${MINIO_USER}
-      - MINIO_ROOT_PASSWORD=${MINIO_PASSWORD}
-      - TZ=${TIMEZONE}
-    volumes:
-      - minio_data:/data
-    networks:
-      - ragflow
-    restart: always
 
+include:
+  - path: ./docker-compose-base.yml
+    env_file: ./.env
 
+services:
   ragflow:
     depends_on:
       mysql:
@@ -107,6 +19,7 @@ services:
       - 443:443
     volumes:
       - ./service_conf.yaml:/ragflow/conf/service_conf.yaml
+      - ./entrypoint.sh:/ragflow/entrypoint.sh
       - ./ragflow-logs:/ragflow/logs
       - ./nginx/ragflow.conf:/etc/nginx/conf.d/ragflow.conf
       - ./nginx/proxy.conf:/etc/nginx/proxy.conf
@@ -116,18 +29,3 @@ services:
     networks:
       - ragflow
     restart: always
-
-
-volumes:
-  esdata01:
-    driver: local
-  kibanadata:
-    driver: local
-  mysql_data:
-    driver: local
-  minio_data:
-    driver: local
-
-networks:
-  ragflow:
-    driver: bridge
```
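
The net effect of the compose changes: the shared infrastructure (Elasticsearch, Kibana, MySQL, MinIO, the named volumes and the `ragflow` network) now lives only in `docker-compose-base.yml`, and both `docker-compose.yml` and `docker-compose-CN.yml` pull it in through the Compose `include` element (which requires a recent Docker Compose v2; `include` arrived in 2.20). `docker compose config` shows the merged result. A throwaway check, not part of the change, that lists the services each file defines after the split:

```python
# Throwaway sanity check: list the services defined in each compose file to
# confirm the infrastructure really moved to the base file.
import yaml

for path in ("docker/docker-compose-base.yml", "docker/docker-compose.yml"):
    with open(path) as f:
        compose = yaml.safe_load(f)
    print(path, "->", sorted((compose.get("services") or {}).keys()))

# Expected output, roughly:
#   docker/docker-compose-base.yml -> ['es01', 'kibana', 'minio', 'mysql']
#   docker/docker-compose.yml      -> ['ragflow']
```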
#### docker/entrypoint.sh (CHANGED)

```diff
@@ -23,7 +23,7 @@ function watch_broker(){
 }
 
 function task_bro(){
-    sleep
+    sleep 160;
     watch_broker;
 }
 
```
#### docker/service_conf.yaml (CHANGED)

```diff
@@ -18,6 +18,7 @@ es:
 user_default_llm:
   factory: 'Tongyi-Qianwen'
   api_key: 'sk-xxxxxxxxxxxxx'
+  base_url: ''
 oauth:
   github:
     client_id: xxxxxxxxxxxxxxxxxxxxxxxxx
```
#### rag/app/naive.py (CHANGED)

```diff
@@ -10,14 +10,59 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import
+from io import BytesIO
+from docx import Document
 import re
 from deepdoc.parser.pdf_parser import PlainParser
 from rag.app import laws
 from rag.nlp import huqie, is_english, tokenize, naive_merge, tokenize_table, add_positions, tokenize_chunks
-from deepdoc.parser import PdfParser, ExcelParser
+from deepdoc.parser import PdfParser, ExcelParser, DocxParser
 from rag.settings import cron_logger
 
+class Docx(DocxParser):
+    def __init__(self):
+        pass
+
+    def __clean(self, line):
+        line = re.sub(r"\u3000", " ", line).strip()
+        return line
+
+    def __call__(self, filename, binary=None, from_page=0, to_page=100000):
+        self.doc = Document(
+            filename) if not binary else Document(BytesIO(binary))
+        pn = 0
+        lines = []
+        for p in self.doc.paragraphs:
+            if pn > to_page:
+                break
+            if from_page <= pn < to_page and p.text.strip():
+                lines.append(self.__clean(p.text))
+            for run in p.runs:
+                if 'lastRenderedPageBreak' in run._element.xml:
+                    pn += 1
+                    continue
+                if 'w:br' in run._element.xml and 'type="page"' in run._element.xml:
+                    pn += 1
+        tbls = []
+        for tb in self.doc.tables:
+            html= "<table>"
+            for r in tb.rows:
+                html += "<tr>"
+                i = 0
+                while i < len(r.cells):
+                    span = 1
+                    c = r.cells[i]
+                    for j in range(i+1, len(r.cells)):
+                        if c.text == r.cells[j].text:
+                            span += 1
+                            i = j
+                    i += 1
+                    html += f"<td>{c.text}</td>" if span == 1 else f"<td colspan='{span}'>{c.text}</td>"
+                html += "</tr>"
+            html += "</table>"
+            tbls.append(((None, html), ""))
+        return [(l, "") for l in lines if l], tbls
+
 
 class Pdf(PdfParser):
     def __call__(self, filename, binary=None, from_page=0,
@@ -75,8 +120,8 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
     sections = []
     if re.search(r"\.docx?$", filename, re.IGNORECASE):
         callback(0.1, "Start to parse.")
-
-
+        sections, tbls = Docx()(filename, binary)
+        res = tokenize_table(tbls, doc, eng)
         callback(0.8, "Finish parsing.")
 
     elif re.search(r"\.pdf$", filename, re.IGNORECASE):
```
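
A usage sketch of the new `Docx` parser (the path is a placeholder): it returns paragraph text and tables separately, with each table rendered as an HTML string in the `((image, html), "")` shape that `tokenize_table` expects.

```python
# Illustrative only; "sample.docx" is a placeholder path.
from rag.app.naive import Docx

lines, tbls = Docx()("sample.docx")          # or Docx()("sample.docx", binary=blob)

# `lines`: one (text, "") tuple per non-empty paragraph in the page range;
# page numbers are tracked via lastRenderedPageBreak / w:br page-break runs.
for text, _ in lines[:3]:
    print(text)

# `tbls`: ((None, "<table>...</table>"), "") per table; cells whose text
# repeats across adjacent columns are collapsed into a single <td colspan=...>.
for (_, html), _ in tbls[:1]:
    print(html)
```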
#### rag/app/table.py (CHANGED)

```diff
@@ -223,8 +223,8 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000,
                 continue
             if not str(row[clmns[j]]):
                 continue
-            if pd.isna(row[clmns[j]]):
-                continue
+            #if pd.isna(row[clmns[j]]):
+            #    continue
             fld = clmns_map[j][0]
             d[fld] = row[clmns[j]] if clmn_tys[j] != "text" else huqie.qie(
                 row[clmns[j]])
```
#### rag/llm/chat_model.py (CHANGED)

```diff
@@ -170,3 +170,4 @@ class LocalLLM(Base):
             return ans, num_tokens_from_string(ans)
         except Exception as e:
             return "**ERROR**: " + str(e), 0
+
```
#### rag/nlp/__init__.py (CHANGED)

```diff
@@ -68,6 +68,7 @@ def bullets_category(sections):
 
 def is_english(texts):
     eng = 0
+    if not texts: return False
     for t in texts:
         if re.match(r"[a-zA-Z]{2,}", t.strip()):
             eng += 1
@@ -112,8 +113,8 @@ def tokenize_table(tbls, doc, eng, batch_size=10):
             d = copy.deepcopy(doc)
             tokenize(d, rows, eng)
             d["content_with_weight"] = rows
-            d["image"] = img
-            add_positions(d, poss)
+            if img: d["image"] = img
+            if poss: add_positions(d, poss)
             res.append(d)
             continue
         de = "; " if eng else "；"
```
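
Both guards line up with the new DOCX path. `is_english` presumably divides by `len(texts)` further down, so an empty input now short-circuits to `False` instead of raising. In `tokenize_table`, DOCX tables arrive from `rag/app/naive.py` as `((None, html), "")`, i.e. with no page image and no positions, so the unconditional assignments would have stored a `None` image and passed an empty value to `add_positions`. A small sketch of the second point:

```python
# Sketch: how a DOCX table entry from rag/app/naive.py flows through the
# guarded code. add_positions() is replaced by a plain dict write here.
(img, html), poss = ((None, "<table><tr><td>cell</td></tr></table>"), "")

d = {"content_with_weight": html}
if img:                       # None for DOCX tables -> skipped
    d["image"] = img
if poss:                      # "" is falsy -> positions are skipped too
    d["positions"] = poss     # stand-in for add_positions(d, poss)

print(d)   # {'content_with_weight': '<table><tr><td>cell</td></tr></table>'}
```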
#### rag/nlp/search.py (CHANGED)

```diff
@@ -46,7 +46,7 @@ class Dealer:
             "k": topk,
             "similarity": sim,
             "num_candidates": topk * 2,
-            "query_vector": qv
+            "query_vector": list(qv)
         }
 
     def search(self, req, idxnm, emb_mdl=None):
```
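
The likely reason for `list(qv)`: the query vector typically comes back from the embedding model as a NumPy array, and the Elasticsearch request body has to be JSON-serializable; a bare `ndarray` is not, while a list of floats is. A small demonstration under that assumption:

```python
# Demonstration under the assumption that qv is a NumPy float array.
import json
import numpy as np

qv = np.random.rand(4)                             # stand-in for an embedding

try:
    json.dumps({"query_vector": qv})               # roughly what the old code sent
except TypeError as err:
    print("ndarray is not JSON-serializable:", err)

print(json.dumps({"query_vector": list(qv)}))      # plain floats serialize fine
```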