KevinHuSh commited on
Commit
8887e47
·
1 Parent(s): 8e222fd

add dockerfile and fix trival bugs (#78)

Browse files
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image for the RAGFlow server image.
FROM infiniflow/ragflow-base:v1.0

WORKDIR /ragflow

# Copy the whole build context into /ragflow, then install and build the
# project under ./web. `npm run build` executes the "build" script from
# package.json; plain `npm build` does not run that script.
COPY . ./
RUN cd ./web && npm i && npm run build

ENV PYTHONPATH=/ragflow
ENV HF_ENDPOINT=https://hf-mirror.com

# Place the entrypoint script in the working directory and make it executable.
COPY docker/entrypoint.sh ./
RUN chmod +x ./entrypoint.sh

ENTRYPOINT ["/bin/bash", "./entrypoint.sh"]
README.md CHANGED
@@ -1 +1,47 @@
1
- # docgpt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ English | [简体中文](./README_zh.md)
2
+
3
+
4
+ ## System Environment Preparation
5
+
6
+ ### Install docker
7
+
8
+ If your machine doesn't have *Docker* installed, please refer to [Install Docker Engine](https://docs.docker.com/engine/install/)
9
+
10
+ ### OS Setups
11
+ In order to run [ElasticSearch](https://www.elastic.co/guide/en/elasticsearch/reference/current/install-elasticsearch.html),
12
+ you need to check the output of the following command:
13
+ ```bash
14
+ 121:/ragflow# sysctl vm.max_map_count
15
+ vm.max_map_count = 262144
16
+ ```
17
+ If **vm.max_map_count** is less than 262144, please run the following command:
18
+ ```bash
19
+ 121:/ragflow# sudo sysctl -w vm.max_map_count=262144
20
+ ```
21
+ However, this change is not persistent and will be reset after a system reboot.
22
+ To make the change permanent, you need to update the **/etc/sysctl.conf** file.
23
+ Add or update the following line in the file:
24
+ ```bash
25
+ vm.max_map_count=262144
26
+ ```
27
+
28
+ ### Here we go!
29
+ > If you want to change the basic settings, such as ports or passwords, please refer to [.env](./docker/.env) before starting the system.
30
+
31
+ > If you change anything in [.env](./docker/.env), please check [service_conf.yaml](./conf/service_conf.yaml) which is a
32
+ > configuration of the back-end service and should be consistent with [.env](./docker/.env).
33
+
34
+ > - In [service_conf.yaml](./conf/service_conf.yaml), configuration of *LLM* in **user_default_llm** is strongly recommended.
35
+ > In **user_default_llm** of [service_conf.yaml](./conf/service_conf.yaml), you need to specify LLM factory and your own _API_KEY_.
36
+ > It's OK if you don't have an _API_KEY_ at the moment; you can specify it later in the settings after starting the system and logging in.
37
+ > - We currently support the following LLM factories, and more are coming soon:
38
+ > [OpenAI](https://platform.openai.com/login?launch), [通义千问/QWen](https://dashscope.console.aliyun.com/model),
39
+ > [智普AI/ZhipuAI](https://open.bigmodel.cn/)
40
+ ```bash
41
+ 121:/ragflow# cd docker
42
+ 121:/ragflow/docker# docker compose up
43
+ ```
44
+ If, after a few minutes, the output stops scrolling and looks like the following picture, _**Hallelujah!**_ You have successfully launched the system.
45
+ <div align="center" style="margin-top:20px;margin-bottom:20px;">
46
+ <img src="https://github.com/infiniflow/ragflow/assets/12318111/7dc8b73f-7890-41b4-aa09-97a417cfd20b" width="1000"/>
47
+ </div>
README_zh.md ADDED
@@ -0,0 +1 @@
 
 
1
+ [English](./README.md) | 简体中文
api/apps/document_app.py CHANGED
@@ -146,6 +146,21 @@ def list():
146
  return server_error_response(e)
147
 
148
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  @manager.route('/change_status', methods=['POST'])
150
  @login_required
151
  @validate_request("doc_id", "status")
@@ -263,6 +278,7 @@ def rename():
263
 
264
 
265
  @manager.route('/get/<doc_id>', methods=['GET'])
 
266
  def get(doc_id):
267
  try:
268
  e, doc = DocumentService.get_by_id(doc_id)
 
146
  return server_error_response(e)
147
 
148
 
149
@manager.route('/thumbnails', methods=['GET'])
@login_required
def thumbnails():
    """Return the thumbnails of the requested documents.

    Reads the ``doc_ids`` query parameter, a comma-separated list of
    document IDs, and responds with a JSON mapping of document ID to
    thumbnail. Responds with ``RetCode.ARGUMENT_ERROR`` when the parameter
    is missing or contains no non-empty ID.
    """
    # A missing parameter yields None, so default to "" before splitting.
    # "".split(",") gives [""], so blank entries are filtered out to let the
    # emptiness check below actually trigger.
    raw_ids = request.args.get("doc_ids", "")
    doc_ids = [doc_id.strip() for doc_id in raw_ids.split(",") if doc_id.strip()]
    if not doc_ids:
        return get_json_result(
            data=False, retmsg='Lack of "Document ID"', retcode=RetCode.ARGUMENT_ERROR)

    try:
        docs = DocumentService.get_thumbnails(doc_ids)
        return get_json_result(data={d["id"]: d["thumbnail"] for d in docs})
    except Exception as e:
        return server_error_response(e)
162
+
163
+
164
  @manager.route('/change_status', methods=['POST'])
165
  @login_required
166
  @validate_request("doc_id", "status")
 
278
 
279
 
280
  @manager.route('/get/<doc_id>', methods=['GET'])
281
+ #@login_required
282
  def get(doc_id):
283
  try:
284
  e, doc = DocumentService.get_by_id(doc_id)
api/db/init_data.py CHANGED
@@ -56,21 +56,21 @@ def init_superuser():
56
  "api_key": API_KEY})
57
 
58
  if not UserService.save(**user_info):
59
- print("【ERROR】can't init admin.")
60
  return
61
  TenantService.insert(**tenant)
62
  UserTenantService.insert(**usr_tenant)
63
  TenantLLMService.insert_many(tenant_llm)
64
- print("【INFO】Super user initialized. user name: admin, password: admin. Changing the password after logining is strongly recomanded.")
65
 
66
  chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"])
67
  msg = chat_mdl.chat(system="", history=[{"role": "user", "content": "Hello!"}], gen_conf={})
68
  if msg.find("ERROR: ") == 0:
69
- print("【ERROR】: '{}' dosen't work. {}".format(tenant["llm_id"]), msg)
70
  embd_mdl = LLMBundle(tenant["id"], LLMType.EMBEDDING, tenant["embd_id"])
71
- v,c = embd_mdl.encode(["Hello!"])
72
  if c == 0:
73
- print("【ERROR】: '{}' dosen't work...".format(tenant["embd_id"]))
74
 
75
 
76
  def init_llm_factory():
@@ -89,12 +89,13 @@ def init_llm_factory():
89
  "logo": "",
90
  "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
91
  "status": "1",
92
- },{
93
- "name": "文心一言",
94
- "logo": "",
95
- "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
96
- "status": "1",
97
  },
 
 
 
 
 
 
98
  ]
99
  llm_infos = [
100
  # ---------------------- OpenAI ------------------------
@@ -198,7 +199,7 @@ def init_llm_factory():
198
  "llm_name": "embedding-2",
199
  "tags": "TEXT EMBEDDING",
200
  "max_tokens": 512,
201
- "model_type": LLMType.SPEECH2TEXT.value
202
  },
203
  ]
204
  for info in factory_infos:
 
56
  "api_key": API_KEY})
57
 
58
  if not UserService.save(**user_info):
59
+ print("\033[93m【ERROR】\033[0mcan't init admin.")
60
  return
61
  TenantService.insert(**tenant)
62
  UserTenantService.insert(**usr_tenant)
63
  TenantLLMService.insert_many(tenant_llm)
64
+ print("【INFO】Super user initialized. \033[93muser name: admin, password: admin\033[0m. Changing the password after logining is strongly recomanded.")
65
 
66
  chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"])
67
  msg = chat_mdl.chat(system="", history=[{"role": "user", "content": "Hello!"}], gen_conf={})
68
  if msg.find("ERROR: ") == 0:
69
+ print("\33[91m【ERROR】\33[0m: ", "'{}' dosen't work. {}".format(tenant["llm_id"]), msg)
70
  embd_mdl = LLMBundle(tenant["id"], LLMType.EMBEDDING, tenant["embd_id"])
71
+ v, c = embd_mdl.encode(["Hello!"])
72
  if c == 0:
73
+ print("\33[91m【ERROR】\33[0m:", " '{}' dosen't work!".format(tenant["embd_id"]))
74
 
75
 
76
  def init_llm_factory():
 
89
  "logo": "",
90
  "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
91
  "status": "1",
 
 
 
 
 
92
  },
93
+ # {
94
+ # "name": "文心一言",
95
+ # "logo": "",
96
+ # "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
97
+ # "status": "1",
98
+ # },
99
  ]
100
  llm_infos = [
101
  # ---------------------- OpenAI ------------------------
 
199
  "llm_name": "embedding-2",
200
  "tags": "TEXT EMBEDDING",
201
  "max_tokens": 512,
202
+ "model_type": LLMType.EMBEDDING.value
203
  },
204
  ]
205
  for info in factory_infos:
api/db/services/document_service.py CHANGED
@@ -107,4 +107,11 @@ class DocumentService(CommonService):
107
  docs = cls.model.select(Knowledgebase.tenant_id).join(Knowledgebase, on=(Knowledgebase.id == cls.model.kb_id)).where(cls.model.id == doc_id, Knowledgebase.status==StatusEnum.VALID.value)
108
  docs = docs.dicts()
109
  if not docs:return
110
- return docs[0]["tenant_id"]
 
 
 
 
 
 
 
 
107
  docs = cls.model.select(Knowledgebase.tenant_id).join(Knowledgebase, on=(Knowledgebase.id == cls.model.kb_id)).where(cls.model.id == doc_id, Knowledgebase.status==StatusEnum.VALID.value)
108
  docs = docs.dicts()
109
  if not docs:return
110
+ return docs[0]["tenant_id"]
111
+
112
+
113
@classmethod
@DB.connection_context()
def get_thumbnails(cls, docids):
    """Fetch the id and thumbnail of every document whose id is in *docids*.

    Returns a list of dicts, one per matching row, holding only the
    selected ``id`` and ``thumbnail`` columns.
    """
    query = cls.model.select(cls.model.id, cls.model.thumbnail)
    matching = query.where(cls.model.id.in_(docids))
    return list(matching.dicts())
api/ragflow_server.py CHANGED
@@ -33,6 +33,15 @@ from api.db.init_data import init_web_data
33
  from api.versions import get_versions
34
 
35
  if __name__ == '__main__':
 
 
 
 
 
 
 
 
 
36
  stat_logger.info(
37
  f'project base: {utils.file_utils.get_project_base_directory()}'
38
  )
 
33
  from api.versions import get_versions
34
 
35
  if __name__ == '__main__':
36
+ print("""
37
+ ____ ______ __
38
+ / __ \ ____ _ ____ _ / ____// /____ _ __
39
+ / /_/ // __ `// __ `// /_ / // __ \| | /| / /
40
+ / _, _// /_/ // /_/ // __/ / // /_/ /| |/ |/ /
41
+ /_/ |_| \__,_/ \__, //_/ /_/ \____/ |__/|__/
42
+ /____/
43
+
44
+ """)
45
  stat_logger.info(
46
  f'project base: {utils.file_utils.get_project_base_directory()}'
47
  )
api/settings.py CHANGED
@@ -45,12 +45,36 @@ REQUEST_MAX_WAIT_SEC = 300
45
 
46
  USE_REGISTRY = get_base_config("use_registry")
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  LLM = get_base_config("user_default_llm", {})
49
- LLM_FACTORY=LLM.get("factory", "通义千问")
50
- CHAT_MDL = LLM.get("chat_model", "qwen-plus")
51
- EMBEDDING_MDL = LLM.get("embedding_model", "text-embedding-v2")
52
- ASR_MDL = LLM.get("asr_model", "paraformer-realtime-8k-v1")
53
- IMAGE2TEXT_MDL = LLM.get("image2text_model", "qwen-vl-max")
 
 
 
 
54
  API_KEY = LLM.get("api_key", "infiniflow API Key")
55
  PARSERS = LLM.get("parsers", "general:General,qa:Q&A,resume:Resume,naive:Naive,table:Table,laws:Laws,manual:Manual,book:Book,paper:Paper,presentation:Presentation,picture:Picture")
56
 
@@ -72,7 +96,7 @@ RANDOM_INSTANCE_ID = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("random_inst
72
  PROXY = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("proxy")
73
  PROXY_PROTOCOL = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("protocol")
74
 
75
- DATABASE = decrypt_database_config()
76
 
77
  # Logger
78
  LoggerFactory.set_directory(os.path.join(get_project_base_directory(), "logs", "api"))
 
45
 
46
  USE_REGISTRY = get_base_config("use_registry")
47
 
48
+ default_llm = {
49
+ "通义千问": {
50
+ "chat_model": "qwen-plus",
51
+ "embedding_model": "text-embedding-v2",
52
+ "image2text_model": "qwen-vl-max",
53
+ "asr_model": "paraformer-realtime-8k-v1",
54
+ },
55
+ "OpenAI": {
56
+ "chat_model": "gpt-3.5-turbo",
57
+ "embedding_model": "text-embedding-ada-002",
58
+ "image2text_model": "gpt-4-vision-preview",
59
+ "asr_model": "whisper-1",
60
+ },
61
+ "智普AI": {
62
+ "chat_model": "glm-3-turbo",
63
+ "embedding_model": "embedding-2",
64
+ "image2text_model": "glm-4v",
65
+ "asr_model": "",
66
+ },
67
+ }
68
  LLM = get_base_config("user_default_llm", {})
69
+ LLM_FACTORY = LLM.get("factory", "通义千问")
70
+ if LLM_FACTORY not in default_llm:
71
+ print("\33[91m【ERROR】\33[0m:", f"LLM factory {LLM_FACTORY} has not supported yet, switch to '通义千问/QWen' automatically, and please check the API_KEY in service_conf.yaml.")
72
+ LLM_FACTORY = "通义千问"
73
+ CHAT_MDL = default_llm[LLM_FACTORY]["chat_model"]
74
+ EMBEDDING_MDL = default_llm[LLM_FACTORY]["embedding_model"]
75
+ ASR_MDL = default_llm[LLM_FACTORY]["asr_model"]
76
+ IMAGE2TEXT_MDL = default_llm[LLM_FACTORY]["image2text_model"]
77
+
78
  API_KEY = LLM.get("api_key", "infiniflow API Key")
79
  PARSERS = LLM.get("parsers", "general:General,qa:Q&A,resume:Resume,naive:Naive,table:Table,laws:Laws,manual:Manual,book:Book,paper:Paper,presentation:Presentation,picture:Picture")
80
 
 
96
  PROXY = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("proxy")
97
  PROXY_PROTOCOL = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("protocol")
98
 
99
+ DATABASE = decrypt_database_config(name="mysql")
100
 
101
  # Logger
102
  LoggerFactory.set_directory(os.path.join(get_project_base_directory(), "logs", "api"))
api/utils/__init__.py CHANGED
@@ -264,7 +264,7 @@ def decrypt_database_password(password):
264
  return pwdecrypt_fun(private_key, password)
265
 
266
 
267
- def decrypt_database_config(database=None, passwd_key="passwd", name="database"):
268
  if not database:
269
  database = get_base_config(name, {})
270
 
 
264
  return pwdecrypt_fun(private_key, password)
265
 
266
 
267
+ def decrypt_database_config(database=None, passwd_key="password", name="database"):
268
  if not database:
269
  database = get_base_config(name, {})
270
 
conf/service_conf.yaml CHANGED
@@ -1,41 +1,36 @@
1
- authentication:
2
- client:
3
- switch: false
4
- http_app_key:
5
- http_secret_key:
6
- site:
7
- switch: false
8
- permission:
9
- switch: false
10
- component: false
11
- dataset: false
12
  ragflow:
13
- # you must set real ip address, 127.0.0.1 and 0.0.0.0 is not supported
14
  host: 0.0.0.0
15
  http_port: 9380
16
- database:
17
  name: 'rag_flow'
18
  user: 'root'
19
- passwd: 'infini_rag_flow'
20
  host: '127.0.0.1'
21
  port: 5455
22
  max_connections: 100
23
  stale_timeout: 30
24
  minio:
25
  user: 'rag_flow'
26
- passwd: 'infini_rag_flow'
27
  host: '127.0.0.1:9000'
28
  es:
29
- hosts: 'http://127.0.0.1:9200'
30
  user_default_llm:
31
  factory: '通义千问'
32
- chat_model: 'qwen-plus'
33
- embedding_model: 'text-embedding-v2'
34
- asr_model: 'paraformer-realtime-8k-v1'
35
- image2text_model: 'qwen-vl-max'
36
  api_key: 'sk-xxxxxxxxxxxxx'
37
  oauth:
38
  github:
39
  client_id: 302129228f0d96055bee
40
  secret_key: e518e55ccfcdfcae8996afc40f110e9c95f14fc4
41
- url: https://github.com/login/oauth/access_token
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ragflow:
 
2
  host: 0.0.0.0
3
  http_port: 9380
4
+ mysql:
5
  name: 'rag_flow'
6
  user: 'root'
7
+ password: 'infini_rag_flow'
8
  host: '127.0.0.1'
9
  port: 5455
10
  max_connections: 100
11
  stale_timeout: 30
12
  minio:
13
  user: 'rag_flow'
14
+ password: 'infini_rag_flow'
15
  host: '127.0.0.1:9000'
16
  es:
17
+ hosts: 'http://127.0.0.1:1200'
18
  user_default_llm:
19
  factory: '通义千问'
 
 
 
 
20
  api_key: 'sk-xxxxxxxxxxxxx'
21
  oauth:
22
  github:
23
  client_id: 302129228f0d96055bee
24
  secret_key: e518e55ccfcdfcae8996afc40f110e9c95f14fc4
25
+ url: https://github.com/login/oauth/access_token
26
+ authentication:
27
+ client:
28
+ switch: false
29
+ http_app_key:
30
+ http_secret_key:
31
+ site:
32
+ switch: false
33
+ permission:
34
+ switch: false
35
+ component: false
36
+ dataset: false
docker/docker-compose.yml CHANGED
@@ -84,6 +84,24 @@ services:
84
  restart: always
85
 
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  volumes:
88
  esdata01:
89
  driver: local
 
84
  restart: always
85
 
86
 
87
+ ragflow:
88
+ depends_on:
89
+ - es01
90
+ - mysql
91
+ - minio
92
+ image: infiniflow/ragflow:v1.0
93
+ container_name: ragflow-server
94
+ ports:
95
+ - ${SVR_HTTP_PORT}:9380
96
+ volumes:
97
+ - ./service_conf.yaml:/ragflow/conf/service_conf.yaml
98
+ - ./nginx.conf:/etc/nginx/nginx.conf
99
+ - ./ragflow-logs:/ragflow/logs
100
+ networks:
101
+ - ragflow
102
+ restart: always
103
+
104
+
105
  volumes:
106
  esdata01:
107
  driver: local
docker/entrypoint.sh ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash

# Start the task broker in the background; watch_broker relaunches it
# whenever no broker process is found.
python rag/svr/task_broker.py &

# Run the task executors forever: relaunch mpirun each time it returns.
function task_exe(){
    while true; do
        mpirun -n 2 python rag/svr/task_executor.py
    done
}

# Every 5 seconds, count running task_broker.py processes and start a new
# broker when there is none.
function watch_broker(){
    local count
    while true; do
        # `grep -v grep` removes the grep process itself from the count.
        count=$(ps aux | grep "task_broker.py" | grep -v grep | wc -l)
        if [ "$count" -lt 1 ]; then
            python rag/svr/task_broker.py &
        fi
        sleep 5
    done
}


task_exe &
sleep 10
watch_broker &

# The API server runs in the foreground.
python api/ragflow_server.py
docker/service_conf.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ragflow:
2
+ host: 0.0.0.0
3
+ http_port: 9380
4
+ mysql:
5
+ name: 'rag_flow'
6
+ user: 'root'
7
+ password: 'infini_rag_flow'
8
+ host: '127.0.0.1'
9
+ port: 5455
10
+ max_connections: 100
11
+ stale_timeout: 30
12
+ minio:
13
+ user: 'rag_flow'
14
+ password: 'infini_rag_flow'
15
+ host: '127.0.0.1:9000'
16
+ es:
17
+ hosts: 'http://127.0.0.1:1200'
18
+ user_default_llm:
19
+ factory: '通义千问'
20
+ api_key: 'sk-xxxxxxxxxxxxx'
21
+ oauth:
22
+ github:
23
+ client_id: 302129228f0d96055bee
24
+ secret_key: e518e55ccfcdfcae8996afc40f110e9c95f14fc4
25
+ url: https://github.com/login/oauth/access_token
26
+ authentication:
27
+ client:
28
+ switch: false
29
+ http_app_key:
30
+ http_secret_key:
31
+ site:
32
+ switch: false
33
+ permission:
34
+ switch: false
35
+ component: false
36
+ dataset: false
rag/utils/minio_conn.py CHANGED
@@ -23,7 +23,7 @@ class HuMinio(object):
23
  try:
24
  self.conn = Minio(settings.MINIO["host"],
25
  access_key=settings.MINIO["user"],
26
- secret_key=settings.MINIO["passwd"],
27
  secure=False
28
  )
29
  except Exception as e:
 
23
  try:
24
  self.conn = Minio(settings.MINIO["host"],
25
  access_key=settings.MINIO["user"],
26
+ secret_key=settings.MINIO["password"],
27
  secure=False
28
  )
29
  except Exception as e: