Spaces:

retopara
/

ragflow

Build error

App Files Files Community

KevinHuSh commited on Feb 28, 2024

Commit

8887e47

1 Parent(s): 8e222fd

add dockerfile and fix trival bugs (#78)

Browse files

Files changed (14) hide show

Dockerfile +14 -0
README.md +47 -1
README_zh.md +1 -0
api/apps/document_app.py +16 -0
api/db/init_data.py +12 -11
api/db/services/document_service.py +8 -1
api/ragflow_server.py +9 -0
api/settings.py +30 -6
api/utils/__init__.py +1 -1
conf/service_conf.yaml +16 -21
docker/docker-compose.yml +18 -0
docker/entrypoint.sh +24 -0
docker/service_conf.yaml +36 -0
rag/utils/minio_conn.py +1 -1

Dockerfile ADDED Viewed

	@@ -0,0 +1,14 @@

+# Image for the RAGFlow server, built on the prepared base image.
+FROM infiniflow/ragflow-base:v1.0
+WORKDIR /ragflow
+COPY . ./
+# Install front-end dependencies and run the package's "build" script.
+# `npm build` is not the script runner; `npm run build` is.
+RUN cd ./web && npm i && npm run build
+ENV PYTHONPATH=/ragflow
+ENV HF_ENDPOINT=https://hf-mirror.com
+# Place the entrypoint script in the working directory and make it executable.
+COPY docker/entrypoint.sh ./
+RUN chmod +x ./entrypoint.sh
+ENTRYPOINT ["/bin/bash", "./entrypoint.sh"]

README.md CHANGED Viewed

	@@ -1 +1,47 @@
1	- # ~~docgpt~~

+English | [简体中文](./README_zh.md)
+## System Environment Preparation
+### Install docker
+If your machine doesn't have *Docker* installed, please refer to [Install Docker Engine](https://docs.docker.com/engine/install/)
+### OS Setups
+In order to run [ElasticSearch](https://www.elastic.co/guide/en/elasticsearch/reference/current/install-elasticsearch.html),
+you need to check the following command:
+```bash
+121:/ragflow# sysctl vm.max_map_count
+vm.max_map_count = 262144
+```
+If **vm.max_map_count** is not larger than 65535, please run the following command:
+```bash
+121:/ragflow# sudo sysctl -w vm.max_map_count=262144
+```
+However, this change is not persistent and will be reset after a system reboot.
+To make the change permanent, you need to update the **/etc/sysctl.conf** file.
+Add or update the following line in the file:
+```bash
+vm.max_map_count=262144
+```
+### Here we go!
+> If you want to change the basic setups, like port, password, etc., please refer to [.env](./docker/.env) before starting the system.
+> If you change anything in [.env](./docker/.env), please check [service_conf.yaml](./conf/service_conf.yaml) which is a
+> configuration of the back-end service and should be consistent with [.env](./docker/.env).
+> - In [service_conf.yaml](./conf/service_conf.yaml), configuration of *LLM* in **user_default_llm** is strongly recommended.
+> In **user_default_llm** of [service_conf.yaml](./conf/service_conf.yaml), you need to specify LLM factory and your own _API_KEY_.
+> It's O.K. if you don't have an _API_KEY_ at the moment; you can specify it later in the settings after starting and logging in to the system.
+> - We currently support the following LLM factories, and others are coming soon:
+> [OpenAI](https://platform.openai.com/login?launch), [通义千问/QWen](https://dashscope.console.aliyun.com/model),
+> [智普AI/ZhipuAI](https://open.bigmodel.cn/)
+```bash
+121:/ragflow# cd docker
+121:/ragflow/docker# docker compose up
+```
+If, after a few minutes, the output stops scrolling and halts as in the following picture, _**Hallelujah!**_ You have successfully launched the system.
+<div align="center" style="margin-top:20px;margin-bottom:20px;">
+<img src="https://github.com/infiniflow/ragflow/assets/12318111/7dc8b73f-7890-41b4-aa09-97a417cfd20b" width="1000"/>
+</div>

README_zh.md ADDED Viewed

	@@ -0,0 +1 @@


1	+ [English](./README.md) \| 简体中文

api/apps/document_app.py CHANGED Viewed

@@ -146,6 +146,21 @@ def list():
         return server_error_response(e)
 @manager.route('/change_status', methods=['POST'])
 @login_required
 @validate_request("doc_id", "status")
@@ -263,6 +278,7 @@ def rename():
 @manager.route('/get/<doc_id>', methods=['GET'])
 def get(doc_id):
     try:
         e, doc = DocumentService.get_by_id(doc_id)

         return server_error_response(e)
+@manager.route('/thumbnails', methods=['GET'])
+@login_required
+def thumbnails():
+    doc_ids = request.args.get("doc_ids").split(",")
+    if not doc_ids:
+        return get_json_result(
+            data=False, retmsg='Lack of "Document ID"', retcode=RetCode.ARGUMENT_ERROR)
+    try:
+        docs = DocumentService.get_thumbnails(doc_ids)
+        return get_json_result(data={d["id"]: d["thumbnail"] for d in docs})
+    except Exception as e:
+        return server_error_response(e)
 @manager.route('/change_status', methods=['POST'])
 @login_required
 @validate_request("doc_id", "status")
 @manager.route('/get/<doc_id>', methods=['GET'])
+#@login_required
 def get(doc_id):
     try:
         e, doc = DocumentService.get_by_id(doc_id)

api/db/init_data.py CHANGED Viewed

@@ -56,21 +56,21 @@ def init_superuser():
              "api_key": API_KEY})
     if not UserService.save(**user_info):
-        print("【ERROR】can't init admin.")
         return
     TenantService.insert(**tenant)
     UserTenantService.insert(**usr_tenant)
     TenantLLMService.insert_many(tenant_llm)
-    print("【INFO】Super user initialized. user name: admin, password: admin. Changing the password after logining is strongly recomanded.")
     chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"])
     msg = chat_mdl.chat(system="", history=[{"role": "user", "content": "Hello!"}], gen_conf={})
     if msg.find("ERROR: ") == 0:
-        print("【ERROR】: '{}' dosen't work. {}".format(tenant["llm_id"]), msg)
     embd_mdl = LLMBundle(tenant["id"], LLMType.EMBEDDING, tenant["embd_id"])
-    v,c = embd_mdl.encode(["Hello!"])
     if c == 0:
-        print("【ERROR】: '{}' dosen't work...".format(tenant["embd_id"]))
 def init_llm_factory():
@@ -89,12 +89,13 @@ def init_llm_factory():
             "logo": "",
             "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
             "status": "1",
-        },{
-            "name": "文心一言",
-            "logo": "",
-            "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
-            "status": "1",
         },
     ]
     llm_infos = [
         # ---------------------- OpenAI ------------------------
@@ -198,7 +199,7 @@ def init_llm_factory():
             "llm_name": "embedding-2",
             "tags": "TEXT EMBEDDING",
             "max_tokens": 512,
-            "model_type": LLMType.SPEECH2TEXT.value
         },
     ]
     for info in factory_infos:

              "api_key": API_KEY})
     if not UserService.save(**user_info):
+        print("\033[93m【ERROR】\033[0mcan't init admin.")
         return
     TenantService.insert(**tenant)
     UserTenantService.insert(**usr_tenant)
     TenantLLMService.insert_many(tenant_llm)
+    print("【INFO】Super user initialized. \033[93muser name: admin, password: admin\033[0m. Changing the password after logining is strongly recomanded.")
     chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"])
     msg = chat_mdl.chat(system="", history=[{"role": "user", "content": "Hello!"}], gen_conf={})
     if msg.find("ERROR: ") == 0:
+        print("\33[91m【ERROR】\33[0m: ", "'{}' dosen't work. {}".format(tenant["llm_id"]), msg)
     embd_mdl = LLMBundle(tenant["id"], LLMType.EMBEDDING, tenant["embd_id"])
+    v, c = embd_mdl.encode(["Hello!"])
     if c == 0:
+        print("\33[91m【ERROR】\33[0m:", " '{}' dosen't work!".format(tenant["embd_id"]))
 def init_llm_factory():
             "logo": "",
             "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
             "status": "1",
         },
+        # {
+        #     "name": "文心一言",
+        #     "logo": "",
+        #     "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
+        #     "status": "1",
+        # },
     ]
     llm_infos = [
         # ---------------------- OpenAI ------------------------
             "llm_name": "embedding-2",
             "tags": "TEXT EMBEDDING",
             "max_tokens": 512,
+            "model_type": LLMType.EMBEDDING.value
         },
     ]
     for info in factory_infos:

api/db/services/document_service.py CHANGED Viewed

@@ -107,4 +107,11 @@ class DocumentService(CommonService):
         docs = cls.model.select(Knowledgebase.tenant_id).join(Knowledgebase, on=(Knowledgebase.id == cls.model.kb_id)).where(cls.model.id == doc_id, Knowledgebase.status==StatusEnum.VALID.value)
         docs = docs.dicts()
         if not docs:return
-        return docs[0]["tenant_id"]

         docs = cls.model.select(Knowledgebase.tenant_id).join(Knowledgebase, on=(Knowledgebase.id == cls.model.kb_id)).where(cls.model.id == doc_id, Knowledgebase.status==StatusEnum.VALID.value)
         docs = docs.dicts()
         if not docs:return
+        return docs[0]["tenant_id"]
+    @classmethod
+    @DB.connection_context()
+    def get_thumbnails(cls, docids):
+        fields = [cls.model.id, cls.model.thumbnail]
+        return list(cls.model.select(*fields).where(cls.model.id.in_(docids)).dicts())

api/ragflow_server.py CHANGED Viewed

@@ -33,6 +33,15 @@ from api.db.init_data import init_web_data
 from api.versions import get_versions
 if __name__ == '__main__':
     stat_logger.info(
         f'project base: {utils.file_utils.get_project_base_directory()}'
     )

 from api.versions import get_versions
 if __name__ == '__main__':
+    print("""
+    ____                 ______ __
+   / __ \ ____ _ ____ _ / ____// /____  _      __
+  / /_/ // __ `// __ `// /_   / // __ \| | /| / /
+ / _, _// /_/ // /_/ // __/  / // /_/ /| |/ |/ /
+/_/ |_| \__,_/ \__, //_/    /_/ \____/ |__/|__/
+              /____/
+    """)
     stat_logger.info(
         f'project base: {utils.file_utils.get_project_base_directory()}'
     )

api/settings.py CHANGED Viewed

@@ -45,12 +45,36 @@ REQUEST_MAX_WAIT_SEC = 300
 USE_REGISTRY = get_base_config("use_registry")
 LLM = get_base_config("user_default_llm", {})
-LLM_FACTORY=LLM.get("factory", "通义千问")
-CHAT_MDL = LLM.get("chat_model", "qwen-plus")
-EMBEDDING_MDL = LLM.get("embedding_model", "text-embedding-v2")
-ASR_MDL = LLM.get("asr_model", "paraformer-realtime-8k-v1")
-IMAGE2TEXT_MDL = LLM.get("image2text_model", "qwen-vl-max")
 API_KEY = LLM.get("api_key", "infiniflow API Key")
 PARSERS = LLM.get("parsers", "general:General,qa:Q&A,resume:Resume,naive:Naive,table:Table,laws:Laws,manual:Manual,book:Book,paper:Paper,presentation:Presentation,picture:Picture")
@@ -72,7 +96,7 @@ RANDOM_INSTANCE_ID = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("random_inst
 PROXY = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("proxy")
 PROXY_PROTOCOL = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("protocol")
-DATABASE = decrypt_database_config()
 # Logger
 LoggerFactory.set_directory(os.path.join(get_project_base_directory(), "logs", "api"))

 USE_REGISTRY = get_base_config("use_registry")
+# Default model names for each supported LLM factory, keyed by factory name.
+# Each entry provides the chat, embedding, image-to-text and ASR model names.
+default_llm = {
+    "通义千问": {
+        "chat_model": "qwen-plus",
+        "embedding_model": "text-embedding-v2",
+        "image2text_model": "qwen-vl-max",
+        "asr_model": "paraformer-realtime-8k-v1",
+    },
+    "OpenAI": {
+        "chat_model": "gpt-3.5-turbo",
+        "embedding_model": "text-embedding-ada-002",
+        "image2text_model": "gpt-4-vision-preview",
+        "asr_model": "whisper-1",
+    },
+    "智普AI": {
+        "chat_model": "glm-3-turbo",
+        "embedding_model": "embedding-2",
+        "image2text_model": "glm-4v",
+        "asr_model": "",
+    },
+}
 LLM = get_base_config("user_default_llm", {})
+# The factory comes from the "user_default_llm" config section; when it is
+# absent, "通义千问" is used.
+LLM_FACTORY = LLM.get("factory", "通义千问")
+# A factory without an entry in default_llm is reported on stdout and
+# replaced by "通义千问" so the lookups below cannot raise KeyError.
+if LLM_FACTORY not in default_llm:
+    print("\33[91m【ERROR】\33[0m:", f"LLM factory {LLM_FACTORY} has not supported yet, switch to '通义千问/QWen' automatically, and please check the API_KEY in service_conf.yaml.")
+    LLM_FACTORY = "通义千问"
+# Model names are taken from the default table for the selected factory.
+CHAT_MDL = default_llm[LLM_FACTORY]["chat_model"]
+EMBEDDING_MDL = default_llm[LLM_FACTORY]["embedding_model"]
+ASR_MDL = default_llm[LLM_FACTORY]["asr_model"]
+IMAGE2TEXT_MDL = default_llm[LLM_FACTORY]["image2text_model"]
 API_KEY = LLM.get("api_key", "infiniflow API Key")
 PARSERS = LLM.get("parsers", "general:General,qa:Q&A,resume:Resume,naive:Naive,table:Table,laws:Laws,manual:Manual,book:Book,paper:Paper,presentation:Presentation,picture:Picture")
 PROXY = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("proxy")
 PROXY_PROTOCOL = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("protocol")
+DATABASE = decrypt_database_config(name="mysql")
 # Logger
 LoggerFactory.set_directory(os.path.join(get_project_base_directory(), "logs", "api"))

api/utils/__init__.py CHANGED Viewed

@@ -264,7 +264,7 @@ def decrypt_database_password(password):
     return pwdecrypt_fun(private_key, password)
-def decrypt_database_config(database=None, passwd_key="passwd", name="database"):
     if not database:
         database = get_base_config(name, {})

     return pwdecrypt_fun(private_key, password)
+def decrypt_database_config(database=None, passwd_key="password", name="database"):
     if not database:
         database = get_base_config(name, {})

conf/service_conf.yaml CHANGED Viewed

@@ -1,41 +1,36 @@
-authentication:
-  client:
-    switch: false
-    http_app_key:
-    http_secret_key:
-  site:
-    switch: false
-permission:
-  switch: false
-  component: false
-  dataset: false
 ragflow:
-  # you must set real ip address, 127.0.0.1 and 0.0.0.0 is not supported
   host: 0.0.0.0
   http_port: 9380
-database:
   name: 'rag_flow'
   user: 'root'
-  passwd: 'infini_rag_flow'
   host: '127.0.0.1'
   port: 5455
   max_connections: 100
   stale_timeout: 30
 minio:
   user: 'rag_flow'
-  passwd: 'infini_rag_flow'
   host: '127.0.0.1:9000'
 es:
-  hosts: 'http://127.0.0.1:9200'
 user_default_llm:
   factory: '通义千问'
-  chat_model: 'qwen-plus'
-  embedding_model: 'text-embedding-v2'
-  asr_model: 'paraformer-realtime-8k-v1'
-  image2text_model: 'qwen-vl-max'
   api_key: 'sk-xxxxxxxxxxxxx'
 oauth:
   github:
     client_id: 302129228f0d96055bee
     secret_key: e518e55ccfcdfcae8996afc40f110e9c95f14fc4
-    url: https://github.com/login/oauth/access_token

 ragflow:
   host: 0.0.0.0
   http_port: 9380
+mysql:
   name: 'rag_flow'
   user: 'root'
+  password: 'infini_rag_flow'
   host: '127.0.0.1'
   port: 5455
   max_connections: 100
   stale_timeout: 30
 minio:
   user: 'rag_flow'
+  password: 'infini_rag_flow'
   host: '127.0.0.1:9000'
 es:
+  hosts: 'http://127.0.0.1:1200'
 user_default_llm:
   factory: '通义千问'
   api_key: 'sk-xxxxxxxxxxxxx'
 oauth:
   github:
     client_id: 302129228f0d96055bee
     secret_key: e518e55ccfcdfcae8996afc40f110e9c95f14fc4
+    url: https://github.com/login/oauth/access_token
+authentication:
+  client:
+    switch: false
+    http_app_key:
+    http_secret_key:
+  site:
+    switch: false
+permission:
+  switch: false
+  component: false
+  dataset: false

docker/docker-compose.yml CHANGED Viewed

@@ -84,6 +84,24 @@ services:
     restart: always
 volumes:
   esdata01:
     driver: local

     restart: always
+  ragflow:
+    depends_on:
+      - es01
+      - mysql
+      - minio
+    image: infiniflow/ragflow:v1.0
+    container_name: ragflow-server
+    ports:
+      - ${SVR_HTTP_PORT}:9380
+    volumes:
+      - ./service_conf.yaml:/ragflow/conf/service_conf.yaml
+      - ./nginx.conf:/etc/nginx/nginx.conf
+      - ./ragflow-logs:/ragflow/logs
+    networks:
+      - ragflow
+    restart: always
 volumes:
   esdata01:
     driver: local

docker/entrypoint.sh ADDED Viewed

	@@ -0,0 +1,24 @@

+#!/bin/bash
+# Container entrypoint: starts the task broker and the task executors in the
+# background, then runs the API server in the foreground.
+python rag/svr/task_broker.py &
+# Run the task executors under mpirun and relaunch them whenever they exit.
+function task_exe(){
+  while [ 1 -eq 1 ];do mpirun -n 2 python rag/svr/task_executor.py ; done
+}
+# Every 5 seconds, restart the task broker if no such process is running.
+function watch_broker(){
+  # The space before "]" is required; without it the test command fails
+  # and the loop never runs.
+  while [ 1 -eq 1 ];do
+    C=`ps aux|grep "task_broker.py"|grep -v grep|wc -l`;
+    if [ "$C" -lt 1 ];then
+      python rag/svr/task_broker.py &
+    fi
+    sleep 5;
+  done
+}
+task_exe &
+sleep 10;
+watch_broker &
+python api/ragflow_server.py

docker/service_conf.yaml ADDED Viewed

	@@ -0,0 +1,36 @@

+ragflow:
+  host: 0.0.0.0
+  http_port: 9380
+mysql:
+  name: 'rag_flow'
+  user: 'root'
+  password: 'infini_rag_flow'
+  host: '127.0.0.1'
+  port: 5455
+  max_connections: 100
+  stale_timeout: 30
+minio:
+  user: 'rag_flow'
+  password: 'infini_rag_flow'
+  host: '127.0.0.1:9000'
+es:
+  hosts: 'http://127.0.0.1:1200'
+user_default_llm:
+  factory: '通义千问'
+  api_key: 'sk-xxxxxxxxxxxxx'
+oauth:
+  github:
+    client_id: 302129228f0d96055bee
+    secret_key: e518e55ccfcdfcae8996afc40f110e9c95f14fc4
+    url: https://github.com/login/oauth/access_token
+authentication:
+  client:
+    switch: false
+    http_app_key:
+    http_secret_key:
+  site:
+    switch: false
+permission:
+  switch: false
+  component: false
+  dataset: false

rag/utils/minio_conn.py CHANGED Viewed

@@ -23,7 +23,7 @@ class HuMinio(object):
         try:
             self.conn = Minio(settings.MINIO["host"],
                               access_key=settings.MINIO["user"],
-                              secret_key=settings.MINIO["passwd"],
                               secure=False
                               )
         except Exception as e:

         try:
             self.conn = Minio(settings.MINIO["host"],
                               access_key=settings.MINIO["user"],
+                              secret_key=settings.MINIO["password"],
                               secure=False
                               )
         except Exception as e: