Spaces:

retopara
/

ragflow

Build error

Kevin Hu committed on Sep 24, 2024

Commit

dbcbb17

1 Parent(s): 853826a

add lighten control (#2567)

### What problem does this PR solve?

#2295

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):

Files changed (5) hide show

api/apps/llm_app.py +3 -1
api/settings.py +67 -67
deepdoc/parser/pdf_parser.py +5 -3
rag/llm/embedding_model.py +5 -3
rag/llm/rerank_model.py +6 -4

api/apps/llm_app.py CHANGED Viewed

@@ -18,6 +18,7 @@ import json
 from flask import request
 from flask_login import login_required, current_user
 from api.db.services.llm_service import LLMFactoriesService, TenantLLMService, LLMService
 from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
 from api.db import StatusEnum, LLMType
 from api.db.db_models import TenantLLM
@@ -319,13 +320,14 @@ def my_llms():
 @login_required
 def list_app():
     self_deploied = ["Youdao","FastEmbed", "BAAI", "Ollama", "Xinference", "LocalAI", "LM-Studio"]
     model_type = request.args.get("model_type")
     try:
         objs = TenantLLMService.query(tenant_id=current_user.id)
         facts = set([o.to_dict()["llm_factory"] for o in objs if o.api_key])
         llms = LLMService.get_all()
         llms = [m.to_dict()
-                for m in llms if m.status == StatusEnum.VALID.value]
         for m in llms:
             m["available"] = m["fid"] in facts or m["llm_name"].lower() == "flag-embedding" or m["fid"] in self_deploied

 from flask import request
 from flask_login import login_required, current_user
 from api.db.services.llm_service import LLMFactoriesService, TenantLLMService, LLMService
+from api.settings import LIGHTEN
 from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
 from api.db import StatusEnum, LLMType
 from api.db.db_models import TenantLLM
 @login_required
 def list_app():
     self_deploied = ["Youdao","FastEmbed", "BAAI", "Ollama", "Xinference", "LocalAI", "LM-Studio"]
+    weighted = ["Youdao","FastEmbed", "BAAI"] if LIGHTEN else []
     model_type = request.args.get("model_type")
     try:
         objs = TenantLLMService.query(tenant_id=current_user.id)
         facts = set([o.to_dict()["llm_factory"] for o in objs if o.api_key])
         llms = LLMService.get_all()
         llms = [m.to_dict()
+                for m in llms if m.status == StatusEnum.VALID.value and m.fid not in weighted]
         for m in llms:
             m["available"] = m["fid"] in facts or m["llm_name"].lower() == "flag-embedding" or m["fid"] in self_deploied

api/settings.py CHANGED Viewed

@@ -42,6 +42,7 @@ RAG_FLOW_SERVICE_NAME = "ragflow"
 SERVER_MODULE = "rag_flow_server.py"
 TEMP_DIRECTORY = os.path.join(get_project_base_directory(), "temp")
 RAG_FLOW_CONF_PATH = os.path.join(get_project_base_directory(), "conf")
 SUBPROCESS_STD_LOG_NAME = "std.log"
@@ -57,77 +58,76 @@ REQUEST_MAX_WAIT_SEC = 300
 USE_REGISTRY = get_base_config("use_registry")
-default_llm = {
-    "Tongyi-Qianwen": {
-        "chat_model": "qwen-plus",
-        "embedding_model": "text-embedding-v2",
-        "image2text_model": "qwen-vl-max",
-        "asr_model": "paraformer-realtime-8k-v1",
-    },
-    "OpenAI": {
-        "chat_model": "gpt-3.5-turbo",
-        "embedding_model": "text-embedding-ada-002",
-        "image2text_model": "gpt-4-vision-preview",
-        "asr_model": "whisper-1",
-    },
-    "Azure-OpenAI": {
-        "chat_model": "azure-gpt-35-turbo",
-        "embedding_model": "azure-text-embedding-ada-002",
-        "image2text_model": "azure-gpt-4-vision-preview",
-        "asr_model": "azure-whisper-1",
-    },
-    "ZHIPU-AI": {
-        "chat_model": "glm-3-turbo",
-        "embedding_model": "embedding-2",
-        "image2text_model": "glm-4v",
-        "asr_model": "",
-    },
-    "Ollama": {
-        "chat_model": "qwen-14B-chat",
-        "embedding_model": "flag-embedding",
-        "image2text_model": "",
-        "asr_model": "",
-    },
-    "Moonshot": {
-        "chat_model": "moonshot-v1-8k",
-        "embedding_model": "",
-        "image2text_model": "",
-        "asr_model": "",
-    },
-    "DeepSeek": {
-        "chat_model": "deepseek-chat",
-        "embedding_model": "",
-        "image2text_model": "",
-        "asr_model": "",
-    },
-    "VolcEngine": {
-        "chat_model": "",
-        "embedding_model": "",
-        "image2text_model": "",
-        "asr_model": "",
-    },
-    "BAAI": {
-        "chat_model": "",
-        "embedding_model": "BAAI/bge-large-zh-v1.5",
-        "image2text_model": "",
-        "asr_model": "",
-        "rerank_model": "BAAI/bge-reranker-v2-m3",
-    }
-}
 LLM = get_base_config("user_default_llm", {})
 LLM_FACTORY = LLM.get("factory", "Tongyi-Qianwen")
 LLM_BASE_URL = LLM.get("base_url")
-if LLM_FACTORY not in default_llm:
-    print(
-        "\33[91m【ERROR】\33[0m:",
-        f"LLM factory {LLM_FACTORY} has not supported yet, switch to 'Tongyi-Qianwen/QWen' automatically, and please check the API_KEY in service_conf.yaml.")
-    LLM_FACTORY = "Tongyi-Qianwen"
-CHAT_MDL = default_llm[LLM_FACTORY]["chat_model"]
-EMBEDDING_MDL = default_llm["BAAI"]["embedding_model"]
-RERANK_MDL = default_llm["BAAI"]["rerank_model"]
-ASR_MDL = default_llm[LLM_FACTORY]["asr_model"]
-IMAGE2TEXT_MDL = default_llm[LLM_FACTORY]["image2text_model"]
 API_KEY = LLM.get("api_key", "")
 PARSERS = LLM.get(

 SERVER_MODULE = "rag_flow_server.py"
 TEMP_DIRECTORY = os.path.join(get_project_base_directory(), "temp")
 RAG_FLOW_CONF_PATH = os.path.join(get_project_base_directory(), "conf")
+LIGHTEN = os.environ.get('LIGHTEN')
 SUBPROCESS_STD_LOG_NAME = "std.log"
 USE_REGISTRY = get_base_config("use_registry")
 LLM = get_base_config("user_default_llm", {})
 LLM_FACTORY = LLM.get("factory", "Tongyi-Qianwen")
 LLM_BASE_URL = LLM.get("base_url")
+if not LIGHTEN:
+    default_llm = {
+        "Tongyi-Qianwen": {
+            "chat_model": "qwen-plus",
+            "embedding_model": "text-embedding-v2",
+            "image2text_model": "qwen-vl-max",
+            "asr_model": "paraformer-realtime-8k-v1",
+        },
+        "OpenAI": {
+            "chat_model": "gpt-3.5-turbo",
+            "embedding_model": "text-embedding-ada-002",
+            "image2text_model": "gpt-4-vision-preview",
+            "asr_model": "whisper-1",
+        },
+        "Azure-OpenAI": {
+            "chat_model": "azure-gpt-35-turbo",
+            "embedding_model": "azure-text-embedding-ada-002",
+            "image2text_model": "azure-gpt-4-vision-preview",
+            "asr_model": "azure-whisper-1",
+        },
+        "ZHIPU-AI": {
+            "chat_model": "glm-3-turbo",
+            "embedding_model": "embedding-2",
+            "image2text_model": "glm-4v",
+            "asr_model": "",
+        },
+        "Ollama": {
+            "chat_model": "qwen-14B-chat",
+            "embedding_model": "flag-embedding",
+            "image2text_model": "",
+            "asr_model": "",
+        },
+        "Moonshot": {
+            "chat_model": "moonshot-v1-8k",
+            "embedding_model": "",
+            "image2text_model": "",
+            "asr_model": "",
+        },
+        "DeepSeek": {
+            "chat_model": "deepseek-chat",
+            "embedding_model": "",
+            "image2text_model": "",
+            "asr_model": "",
+        },
+        "VolcEngine": {
+            "chat_model": "",
+            "embedding_model": "",
+            "image2text_model": "",
+            "asr_model": "",
+        },
+        "BAAI": {
+            "chat_model": "",
+            "embedding_model": "BAAI/bge-large-zh-v1.5",
+            "image2text_model": "",
+            "asr_model": "",
+            "rerank_model": "BAAI/bge-reranker-v2-m3",
+        }
+    }
+    CHAT_MDL = default_llm[LLM_FACTORY]["chat_model"]
+    EMBEDDING_MDL = default_llm["BAAI"]["embedding_model"]
+    RERANK_MDL = default_llm["BAAI"]["rerank_model"] if not LIGHTEN else ""
+    ASR_MDL = default_llm[LLM_FACTORY]["asr_model"]
+    IMAGE2TEXT_MDL = default_llm[LLM_FACTORY]["image2text_model"]
+else:
+    CHAT_MDL = EMBEDDING_MDL = RERANK_MDL = ASR_MDL = IMAGE2TEXT_MDL = ""
 API_KEY = LLM.get("api_key", "")
 PARSERS = LLM.get(

deepdoc/parser/pdf_parser.py CHANGED Viewed

@@ -16,7 +16,6 @@ import random
 import xgboost as xgb
 from io import BytesIO
-import torch
 import re
 import pdfplumber
 import logging
@@ -25,6 +24,7 @@ import numpy as np
 from timeit import default_timer as timer
 from pypdf import PdfReader as pdf2_read
 from api.utils.file_utils import get_project_base_directory
 from deepdoc.vision import OCR, Recognizer, LayoutRecognizer, TableStructureRecognizer
 from rag.nlp import rag_tokenizer
@@ -44,8 +44,10 @@ class RAGFlowPdfParser:
         self.tbl_det = TableStructureRecognizer()
         self.updown_cnt_mdl = xgb.Booster()
-        if torch.cuda.is_available():
-            self.updown_cnt_mdl.set_param({"device": "cuda"})
         try:
             model_dir = os.path.join(
                 get_project_base_directory(),

 import xgboost as xgb
 from io import BytesIO
 import re
 import pdfplumber
 import logging
 from timeit import default_timer as timer
 from pypdf import PdfReader as pdf2_read
+from api.settings import LIGHTEN
 from api.utils.file_utils import get_project_base_directory
 from deepdoc.vision import OCR, Recognizer, LayoutRecognizer, TableStructureRecognizer
 from rag.nlp import rag_tokenizer
         self.tbl_det = TableStructureRecognizer()
         self.updown_cnt_mdl = xgb.Booster()
+        if not LIGHTEN:
+            import torch
+            if torch.cuda.is_available():
+                self.updown_cnt_mdl.set_param({"device": "cuda"})
         try:
             model_dir = os.path.join(
                 get_project_base_directory(),

rag/llm/embedding_model.py CHANGED Viewed

@@ -25,10 +25,10 @@ from abc import ABC
 from ollama import Client
 import dashscope
 from openai import OpenAI
-from FlagEmbedding import FlagModel
-import torch
 import numpy as np
 import asyncio
 from api.utils.file_utils import get_home_cache_dir
 from rag.utils import num_tokens_from_string, truncate
 import google.generativeai as genai
@@ -60,8 +60,10 @@ class DefaultEmbedding(Base):
         ^_-
         """
-        if not DefaultEmbedding._model:
             with DefaultEmbedding._model_lock:
                 if not DefaultEmbedding._model:
                     try:
                         DefaultEmbedding._model = FlagModel(os.path.join(get_home_cache_dir(), re.sub(r"^[a-zA-Z]+/", "", model_name)),

 from ollama import Client
 import dashscope
 from openai import OpenAI
 import numpy as np
 import asyncio
+from api.settings import LIGHTEN
 from api.utils.file_utils import get_home_cache_dir
 from rag.utils import num_tokens_from_string, truncate
 import google.generativeai as genai
         ^_-
         """
+        if not LIGHTEN and not DefaultEmbedding._model:
             with DefaultEmbedding._model_lock:
+                from FlagEmbedding import FlagModel
+                import torch
                 if not DefaultEmbedding._model:
                     try:
                         DefaultEmbedding._model = FlagModel(os.path.join(get_home_cache_dir(), re.sub(r"^[a-zA-Z]+/", "", model_name)),

rag/llm/rerank_model.py CHANGED Viewed

@@ -14,14 +14,14 @@
 #  limitations under the License.
 #
 import re
-import  threading
 import requests
-import torch
-from FlagEmbedding import FlagReranker
 from huggingface_hub import snapshot_download
 import os
 from abc import ABC
 import numpy as np
 from api.utils.file_utils import get_home_cache_dir
 from rag.utils import num_tokens_from_string, truncate
 import json
@@ -53,7 +53,9 @@ class DefaultRerank(Base):
         ^_-
         """
-        if not DefaultRerank._model:
             with DefaultRerank._model_lock:
                 if not DefaultRerank._model:
                     try:

 #  limitations under the License.
 #
 import re
+import threading
 import requests
 from huggingface_hub import snapshot_download
 import os
 from abc import ABC
 import numpy as np
+from api.settings import LIGHTEN
 from api.utils.file_utils import get_home_cache_dir
 from rag.utils import num_tokens_from_string, truncate
 import json
         ^_-
         """
+        if not LIGHTEN and not DefaultRerank._model:
+            import torch
+            from FlagEmbedding import FlagReranker
             with DefaultRerank._model_lock:
                 if not DefaultRerank._model:
                     try: