Kevin Hu
committed on
Commit
·
dbcbb17
1
Parent(s):
853826a
add lighten control (#2567)
Browse files
### What problem does this PR solve?
#2295
### Type of change
- [ ] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):
- api/apps/llm_app.py +3 -1
- api/settings.py +67 -67
- deepdoc/parser/pdf_parser.py +5 -3
- rag/llm/embedding_model.py +5 -3
- rag/llm/rerank_model.py +6 -4
api/apps/llm_app.py
CHANGED
|
@@ -18,6 +18,7 @@ import json
|
|
| 18 |
from flask import request
|
| 19 |
from flask_login import login_required, current_user
|
| 20 |
from api.db.services.llm_service import LLMFactoriesService, TenantLLMService, LLMService
|
|
|
|
| 21 |
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
|
| 22 |
from api.db import StatusEnum, LLMType
|
| 23 |
from api.db.db_models import TenantLLM
|
|
@@ -319,13 +320,14 @@ def my_llms():
|
|
| 319 |
@login_required
|
| 320 |
def list_app():
|
| 321 |
self_deploied = ["Youdao","FastEmbed", "BAAI", "Ollama", "Xinference", "LocalAI", "LM-Studio"]
|
|
|
|
| 322 |
model_type = request.args.get("model_type")
|
| 323 |
try:
|
| 324 |
objs = TenantLLMService.query(tenant_id=current_user.id)
|
| 325 |
facts = set([o.to_dict()["llm_factory"] for o in objs if o.api_key])
|
| 326 |
llms = LLMService.get_all()
|
| 327 |
llms = [m.to_dict()
|
| 328 |
-
for m in llms if m.status == StatusEnum.VALID.value]
|
| 329 |
for m in llms:
|
| 330 |
m["available"] = m["fid"] in facts or m["llm_name"].lower() == "flag-embedding" or m["fid"] in self_deploied
|
| 331 |
|
|
|
|
| 18 |
from flask import request
|
| 19 |
from flask_login import login_required, current_user
|
| 20 |
from api.db.services.llm_service import LLMFactoriesService, TenantLLMService, LLMService
|
| 21 |
+
from api.settings import LIGHTEN
|
| 22 |
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
|
| 23 |
from api.db import StatusEnum, LLMType
|
| 24 |
from api.db.db_models import TenantLLM
|
|
|
|
| 320 |
@login_required
|
| 321 |
def list_app():
|
| 322 |
self_deploied = ["Youdao","FastEmbed", "BAAI", "Ollama", "Xinference", "LocalAI", "LM-Studio"]
|
| 323 |
+
weighted = ["Youdao","FastEmbed", "BAAI"] if LIGHTEN else []
|
| 324 |
model_type = request.args.get("model_type")
|
| 325 |
try:
|
| 326 |
objs = TenantLLMService.query(tenant_id=current_user.id)
|
| 327 |
facts = set([o.to_dict()["llm_factory"] for o in objs if o.api_key])
|
| 328 |
llms = LLMService.get_all()
|
| 329 |
llms = [m.to_dict()
|
| 330 |
+
for m in llms if m.status == StatusEnum.VALID.value and m.fid not in weighted]
|
| 331 |
for m in llms:
|
| 332 |
m["available"] = m["fid"] in facts or m["llm_name"].lower() == "flag-embedding" or m["fid"] in self_deploied
|
| 333 |
|
api/settings.py
CHANGED
|
@@ -42,6 +42,7 @@ RAG_FLOW_SERVICE_NAME = "ragflow"
|
|
| 42 |
SERVER_MODULE = "rag_flow_server.py"
|
| 43 |
TEMP_DIRECTORY = os.path.join(get_project_base_directory(), "temp")
|
| 44 |
RAG_FLOW_CONF_PATH = os.path.join(get_project_base_directory(), "conf")
|
|
|
|
| 45 |
|
| 46 |
SUBPROCESS_STD_LOG_NAME = "std.log"
|
| 47 |
|
|
@@ -57,77 +58,76 @@ REQUEST_MAX_WAIT_SEC = 300
|
|
| 57 |
|
| 58 |
USE_REGISTRY = get_base_config("use_registry")
|
| 59 |
|
| 60 |
-
default_llm = {
|
| 61 |
-
"Tongyi-Qianwen": {
|
| 62 |
-
"chat_model": "qwen-plus",
|
| 63 |
-
"embedding_model": "text-embedding-v2",
|
| 64 |
-
"image2text_model": "qwen-vl-max",
|
| 65 |
-
"asr_model": "paraformer-realtime-8k-v1",
|
| 66 |
-
},
|
| 67 |
-
"OpenAI": {
|
| 68 |
-
"chat_model": "gpt-3.5-turbo",
|
| 69 |
-
"embedding_model": "text-embedding-ada-002",
|
| 70 |
-
"image2text_model": "gpt-4-vision-preview",
|
| 71 |
-
"asr_model": "whisper-1",
|
| 72 |
-
},
|
| 73 |
-
"Azure-OpenAI": {
|
| 74 |
-
"chat_model": "azure-gpt-35-turbo",
|
| 75 |
-
"embedding_model": "azure-text-embedding-ada-002",
|
| 76 |
-
"image2text_model": "azure-gpt-4-vision-preview",
|
| 77 |
-
"asr_model": "azure-whisper-1",
|
| 78 |
-
},
|
| 79 |
-
"ZHIPU-AI": {
|
| 80 |
-
"chat_model": "glm-3-turbo",
|
| 81 |
-
"embedding_model": "embedding-2",
|
| 82 |
-
"image2text_model": "glm-4v",
|
| 83 |
-
"asr_model": "",
|
| 84 |
-
},
|
| 85 |
-
"Ollama": {
|
| 86 |
-
"chat_model": "qwen-14B-chat",
|
| 87 |
-
"embedding_model": "flag-embedding",
|
| 88 |
-
"image2text_model": "",
|
| 89 |
-
"asr_model": "",
|
| 90 |
-
},
|
| 91 |
-
"Moonshot": {
|
| 92 |
-
"chat_model": "moonshot-v1-8k",
|
| 93 |
-
"embedding_model": "",
|
| 94 |
-
"image2text_model": "",
|
| 95 |
-
"asr_model": "",
|
| 96 |
-
},
|
| 97 |
-
"DeepSeek": {
|
| 98 |
-
"chat_model": "deepseek-chat",
|
| 99 |
-
"embedding_model": "",
|
| 100 |
-
"image2text_model": "",
|
| 101 |
-
"asr_model": "",
|
| 102 |
-
},
|
| 103 |
-
"VolcEngine": {
|
| 104 |
-
"chat_model": "",
|
| 105 |
-
"embedding_model": "",
|
| 106 |
-
"image2text_model": "",
|
| 107 |
-
"asr_model": "",
|
| 108 |
-
},
|
| 109 |
-
"BAAI": {
|
| 110 |
-
"chat_model": "",
|
| 111 |
-
"embedding_model": "BAAI/bge-large-zh-v1.5",
|
| 112 |
-
"image2text_model": "",
|
| 113 |
-
"asr_model": "",
|
| 114 |
-
"rerank_model": "BAAI/bge-reranker-v2-m3",
|
| 115 |
-
}
|
| 116 |
-
}
|
| 117 |
LLM = get_base_config("user_default_llm", {})
|
| 118 |
LLM_FACTORY = LLM.get("factory", "Tongyi-Qianwen")
|
| 119 |
LLM_BASE_URL = LLM.get("base_url")
|
| 120 |
|
| 121 |
-
if
|
| 122 |
-
|
| 123 |
-
"
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
API_KEY = LLM.get("api_key", "")
|
| 133 |
PARSERS = LLM.get(
|
|
|
|
| 42 |
SERVER_MODULE = "rag_flow_server.py"
|
| 43 |
TEMP_DIRECTORY = os.path.join(get_project_base_directory(), "temp")
|
| 44 |
RAG_FLOW_CONF_PATH = os.path.join(get_project_base_directory(), "conf")
|
| 45 |
+
LIGHTEN = os.environ.get('LIGHTEN')
|
| 46 |
|
| 47 |
SUBPROCESS_STD_LOG_NAME = "std.log"
|
| 48 |
|
|
|
|
| 58 |
|
| 59 |
USE_REGISTRY = get_base_config("use_registry")
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
LLM = get_base_config("user_default_llm", {})
|
| 62 |
LLM_FACTORY = LLM.get("factory", "Tongyi-Qianwen")
|
| 63 |
LLM_BASE_URL = LLM.get("base_url")
|
| 64 |
|
| 65 |
+
if not LIGHTEN:
|
| 66 |
+
default_llm = {
|
| 67 |
+
"Tongyi-Qianwen": {
|
| 68 |
+
"chat_model": "qwen-plus",
|
| 69 |
+
"embedding_model": "text-embedding-v2",
|
| 70 |
+
"image2text_model": "qwen-vl-max",
|
| 71 |
+
"asr_model": "paraformer-realtime-8k-v1",
|
| 72 |
+
},
|
| 73 |
+
"OpenAI": {
|
| 74 |
+
"chat_model": "gpt-3.5-turbo",
|
| 75 |
+
"embedding_model": "text-embedding-ada-002",
|
| 76 |
+
"image2text_model": "gpt-4-vision-preview",
|
| 77 |
+
"asr_model": "whisper-1",
|
| 78 |
+
},
|
| 79 |
+
"Azure-OpenAI": {
|
| 80 |
+
"chat_model": "azure-gpt-35-turbo",
|
| 81 |
+
"embedding_model": "azure-text-embedding-ada-002",
|
| 82 |
+
"image2text_model": "azure-gpt-4-vision-preview",
|
| 83 |
+
"asr_model": "azure-whisper-1",
|
| 84 |
+
},
|
| 85 |
+
"ZHIPU-AI": {
|
| 86 |
+
"chat_model": "glm-3-turbo",
|
| 87 |
+
"embedding_model": "embedding-2",
|
| 88 |
+
"image2text_model": "glm-4v",
|
| 89 |
+
"asr_model": "",
|
| 90 |
+
},
|
| 91 |
+
"Ollama": {
|
| 92 |
+
"chat_model": "qwen-14B-chat",
|
| 93 |
+
"embedding_model": "flag-embedding",
|
| 94 |
+
"image2text_model": "",
|
| 95 |
+
"asr_model": "",
|
| 96 |
+
},
|
| 97 |
+
"Moonshot": {
|
| 98 |
+
"chat_model": "moonshot-v1-8k",
|
| 99 |
+
"embedding_model": "",
|
| 100 |
+
"image2text_model": "",
|
| 101 |
+
"asr_model": "",
|
| 102 |
+
},
|
| 103 |
+
"DeepSeek": {
|
| 104 |
+
"chat_model": "deepseek-chat",
|
| 105 |
+
"embedding_model": "",
|
| 106 |
+
"image2text_model": "",
|
| 107 |
+
"asr_model": "",
|
| 108 |
+
},
|
| 109 |
+
"VolcEngine": {
|
| 110 |
+
"chat_model": "",
|
| 111 |
+
"embedding_model": "",
|
| 112 |
+
"image2text_model": "",
|
| 113 |
+
"asr_model": "",
|
| 114 |
+
},
|
| 115 |
+
"BAAI": {
|
| 116 |
+
"chat_model": "",
|
| 117 |
+
"embedding_model": "BAAI/bge-large-zh-v1.5",
|
| 118 |
+
"image2text_model": "",
|
| 119 |
+
"asr_model": "",
|
| 120 |
+
"rerank_model": "BAAI/bge-reranker-v2-m3",
|
| 121 |
+
}
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
CHAT_MDL = default_llm[LLM_FACTORY]["chat_model"]
|
| 125 |
+
EMBEDDING_MDL = default_llm["BAAI"]["embedding_model"]
|
| 126 |
+
RERANK_MDL = default_llm["BAAI"]["rerank_model"] if not LIGHTEN else ""
|
| 127 |
+
ASR_MDL = default_llm[LLM_FACTORY]["asr_model"]
|
| 128 |
+
IMAGE2TEXT_MDL = default_llm[LLM_FACTORY]["image2text_model"]
|
| 129 |
+
else:
|
| 130 |
+
CHAT_MDL = EMBEDDING_MDL = RERANK_MDL = ASR_MDL = IMAGE2TEXT_MDL = ""
|
| 131 |
|
| 132 |
API_KEY = LLM.get("api_key", "")
|
| 133 |
PARSERS = LLM.get(
|
deepdoc/parser/pdf_parser.py
CHANGED
|
@@ -16,7 +16,6 @@ import random
|
|
| 16 |
|
| 17 |
import xgboost as xgb
|
| 18 |
from io import BytesIO
|
| 19 |
-
import torch
|
| 20 |
import re
|
| 21 |
import pdfplumber
|
| 22 |
import logging
|
|
@@ -25,6 +24,7 @@ import numpy as np
|
|
| 25 |
from timeit import default_timer as timer
|
| 26 |
from pypdf import PdfReader as pdf2_read
|
| 27 |
|
|
|
|
| 28 |
from api.utils.file_utils import get_project_base_directory
|
| 29 |
from deepdoc.vision import OCR, Recognizer, LayoutRecognizer, TableStructureRecognizer
|
| 30 |
from rag.nlp import rag_tokenizer
|
|
@@ -44,8 +44,10 @@ class RAGFlowPdfParser:
|
|
| 44 |
self.tbl_det = TableStructureRecognizer()
|
| 45 |
|
| 46 |
self.updown_cnt_mdl = xgb.Booster()
|
| 47 |
-
if
|
| 48 |
-
|
|
|
|
|
|
|
| 49 |
try:
|
| 50 |
model_dir = os.path.join(
|
| 51 |
get_project_base_directory(),
|
|
|
|
| 16 |
|
| 17 |
import xgboost as xgb
|
| 18 |
from io import BytesIO
|
|
|
|
| 19 |
import re
|
| 20 |
import pdfplumber
|
| 21 |
import logging
|
|
|
|
| 24 |
from timeit import default_timer as timer
|
| 25 |
from pypdf import PdfReader as pdf2_read
|
| 26 |
|
| 27 |
+
from api.settings import LIGHTEN
|
| 28 |
from api.utils.file_utils import get_project_base_directory
|
| 29 |
from deepdoc.vision import OCR, Recognizer, LayoutRecognizer, TableStructureRecognizer
|
| 30 |
from rag.nlp import rag_tokenizer
|
|
|
|
| 44 |
self.tbl_det = TableStructureRecognizer()
|
| 45 |
|
| 46 |
self.updown_cnt_mdl = xgb.Booster()
|
| 47 |
+
if not LIGHTEN:
|
| 48 |
+
import torch
|
| 49 |
+
if torch.cuda.is_available():
|
| 50 |
+
self.updown_cnt_mdl.set_param({"device": "cuda"})
|
| 51 |
try:
|
| 52 |
model_dir = os.path.join(
|
| 53 |
get_project_base_directory(),
|
rag/llm/embedding_model.py
CHANGED
|
@@ -25,10 +25,10 @@ from abc import ABC
|
|
| 25 |
from ollama import Client
|
| 26 |
import dashscope
|
| 27 |
from openai import OpenAI
|
| 28 |
-
from FlagEmbedding import FlagModel
|
| 29 |
-
import torch
|
| 30 |
import numpy as np
|
| 31 |
import asyncio
|
|
|
|
|
|
|
| 32 |
from api.utils.file_utils import get_home_cache_dir
|
| 33 |
from rag.utils import num_tokens_from_string, truncate
|
| 34 |
import google.generativeai as genai
|
|
@@ -60,8 +60,10 @@ class DefaultEmbedding(Base):
|
|
| 60 |
^_-
|
| 61 |
|
| 62 |
"""
|
| 63 |
-
if not DefaultEmbedding._model:
|
| 64 |
with DefaultEmbedding._model_lock:
|
|
|
|
|
|
|
| 65 |
if not DefaultEmbedding._model:
|
| 66 |
try:
|
| 67 |
DefaultEmbedding._model = FlagModel(os.path.join(get_home_cache_dir(), re.sub(r"^[a-zA-Z]+/", "", model_name)),
|
|
|
|
| 25 |
from ollama import Client
|
| 26 |
import dashscope
|
| 27 |
from openai import OpenAI
|
|
|
|
|
|
|
| 28 |
import numpy as np
|
| 29 |
import asyncio
|
| 30 |
+
|
| 31 |
+
from api.settings import LIGHTEN
|
| 32 |
from api.utils.file_utils import get_home_cache_dir
|
| 33 |
from rag.utils import num_tokens_from_string, truncate
|
| 34 |
import google.generativeai as genai
|
|
|
|
| 60 |
^_-
|
| 61 |
|
| 62 |
"""
|
| 63 |
+
if not LIGHTEN and not DefaultEmbedding._model:
|
| 64 |
with DefaultEmbedding._model_lock:
|
| 65 |
+
from FlagEmbedding import FlagModel
|
| 66 |
+
import torch
|
| 67 |
if not DefaultEmbedding._model:
|
| 68 |
try:
|
| 69 |
DefaultEmbedding._model = FlagModel(os.path.join(get_home_cache_dir(), re.sub(r"^[a-zA-Z]+/", "", model_name)),
|
rag/llm/rerank_model.py
CHANGED
|
@@ -14,14 +14,14 @@
|
|
| 14 |
# limitations under the License.
|
| 15 |
#
|
| 16 |
import re
|
| 17 |
-
import
|
| 18 |
import requests
|
| 19 |
-
import torch
|
| 20 |
-
from FlagEmbedding import FlagReranker
|
| 21 |
from huggingface_hub import snapshot_download
|
| 22 |
import os
|
| 23 |
from abc import ABC
|
| 24 |
import numpy as np
|
|
|
|
|
|
|
| 25 |
from api.utils.file_utils import get_home_cache_dir
|
| 26 |
from rag.utils import num_tokens_from_string, truncate
|
| 27 |
import json
|
|
@@ -53,7 +53,9 @@ class DefaultRerank(Base):
|
|
| 53 |
^_-
|
| 54 |
|
| 55 |
"""
|
| 56 |
-
if not DefaultRerank._model:
|
|
|
|
|
|
|
| 57 |
with DefaultRerank._model_lock:
|
| 58 |
if not DefaultRerank._model:
|
| 59 |
try:
|
|
|
|
| 14 |
# limitations under the License.
|
| 15 |
#
|
| 16 |
import re
|
| 17 |
+
import threading
|
| 18 |
import requests
|
|
|
|
|
|
|
| 19 |
from huggingface_hub import snapshot_download
|
| 20 |
import os
|
| 21 |
from abc import ABC
|
| 22 |
import numpy as np
|
| 23 |
+
|
| 24 |
+
from api.settings import LIGHTEN
|
| 25 |
from api.utils.file_utils import get_home_cache_dir
|
| 26 |
from rag.utils import num_tokens_from_string, truncate
|
| 27 |
import json
|
|
|
|
| 53 |
^_-
|
| 54 |
|
| 55 |
"""
|
| 56 |
+
if not LIGHTEN and not DefaultRerank._model:
|
| 57 |
+
import torch
|
| 58 |
+
from FlagEmbedding import FlagReranker
|
| 59 |
with DefaultRerank._model_lock:
|
| 60 |
if not DefaultRerank._model:
|
| 61 |
try:
|