Kevin Hu committed on
Commit
dbcbb17
·
1 Parent(s): 853826a

add lighten control (#2567)

Browse files

### What problem does this PR solve?

#2295

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):

api/apps/llm_app.py CHANGED
@@ -18,6 +18,7 @@ import json
18
  from flask import request
19
  from flask_login import login_required, current_user
20
  from api.db.services.llm_service import LLMFactoriesService, TenantLLMService, LLMService
 
21
  from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
22
  from api.db import StatusEnum, LLMType
23
  from api.db.db_models import TenantLLM
@@ -319,13 +320,14 @@ def my_llms():
319
  @login_required
320
  def list_app():
321
  self_deploied = ["Youdao","FastEmbed", "BAAI", "Ollama", "Xinference", "LocalAI", "LM-Studio"]
 
322
  model_type = request.args.get("model_type")
323
  try:
324
  objs = TenantLLMService.query(tenant_id=current_user.id)
325
  facts = set([o.to_dict()["llm_factory"] for o in objs if o.api_key])
326
  llms = LLMService.get_all()
327
  llms = [m.to_dict()
328
- for m in llms if m.status == StatusEnum.VALID.value]
329
  for m in llms:
330
  m["available"] = m["fid"] in facts or m["llm_name"].lower() == "flag-embedding" or m["fid"] in self_deploied
331
 
 
18
  from flask import request
19
  from flask_login import login_required, current_user
20
  from api.db.services.llm_service import LLMFactoriesService, TenantLLMService, LLMService
21
+ from api.settings import LIGHTEN
22
  from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
23
  from api.db import StatusEnum, LLMType
24
  from api.db.db_models import TenantLLM
 
320
  @login_required
321
  def list_app():
322
  self_deploied = ["Youdao","FastEmbed", "BAAI", "Ollama", "Xinference", "LocalAI", "LM-Studio"]
323
+ weighted = ["Youdao","FastEmbed", "BAAI"] if LIGHTEN else []
324
  model_type = request.args.get("model_type")
325
  try:
326
  objs = TenantLLMService.query(tenant_id=current_user.id)
327
  facts = set([o.to_dict()["llm_factory"] for o in objs if o.api_key])
328
  llms = LLMService.get_all()
329
  llms = [m.to_dict()
330
+ for m in llms if m.status == StatusEnum.VALID.value and m.fid not in weighted]
331
  for m in llms:
332
  m["available"] = m["fid"] in facts or m["llm_name"].lower() == "flag-embedding" or m["fid"] in self_deploied
333
 
api/settings.py CHANGED
@@ -42,6 +42,7 @@ RAG_FLOW_SERVICE_NAME = "ragflow"
42
  SERVER_MODULE = "rag_flow_server.py"
43
  TEMP_DIRECTORY = os.path.join(get_project_base_directory(), "temp")
44
  RAG_FLOW_CONF_PATH = os.path.join(get_project_base_directory(), "conf")
 
45
 
46
  SUBPROCESS_STD_LOG_NAME = "std.log"
47
 
@@ -57,77 +58,76 @@ REQUEST_MAX_WAIT_SEC = 300
57
 
58
  USE_REGISTRY = get_base_config("use_registry")
59
 
60
- default_llm = {
61
- "Tongyi-Qianwen": {
62
- "chat_model": "qwen-plus",
63
- "embedding_model": "text-embedding-v2",
64
- "image2text_model": "qwen-vl-max",
65
- "asr_model": "paraformer-realtime-8k-v1",
66
- },
67
- "OpenAI": {
68
- "chat_model": "gpt-3.5-turbo",
69
- "embedding_model": "text-embedding-ada-002",
70
- "image2text_model": "gpt-4-vision-preview",
71
- "asr_model": "whisper-1",
72
- },
73
- "Azure-OpenAI": {
74
- "chat_model": "azure-gpt-35-turbo",
75
- "embedding_model": "azure-text-embedding-ada-002",
76
- "image2text_model": "azure-gpt-4-vision-preview",
77
- "asr_model": "azure-whisper-1",
78
- },
79
- "ZHIPU-AI": {
80
- "chat_model": "glm-3-turbo",
81
- "embedding_model": "embedding-2",
82
- "image2text_model": "glm-4v",
83
- "asr_model": "",
84
- },
85
- "Ollama": {
86
- "chat_model": "qwen-14B-chat",
87
- "embedding_model": "flag-embedding",
88
- "image2text_model": "",
89
- "asr_model": "",
90
- },
91
- "Moonshot": {
92
- "chat_model": "moonshot-v1-8k",
93
- "embedding_model": "",
94
- "image2text_model": "",
95
- "asr_model": "",
96
- },
97
- "DeepSeek": {
98
- "chat_model": "deepseek-chat",
99
- "embedding_model": "",
100
- "image2text_model": "",
101
- "asr_model": "",
102
- },
103
- "VolcEngine": {
104
- "chat_model": "",
105
- "embedding_model": "",
106
- "image2text_model": "",
107
- "asr_model": "",
108
- },
109
- "BAAI": {
110
- "chat_model": "",
111
- "embedding_model": "BAAI/bge-large-zh-v1.5",
112
- "image2text_model": "",
113
- "asr_model": "",
114
- "rerank_model": "BAAI/bge-reranker-v2-m3",
115
- }
116
- }
117
  LLM = get_base_config("user_default_llm", {})
118
  LLM_FACTORY = LLM.get("factory", "Tongyi-Qianwen")
119
  LLM_BASE_URL = LLM.get("base_url")
120
 
121
- if LLM_FACTORY not in default_llm:
122
- print(
123
- "\33[91m【ERROR】\33[0m:",
124
- f"LLM factory {LLM_FACTORY} has not supported yet, switch to 'Tongyi-Qianwen/QWen' automatically, and please check the API_KEY in service_conf.yaml.")
125
- LLM_FACTORY = "Tongyi-Qianwen"
126
- CHAT_MDL = default_llm[LLM_FACTORY]["chat_model"]
127
- EMBEDDING_MDL = default_llm["BAAI"]["embedding_model"]
128
- RERANK_MDL = default_llm["BAAI"]["rerank_model"]
129
- ASR_MDL = default_llm[LLM_FACTORY]["asr_model"]
130
- IMAGE2TEXT_MDL = default_llm[LLM_FACTORY]["image2text_model"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
  API_KEY = LLM.get("api_key", "")
133
  PARSERS = LLM.get(
 
42
  SERVER_MODULE = "rag_flow_server.py"
43
  TEMP_DIRECTORY = os.path.join(get_project_base_directory(), "temp")
44
  RAG_FLOW_CONF_PATH = os.path.join(get_project_base_directory(), "conf")
45
+ LIGHTEN = os.environ.get('LIGHTEN')
46
 
47
  SUBPROCESS_STD_LOG_NAME = "std.log"
48
 
 
58
 
59
  USE_REGISTRY = get_base_config("use_registry")
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  LLM = get_base_config("user_default_llm", {})
62
  LLM_FACTORY = LLM.get("factory", "Tongyi-Qianwen")
63
  LLM_BASE_URL = LLM.get("base_url")
64
 
65
+ if not LIGHTEN:
66
+ default_llm = {
67
+ "Tongyi-Qianwen": {
68
+ "chat_model": "qwen-plus",
69
+ "embedding_model": "text-embedding-v2",
70
+ "image2text_model": "qwen-vl-max",
71
+ "asr_model": "paraformer-realtime-8k-v1",
72
+ },
73
+ "OpenAI": {
74
+ "chat_model": "gpt-3.5-turbo",
75
+ "embedding_model": "text-embedding-ada-002",
76
+ "image2text_model": "gpt-4-vision-preview",
77
+ "asr_model": "whisper-1",
78
+ },
79
+ "Azure-OpenAI": {
80
+ "chat_model": "azure-gpt-35-turbo",
81
+ "embedding_model": "azure-text-embedding-ada-002",
82
+ "image2text_model": "azure-gpt-4-vision-preview",
83
+ "asr_model": "azure-whisper-1",
84
+ },
85
+ "ZHIPU-AI": {
86
+ "chat_model": "glm-3-turbo",
87
+ "embedding_model": "embedding-2",
88
+ "image2text_model": "glm-4v",
89
+ "asr_model": "",
90
+ },
91
+ "Ollama": {
92
+ "chat_model": "qwen-14B-chat",
93
+ "embedding_model": "flag-embedding",
94
+ "image2text_model": "",
95
+ "asr_model": "",
96
+ },
97
+ "Moonshot": {
98
+ "chat_model": "moonshot-v1-8k",
99
+ "embedding_model": "",
100
+ "image2text_model": "",
101
+ "asr_model": "",
102
+ },
103
+ "DeepSeek": {
104
+ "chat_model": "deepseek-chat",
105
+ "embedding_model": "",
106
+ "image2text_model": "",
107
+ "asr_model": "",
108
+ },
109
+ "VolcEngine": {
110
+ "chat_model": "",
111
+ "embedding_model": "",
112
+ "image2text_model": "",
113
+ "asr_model": "",
114
+ },
115
+ "BAAI": {
116
+ "chat_model": "",
117
+ "embedding_model": "BAAI/bge-large-zh-v1.5",
118
+ "image2text_model": "",
119
+ "asr_model": "",
120
+ "rerank_model": "BAAI/bge-reranker-v2-m3",
121
+ }
122
+ }
123
+
124
+ CHAT_MDL = default_llm[LLM_FACTORY]["chat_model"]
125
+ EMBEDDING_MDL = default_llm["BAAI"]["embedding_model"]
126
+ RERANK_MDL = default_llm["BAAI"]["rerank_model"] if not LIGHTEN else ""
127
+ ASR_MDL = default_llm[LLM_FACTORY]["asr_model"]
128
+ IMAGE2TEXT_MDL = default_llm[LLM_FACTORY]["image2text_model"]
129
+ else:
130
+ CHAT_MDL = EMBEDDING_MDL = RERANK_MDL = ASR_MDL = IMAGE2TEXT_MDL = ""
131
 
132
  API_KEY = LLM.get("api_key", "")
133
  PARSERS = LLM.get(
deepdoc/parser/pdf_parser.py CHANGED
@@ -16,7 +16,6 @@ import random
16
 
17
  import xgboost as xgb
18
  from io import BytesIO
19
- import torch
20
  import re
21
  import pdfplumber
22
  import logging
@@ -25,6 +24,7 @@ import numpy as np
25
  from timeit import default_timer as timer
26
  from pypdf import PdfReader as pdf2_read
27
 
 
28
  from api.utils.file_utils import get_project_base_directory
29
  from deepdoc.vision import OCR, Recognizer, LayoutRecognizer, TableStructureRecognizer
30
  from rag.nlp import rag_tokenizer
@@ -44,8 +44,10 @@ class RAGFlowPdfParser:
44
  self.tbl_det = TableStructureRecognizer()
45
 
46
  self.updown_cnt_mdl = xgb.Booster()
47
- if torch.cuda.is_available():
48
- self.updown_cnt_mdl.set_param({"device": "cuda"})
 
 
49
  try:
50
  model_dir = os.path.join(
51
  get_project_base_directory(),
 
16
 
17
  import xgboost as xgb
18
  from io import BytesIO
 
19
  import re
20
  import pdfplumber
21
  import logging
 
24
  from timeit import default_timer as timer
25
  from pypdf import PdfReader as pdf2_read
26
 
27
+ from api.settings import LIGHTEN
28
  from api.utils.file_utils import get_project_base_directory
29
  from deepdoc.vision import OCR, Recognizer, LayoutRecognizer, TableStructureRecognizer
30
  from rag.nlp import rag_tokenizer
 
44
  self.tbl_det = TableStructureRecognizer()
45
 
46
  self.updown_cnt_mdl = xgb.Booster()
47
+ if not LIGHTEN:
48
+ import torch
49
+ if torch.cuda.is_available():
50
+ self.updown_cnt_mdl.set_param({"device": "cuda"})
51
  try:
52
  model_dir = os.path.join(
53
  get_project_base_directory(),
rag/llm/embedding_model.py CHANGED
@@ -25,10 +25,10 @@ from abc import ABC
25
  from ollama import Client
26
  import dashscope
27
  from openai import OpenAI
28
- from FlagEmbedding import FlagModel
29
- import torch
30
  import numpy as np
31
  import asyncio
 
 
32
  from api.utils.file_utils import get_home_cache_dir
33
  from rag.utils import num_tokens_from_string, truncate
34
  import google.generativeai as genai
@@ -60,8 +60,10 @@ class DefaultEmbedding(Base):
60
  ^_-
61
 
62
  """
63
- if not DefaultEmbedding._model:
64
  with DefaultEmbedding._model_lock:
 
 
65
  if not DefaultEmbedding._model:
66
  try:
67
  DefaultEmbedding._model = FlagModel(os.path.join(get_home_cache_dir(), re.sub(r"^[a-zA-Z]+/", "", model_name)),
 
25
  from ollama import Client
26
  import dashscope
27
  from openai import OpenAI
 
 
28
  import numpy as np
29
  import asyncio
30
+
31
+ from api.settings import LIGHTEN
32
  from api.utils.file_utils import get_home_cache_dir
33
  from rag.utils import num_tokens_from_string, truncate
34
  import google.generativeai as genai
 
60
  ^_-
61
 
62
  """
63
+ if not LIGHTEN and not DefaultEmbedding._model:
64
  with DefaultEmbedding._model_lock:
65
+ from FlagEmbedding import FlagModel
66
+ import torch
67
  if not DefaultEmbedding._model:
68
  try:
69
  DefaultEmbedding._model = FlagModel(os.path.join(get_home_cache_dir(), re.sub(r"^[a-zA-Z]+/", "", model_name)),
rag/llm/rerank_model.py CHANGED
@@ -14,14 +14,14 @@
14
  # limitations under the License.
15
  #
16
  import re
17
- import threading
18
  import requests
19
- import torch
20
- from FlagEmbedding import FlagReranker
21
  from huggingface_hub import snapshot_download
22
  import os
23
  from abc import ABC
24
  import numpy as np
 
 
25
  from api.utils.file_utils import get_home_cache_dir
26
  from rag.utils import num_tokens_from_string, truncate
27
  import json
@@ -53,7 +53,9 @@ class DefaultRerank(Base):
53
  ^_-
54
 
55
  """
56
- if not DefaultRerank._model:
 
 
57
  with DefaultRerank._model_lock:
58
  if not DefaultRerank._model:
59
  try:
 
14
  # limitations under the License.
15
  #
16
  import re
17
+ import threading
18
  import requests
 
 
19
  from huggingface_hub import snapshot_download
20
  import os
21
  from abc import ABC
22
  import numpy as np
23
+
24
+ from api.settings import LIGHTEN
25
  from api.utils.file_utils import get_home_cache_dir
26
  from rag.utils import num_tokens_from_string, truncate
27
  import json
 
53
  ^_-
54
 
55
  """
56
+ if not LIGHTEN and not DefaultRerank._model:
57
+ import torch
58
+ from FlagEmbedding import FlagReranker
59
  with DefaultRerank._model_lock:
60
  if not DefaultRerank._model:
61
  try: