KevinHuSh
committed on
Commit
·
8aa5e9b
1
Parent(s):
5aa2501
remove redis (#629)
Browse files
### What problem does this PR solve?
### Type of change
- [x] Refactoring
- api/db/services/document_service.py +6 -6
- rag/svr/task_broker.py +0 -6
- rag/svr/task_executor.py +0 -12
api/db/services/document_service.py
CHANGED
@@ -16,6 +16,8 @@
|
|
16 |
from peewee import Expression
|
17 |
|
18 |
from elasticsearch_dsl import Q
|
|
|
|
|
19 |
from rag.utils.es_conn import ELASTICSEARCH
|
20 |
from rag.utils.minio_conn import MINIO
|
21 |
from rag.nlp import search
|
@@ -90,7 +92,7 @@ class DocumentService(CommonService):
|
|
90 |
|
91 |
@classmethod
|
92 |
@DB.connection_context()
|
93 |
-
def get_newly_uploaded(cls, tm
|
94 |
fields = [
|
95 |
cls.model.id,
|
96 |
cls.model.kb_id,
|
@@ -112,11 +114,9 @@ class DocumentService(CommonService):
|
|
112 |
cls.model.status == StatusEnum.VALID.value,
|
113 |
~(cls.model.type == FileType.VIRTUAL.value),
|
114 |
cls.model.progress == 0,
|
115 |
-
cls.model.update_time >=
|
116 |
-
cls.model.run == TaskStatus.RUNNING.value
|
117 |
-
|
118 |
-
.order_by(cls.model.update_time.asc())\
|
119 |
-
.paginate(1, items_per_page)
|
120 |
return list(docs.dicts())
|
121 |
|
122 |
@classmethod
|
|
|
16 |
from peewee import Expression
|
17 |
|
18 |
from elasticsearch_dsl import Q
|
19 |
+
|
20 |
+
from api.utils import current_timestamp
|
21 |
from rag.utils.es_conn import ELASTICSEARCH
|
22 |
from rag.utils.minio_conn import MINIO
|
23 |
from rag.nlp import search
|
|
|
92 |
|
93 |
@classmethod
|
94 |
@DB.connection_context()
|
95 |
+
def get_newly_uploaded(cls, tm):
|
96 |
fields = [
|
97 |
cls.model.id,
|
98 |
cls.model.kb_id,
|
|
|
114 |
cls.model.status == StatusEnum.VALID.value,
|
115 |
~(cls.model.type == FileType.VIRTUAL.value),
|
116 |
cls.model.progress == 0,
|
117 |
+
cls.model.update_time >= current_timestamp() - 1000 * 600,
|
118 |
+
cls.model.run == TaskStatus.RUNNING.value)\
|
119 |
+
.order_by(cls.model.update_time.asc())
|
|
|
|
|
120 |
return list(docs.dicts())
|
121 |
|
122 |
@classmethod
|
rag/svr/task_broker.py
CHANGED
@@ -90,12 +90,6 @@ def dispatch():
|
|
90 |
try:
|
91 |
bucket, name = File2DocumentService.get_minio_address(doc_id=r["id"])
|
92 |
file_bin = MINIO.get(bucket, name)
|
93 |
-
if REDIS_CONN.is_alive():
|
94 |
-
try:
|
95 |
-
REDIS_CONN.set("{}/{}".format(bucket, name), file_bin, 12*60)
|
96 |
-
except Exception as e:
|
97 |
-
cron_logger.warning("Put into redis[EXCEPTION]:" + str(e))
|
98 |
-
|
99 |
if r["type"] == FileType.PDF.value:
|
100 |
do_layout = r["parser_config"].get("layout_recognize", True)
|
101 |
pages = PdfParser.total_page_number(r["name"], file_bin)
|
|
|
90 |
try:
|
91 |
bucket, name = File2DocumentService.get_minio_address(doc_id=r["id"])
|
92 |
file_bin = MINIO.get(bucket, name)
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
if r["type"] == FileType.PDF.value:
|
94 |
do_layout = r["parser_config"].get("layout_recognize", True)
|
95 |
pages = PdfParser.total_page_number(r["name"], file_bin)
|
rag/svr/task_executor.py
CHANGED
@@ -107,18 +107,6 @@ def collect(comm, mod, tm):
|
|
107 |
|
108 |
def get_minio_binary(bucket, name):
|
109 |
global MINIO
|
110 |
-
if REDIS_CONN.is_alive():
|
111 |
-
try:
|
112 |
-
for _ in range(30):
|
113 |
-
if REDIS_CONN.exist("{}/{}".format(bucket, name)):
|
114 |
-
time.sleep(1)
|
115 |
-
break
|
116 |
-
time.sleep(1)
|
117 |
-
r = REDIS_CONN.get("{}/{}".format(bucket, name))
|
118 |
-
if r: return r
|
119 |
-
cron_logger.warning("Cache missing: {}".format(name))
|
120 |
-
except Exception as e:
|
121 |
-
cron_logger.warning("Get redis[EXCEPTION]:" + str(e))
|
122 |
return MINIO.get(bucket, name)
|
123 |
|
124 |
|
|
|
107 |
|
108 |
def get_minio_binary(bucket, name):
|
109 |
global MINIO
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
return MINIO.get(bucket, name)
|
111 |
|
112 |
|