Kenny Dizi
		
	commited on
		
		
					Commit 
							
							·
						
						f74a214
	
1
								Parent(s):
							
							cae0eab
								
Improve storage engine (#4341)
Browse files### What problem does this PR solve?
- Bring `STORAGE_IMPL` back in `rag/svr/cache_file_svr.py`
- Simplify storage connection when working with AWS S3
### Type of change
- [x] Refactoring
- conf/service_conf.yaml +1 -2
- docker/service_conf.yaml.template +1 -2
- rag/svr/cache_file_svr.py +2 -2
- rag/utils/s3_conn.py +2 -14
    	
        conf/service_conf.yaml
    CHANGED
    
    | @@ -22,7 +22,7 @@ infinity: | |
| 22 | 
             
              db_name: 'default_db'
         | 
| 23 | 
             
            redis:
         | 
| 24 | 
             
              db: 1
         | 
| 25 | 
            -
              password: 'infini_rag_flow' | 
| 26 | 
             
              host: 'redis:6379'
         | 
| 27 |  | 
| 28 | 
             
            # postgres:
         | 
| @@ -34,7 +34,6 @@ redis: | |
| 34 | 
             
            #   max_connections: 100
         | 
| 35 | 
             
            #   stale_timeout: 30
         | 
| 36 | 
             
            # s3:
         | 
| 37 | 
            -
            #   endpoint: 'endpoint'
         | 
| 38 | 
             
            #   access_key: 'access_key'
         | 
| 39 | 
             
            #   secret_key: 'secret_key'
         | 
| 40 | 
             
            #   region: 'region'
         | 
|  | |
| 22 | 
             
              db_name: 'default_db'
         | 
| 23 | 
             
            redis:
         | 
| 24 | 
             
              db: 1
         | 
| 25 | 
            +
              password: 'infini_rag_flow'
         | 
| 26 | 
             
              host: 'redis:6379'
         | 
| 27 |  | 
| 28 | 
             
            # postgres:
         | 
|  | |
| 34 | 
             
            #   max_connections: 100
         | 
| 35 | 
             
            #   stale_timeout: 30
         | 
| 36 | 
             
            # s3:
         | 
|  | |
| 37 | 
             
            #   access_key: 'access_key'
         | 
| 38 | 
             
            #   secret_key: 'secret_key'
         | 
| 39 | 
             
            #   region: 'region'
         | 
    	
        docker/service_conf.yaml.template
    CHANGED
    
    | @@ -22,7 +22,7 @@ infinity: | |
| 22 | 
             
              db_name: 'default_db'
         | 
| 23 | 
             
            redis:
         | 
| 24 | 
             
              db: 1
         | 
| 25 | 
            -
              password: '${REDIS_PASSWORD:-infini_rag_flow}' | 
| 26 | 
             
              host: '${REDIS_HOST:-redis}:6379'
         | 
| 27 |  | 
| 28 | 
             
            # postgres:
         | 
| @@ -34,7 +34,6 @@ redis: | |
| 34 | 
             
            #   max_connections: 100
         | 
| 35 | 
             
            #   stale_timeout: 30
         | 
| 36 | 
             
            # s3:
         | 
| 37 | 
            -
            #   endpoint: 'endpoint'
         | 
| 38 | 
             
            #   access_key: 'access_key'
         | 
| 39 | 
             
            #   secret_key: 'secret_key'
         | 
| 40 | 
             
            #   region: 'region'
         | 
|  | |
| 22 | 
             
              db_name: 'default_db'
         | 
| 23 | 
             
            redis:
         | 
| 24 | 
             
              db: 1
         | 
| 25 | 
            +
              password: '${REDIS_PASSWORD:-infini_rag_flow}'
         | 
| 26 | 
             
              host: '${REDIS_HOST:-redis}:6379'
         | 
| 27 |  | 
| 28 | 
             
            # postgres:
         | 
|  | |
| 34 | 
             
            #   max_connections: 100
         | 
| 35 | 
             
            #   stale_timeout: 30
         | 
| 36 | 
             
            # s3:
         | 
|  | |
| 37 | 
             
            #   access_key: 'access_key'
         | 
| 38 | 
             
            #   secret_key: 'secret_key'
         | 
| 39 | 
             
            #   region: 'region'
         | 
    	
        rag/svr/cache_file_svr.py
    CHANGED
    
    | @@ -19,7 +19,7 @@ import traceback | |
| 19 |  | 
| 20 | 
             
            from api.db.db_models import close_connection
         | 
| 21 | 
             
            from api.db.services.task_service import TaskService
         | 
| 22 | 
            -
            from rag.utils. | 
| 23 | 
             
            from rag.utils.redis_conn import REDIS_CONN
         | 
| 24 |  | 
| 25 |  | 
| @@ -44,7 +44,7 @@ def main(): | |
| 44 | 
             
                                key = "{}/{}".format(kb_id, loc)
         | 
| 45 | 
             
                                if REDIS_CONN.exist(key):
         | 
| 46 | 
             
                                    continue
         | 
| 47 | 
            -
                                file_bin =  | 
| 48 | 
             
                                REDIS_CONN.transaction(key, file_bin, 12 * 60)
         | 
| 49 | 
             
                                logging.info("CACHE: {}".format(loc))
         | 
| 50 | 
             
                            except Exception as e:
         | 
|  | |
| 19 |  | 
| 20 | 
             
            from api.db.db_models import close_connection
         | 
| 21 | 
             
            from api.db.services.task_service import TaskService
         | 
| 22 | 
            +
            from rag.utils.storage_factory import STORAGE_IMPL
         | 
| 23 | 
             
            from rag.utils.redis_conn import REDIS_CONN
         | 
| 24 |  | 
| 25 |  | 
|  | |
| 44 | 
             
                                key = "{}/{}".format(kb_id, loc)
         | 
| 45 | 
             
                                if REDIS_CONN.exist(key):
         | 
| 46 | 
             
                                    continue
         | 
| 47 | 
            +
                                file_bin = STORAGE_IMPL.get(kb_id, loc)
         | 
| 48 | 
             
                                REDIS_CONN.transaction(key, file_bin, 12 * 60)
         | 
| 49 | 
             
                                logging.info("CACHE: {}".format(loc))
         | 
| 50 | 
             
                            except Exception as e:
         | 
    	
        rag/utils/s3_conn.py
    CHANGED
    
    | @@ -1,7 +1,6 @@ | |
| 1 | 
             
            import logging
         | 
| 2 | 
             
            import boto3
         | 
| 3 | 
             
            from botocore.exceptions import ClientError
         | 
| 4 | 
            -
            from botocore.client import Config
         | 
| 5 | 
             
            import time
         | 
| 6 | 
             
            from io import BytesIO
         | 
| 7 | 
             
            from rag.utils import singleton
         | 
| @@ -12,7 +11,6 @@ class RAGFlowS3(object): | |
| 12 | 
             
                def __init__(self):
         | 
| 13 | 
             
                    self.conn = None
         | 
| 14 | 
             
                    self.s3_config = settings.S3
         | 
| 15 | 
            -
                    self.endpoint = self.s3_config.get('endpoint', None)
         | 
| 16 | 
             
                    self.access_key = self.s3_config.get('access_key', None)
         | 
| 17 | 
             
                    self.secret_key = self.s3_config.get('secret_key', None)
         | 
| 18 | 
             
                    self.region = self.s3_config.get('region', None)
         | 
| @@ -26,24 +24,14 @@ class RAGFlowS3(object): | |
| 26 | 
             
                        pass
         | 
| 27 |  | 
| 28 | 
             
                    try:
         | 
| 29 | 
            -
             | 
| 30 | 
            -
                        config = Config(
         | 
| 31 | 
            -
                            s3={
         | 
| 32 | 
            -
                                'addressing_style': 'virtual'
         | 
| 33 | 
            -
                            }
         | 
| 34 | 
            -
                        )
         | 
| 35 | 
            -
             | 
| 36 | 
             
                        self.conn = boto3.client(
         | 
| 37 | 
             
                            's3',
         | 
| 38 | 
            -
                            endpoint_url=self.endpoint,
         | 
| 39 | 
             
                            region_name=self.region,
         | 
| 40 | 
             
                            aws_access_key_id=self.access_key,
         | 
| 41 | 
            -
                            aws_secret_access_key=self.secret_key | 
| 42 | 
            -
                            config=config
         | 
| 43 | 
             
                        )
         | 
| 44 | 
             
                    except Exception:
         | 
| 45 | 
            -
                        logging.exception(
         | 
| 46 | 
            -
                            "Fail to connect %s" % self.endpoint)
         | 
| 47 |  | 
| 48 | 
             
                def __close__(self):
         | 
| 49 | 
             
                    del self.conn
         | 
|  | |
| 1 | 
             
            import logging
         | 
| 2 | 
             
            import boto3
         | 
| 3 | 
             
            from botocore.exceptions import ClientError
         | 
|  | |
| 4 | 
             
            import time
         | 
| 5 | 
             
            from io import BytesIO
         | 
| 6 | 
             
            from rag.utils import singleton
         | 
|  | |
| 11 | 
             
                def __init__(self):
         | 
| 12 | 
             
                    self.conn = None
         | 
| 13 | 
             
                    self.s3_config = settings.S3
         | 
|  | |
| 14 | 
             
                    self.access_key = self.s3_config.get('access_key', None)
         | 
| 15 | 
             
                    self.secret_key = self.s3_config.get('secret_key', None)
         | 
| 16 | 
             
                    self.region = self.s3_config.get('region', None)
         | 
|  | |
| 24 | 
             
                        pass
         | 
| 25 |  | 
| 26 | 
             
                    try:
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 27 | 
             
                        self.conn = boto3.client(
         | 
| 28 | 
             
                            's3',
         | 
|  | |
| 29 | 
             
                            region_name=self.region,
         | 
| 30 | 
             
                            aws_access_key_id=self.access_key,
         | 
| 31 | 
            +
                            aws_secret_access_key=self.secret_key
         | 
|  | |
| 32 | 
             
                        )
         | 
| 33 | 
             
                    except Exception:
         | 
| 34 | 
            +
                        logging.exception(f"Fail to connect at region {self.region}")
         | 
|  | |
| 35 |  | 
| 36 | 
             
                def __close__(self):
         | 
| 37 | 
             
                    del self.conn
         |