SakibAhmed committed on
Commit
56313b7
·
verified ·
1 Parent(s): eac6673

Upload 4 files

Browse files
Files changed (3) hide show
  1. app.py +22 -4
  2. config.py +8 -2
  3. utils.py +30 -0
app.py CHANGED
@@ -30,13 +30,15 @@ load_dotenv()
30
  from llm_fallback import get_groq_fallback_response
31
  from rag_system import initialize_and_get_rag_system
32
  from rag_components import KnowledgeRAG
33
- from utils import download_and_unzip_gdrive_file # MODIFIED: Import the new utility
34
  from config import (
35
  RAG_SOURCES_DIR,
36
  RAG_STORAGE_PARENT_DIR,
37
  RAG_CHUNKED_SOURCES_FILENAME,
38
- GDRIVE_INDEX_ENABLED, # MODIFIED: Import new config
39
- GDRIVE_INDEX_ID_OR_URL # MODIFIED: Import new config
 
 
40
  )
41
 
42
  # Setup logging (remains global for the app)
@@ -1082,7 +1084,23 @@ if __name__ == '__main__':
1082
  TEXT_EXTRACTIONS_DIR]:
1083
  os.makedirs(folder_path, exist_ok=True)
1084
 
1085
- # MODIFIED: Load users from CSV at startup
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1086
  load_users_from_csv()
1087
 
1088
  load_qa_data_on_startup()
 
30
  from llm_fallback import get_groq_fallback_response
31
  from rag_system import initialize_and_get_rag_system
32
  from rag_components import KnowledgeRAG
33
+ from utils import download_and_unzip_gdrive_file, download_gdrive_file # MODIFIED: Import the new utility
34
  from config import (
35
  RAG_SOURCES_DIR,
36
  RAG_STORAGE_PARENT_DIR,
37
  RAG_CHUNKED_SOURCES_FILENAME,
38
+ GDRIVE_INDEX_ENABLED,
39
+ GDRIVE_INDEX_ID_OR_URL,
40
+ GDRIVE_USERS_CSV_ENABLED, # NEW
41
+ GDRIVE_USERS_CSV_ID_OR_URL # NEW
42
  )
43
 
44
  # Setup logging (remains global for the app)
 
1084
  TEXT_EXTRACTIONS_DIR]:
1085
  os.makedirs(folder_path, exist_ok=True)
1086
 
1087
+ # --- NEW: Download users.csv from GDrive if enabled ---
1088
+ if GDRIVE_USERS_CSV_ENABLED:
1089
+ logger.info("[GDRIVE_USERS_DOWNLOAD] Google Drive users.csv download is ENABLED.")
1090
+ if GDRIVE_USERS_CSV_ID_OR_URL:
1091
+ users_csv_target_path = os.path.join(_APP_BASE_DIR, 'assets', 'users.csv')
1092
+ logger.info(f"[GDRIVE_USERS_DOWNLOAD] Attempting to download users.csv to: {users_csv_target_path}")
1093
+ download_successful = download_gdrive_file(GDRIVE_USERS_CSV_ID_OR_URL, users_csv_target_path)
1094
+ if download_successful:
1095
+ logger.info("[GDRIVE_USERS_DOWNLOAD] Successfully downloaded users.csv.")
1096
+ else:
1097
+ logger.error("[GDRIVE_USERS_DOWNLOAD] Failed to download users.csv from Google Drive. Will use existing file or fallback.")
1098
+ else:
1099
+ logger.warning("[GDRIVE_USERS_DOWNLOAD] GDRIVE_USERS_CSV_ENABLED is True, but GDRIVE_USERS_CSV_URL is not set.")
1100
+ else:
1101
+ logger.info("[GDRIVE_USERS_DOWNLOAD] Google Drive users.csv download is DISABLED.")
1102
+
1103
+ # Load users from CSV at startup (will use the downloaded file if successful)
1104
  load_users_from_csv()
1105
 
1106
  load_qa_data_on_startup()
config.py CHANGED
@@ -48,13 +48,18 @@ RAG_CHUNK_OVERLAP = int(os.getenv("RAG_CHUNK_OVERLAP", 150))
48
  RAG_RERANKER_MODEL_NAME = os.getenv("RAG_RERANKER_MODEL", "jinaai/jina-reranker-v2-base-multilingual")
49
  RAG_RERANKER_ENABLED = os.getenv("RAG_RERANKER_ENABLED", "True").lower() == "true"
50
 
 
51
  GDRIVE_SOURCES_ENABLED = os.getenv("GDRIVE_SOURCES_ENABLED", "False").lower() == "true"
52
  GDRIVE_FOLDER_ID_OR_URL = os.getenv("GDRIVE_FOLDER_URL")
53
 
54
- # MODIFIED: New configuration for downloading a pre-built FAISS index
55
  GDRIVE_INDEX_ENABLED = os.getenv("GDRIVE_INDEX_ENABLED", "False").lower() == "true"
56
  GDRIVE_INDEX_ID_OR_URL = os.getenv("GDRIVE_INDEX_URL")
57
 
 
 
 
 
58
 
59
  # Detailed logging configuration
60
  RAG_DETAILED_LOGGING = os.getenv("RAG_DETAILED_LOGGING", "True").lower() == "true"
@@ -67,4 +72,5 @@ logger.info(f"Reranker Model: {RAG_RERANKER_MODEL_NAME}")
67
  logger.info(f"Retrieval Pipeline: Initial Fetch K={RAG_INITIAL_FETCH_K}, Reranker Final K={RAG_RERANKER_K}")
68
  logger.info(f"Detailed Logging: {'ENABLED' if RAG_DETAILED_LOGGING else 'DISABLED'}")
69
  logger.info(f"GDrive Sources Download: {'ENABLED' if GDRIVE_SOURCES_ENABLED else 'DISABLED'}")
70
- logger.info(f"GDrive Pre-built Index Download: {'ENABLED' if GDRIVE_INDEX_ENABLED else 'DISABLED'}")
 
 
48
  RAG_RERANKER_MODEL_NAME = os.getenv("RAG_RERANKER_MODEL", "jinaai/jina-reranker-v2-base-multilingual")
49
  RAG_RERANKER_ENABLED = os.getenv("RAG_RERANKER_ENABLED", "True").lower() == "true"
50
 
51
+ # GDrive configuration for RAG sources
52
  GDRIVE_SOURCES_ENABLED = os.getenv("GDRIVE_SOURCES_ENABLED", "False").lower() == "true"
53
  GDRIVE_FOLDER_ID_OR_URL = os.getenv("GDRIVE_FOLDER_URL")
54
 
55
+ # GDrive configuration for downloading a pre-built FAISS index
56
  GDRIVE_INDEX_ENABLED = os.getenv("GDRIVE_INDEX_ENABLED", "False").lower() == "true"
57
  GDRIVE_INDEX_ID_OR_URL = os.getenv("GDRIVE_INDEX_URL")
58
 
59
+ # --- NEW: GDrive configuration for downloading users.csv ---
60
+ GDRIVE_USERS_CSV_ENABLED = os.getenv("GDRIVE_USERS_CSV_ENABLED", "False").lower() == "true"
61
+ GDRIVE_USERS_CSV_ID_OR_URL = os.getenv("GDRIVE_USERS_CSV_URL")
62
+
63
 
64
  # Detailed logging configuration
65
  RAG_DETAILED_LOGGING = os.getenv("RAG_DETAILED_LOGGING", "True").lower() == "true"
 
72
  logger.info(f"Retrieval Pipeline: Initial Fetch K={RAG_INITIAL_FETCH_K}, Reranker Final K={RAG_RERANKER_K}")
73
  logger.info(f"Detailed Logging: {'ENABLED' if RAG_DETAILED_LOGGING else 'DISABLED'}")
74
  logger.info(f"GDrive Sources Download: {'ENABLED' if GDRIVE_SOURCES_ENABLED else 'DISABLED'}")
75
+ logger.info(f"GDrive Pre-built Index Download: {'ENABLED' if GDRIVE_INDEX_ENABLED else 'DISABLED'}")
76
+ logger.info(f"GDrive users.csv Download: {'ENABLED' if GDRIVE_USERS_CSV_ENABLED else 'DISABLED'}")
utils.py CHANGED
@@ -66,6 +66,36 @@ def get_id_from_gdrive_input(url_or_id: str) -> Optional[str]:
66
  logger.warning(f"Could not reliably extract Google Drive ID from input: {url_or_id}")
67
  return None
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
  def download_and_unzip_gdrive_file(file_id_or_url: str, target_extraction_dir: str) -> bool:
71
  """
 
66
  logger.warning(f"Could not reliably extract Google Drive ID from input: {url_or_id}")
67
  return None
68
 
69
def download_gdrive_file(file_id_or_url: str, target_path: str) -> bool:
    """
    Downloads a single file from Google Drive to a specific path.

    Args:
        file_id_or_url: A Google Drive file ID or URL; the ID is extracted
            with get_id_from_gdrive_input.
        target_path: Destination file path. Its parent directory is created
            when the path has a directory component.

    Returns:
        True only when gdown reports a completed download AND a non-empty
        file exists at target_path. False on an invalid ID/URL, a failed or
        empty download, or any exception (which is logged, never raised).
    """
    logger.info(f"[GDRIVE_SINGLE_FILE] Attempting to download file. Input: {file_id_or_url}")

    file_id = get_id_from_gdrive_input(file_id_or_url)
    if not file_id:
        logger.error(f"[GDRIVE_SINGLE_FILE] Invalid Google Drive File ID or URL provided: {file_id_or_url}")
        return False

    try:
        # Ensure the target directory exists before downloading. A bare
        # filename has an empty dirname, and os.makedirs("") raises, so only
        # create the directory when there is one to create.
        target_dir = os.path.dirname(target_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        logger.info(f"[GDRIVE_SINGLE_FILE] Downloading file ID: {file_id} to path: {target_path}")
        # gdown returns the output path on success and (in versions that do
        # not raise) None on failure. The result must be checked: a stale file
        # left at target_path by an earlier run would otherwise make a failed
        # download look successful.
        # NOTE(review): fuzzy=True only affects ID extraction from URLs and
        # appears to be a no-op when `id=` is passed; kept to preserve the call.
        downloaded_path = gdown.download(id=file_id, output=target_path, quiet=False, fuzzy=True)

        if (
            not downloaded_path
            or not os.path.exists(target_path)
            or os.path.getsize(target_path) == 0
        ):
            logger.error("[GDRIVE_SINGLE_FILE] Download failed or the resulting file is empty.")
            return False

        logger.info("[GDRIVE_SINGLE_FILE] Download successful.")
        return True

    except Exception as e:
        # Boundary handler: startup must continue with the existing or
        # fallback file, so log the full traceback and report failure.
        logger.error(f"[GDRIVE_SINGLE_FILE] An error occurred during download: {e}", exc_info=True)
        return False
99
 
100
  def download_and_unzip_gdrive_file(file_id_or_url: str, target_extraction_dir: str) -> bool:
101
  """