"""Environment-driven configuration for the Groq LLM and RAG pipeline.

Every setting is read from the process environment once, at import time,
and exposed as a module-level constant.

Importing this module has side effects:

* ``logging.basicConfig`` is called when this module's logger has no
  handlers attached.
* The RAG sources and storage directories are created if missing.
* A summary of the effective configuration is logged at INFO level.

Raises:
    ValueError: at import time, if a numeric environment variable holds a
        non-empty value that cannot be parsed.
"""

import logging
import os

# --- Logging Setup ---
logger = logging.getLogger(__name__)
if not logger.handlers:
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )


# --- Environment Parsing Helpers ---
# Spellings (compared after strip + lower) that switch a boolean flag on.
_TRUTHY_VALUES = frozenset({"1", "true", "yes", "on"})


def _env_bool(name: str, default: bool) -> bool:
    """Return environment variable *name* interpreted as a boolean.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is not set at all.

    Returns:
        ``default`` when the variable is unset; otherwise ``True`` only when
        the stripped, case-insensitive value is one of ``_TRUTHY_VALUES``.
        Any other value, including the empty string, yields ``False``.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    return raw.strip().lower() in _TRUTHY_VALUES


def _env_number(name, default, cast):
    """Parse environment variable *name* with *cast* (``int`` or ``float``).

    An unset or blank variable yields ``default``. A non-blank value that
    *cast* rejects raises ``ValueError`` naming the variable, so a bad
    deployment setting is easy to trace.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return default
    try:
        return cast(raw.strip())
    except ValueError as err:
        raise ValueError(
            f"Environment variable {name}={raw!r} is not a valid "
            f"{cast.__name__}"
        ) from err


def _env_int(name: str, default: int) -> int:
    """Return environment variable *name* as an ``int`` (see ``_env_number``)."""
    return _env_number(name, default, int)


def _env_float(name: str, default: float) -> float:
    """Return environment variable *name* as a ``float`` (see ``_env_number``)."""
    return _env_number(name, default, float)


def _status(flag: bool) -> str:
    """Render a boolean flag as the label used in the startup summary."""
    return 'ENABLED' if flag else 'DISABLED'


# --- Configuration Constants ---
# The Groq key is supplied through BOT_API_KEY; both names stay importable.
_BOT_API_KEY_ENV = os.getenv('BOT_API_KEY')
GROQ_API_KEY = _BOT_API_KEY_ENV
if not GROQ_API_KEY:
    logger.critical("CRITICAL: BOT_API_KEY environment variable not found. Groq services will fail.")

FALLBACK_LLM_MODEL_NAME = os.getenv("GROQ_FALLBACK_MODEL", "llama-3.3-70b-versatile")

# Storage locations default to sub-directories next to this file.
_MODULE_BASE_DIR = os.path.dirname(os.path.abspath(__file__))
RAG_FAISS_INDEX_SUBDIR_NAME = "faiss_index"
RAG_STORAGE_PARENT_DIR = os.getenv("RAG_STORAGE_DIR", os.path.join(_MODULE_BASE_DIR, "faiss_storage"))
RAG_SOURCES_DIR = os.getenv("SOURCES_DIR", os.path.join(_MODULE_BASE_DIR, "sources"))
RAG_CHUNKED_SOURCES_FILENAME = "pre_chunked_sources.json"

# Create both directories up front so importers can rely on them existing.
os.makedirs(RAG_SOURCES_DIR, exist_ok=True)
os.makedirs(RAG_STORAGE_PARENT_DIR, exist_ok=True)

# Embedding and model configuration
RAG_EMBEDDING_MODEL_NAME = os.getenv("RAG_EMBEDDING_MODEL", "BAAI/bge-small-en")
RAG_EMBEDDING_USE_GPU = _env_bool("RAG_EMBEDDING_GPU", False)
RAG_LLM_MODEL_NAME = os.getenv("RAG_LLM_MODEL", "llama-3.3-70b-versatile")
RAG_LLM_TEMPERATURE = _env_float("RAG_TEMPERATURE", 0.1)
RAG_LOAD_INDEX_ON_STARTUP = _env_bool("RAG_LOAD_INDEX", True)

# Retrieval pipeline: number of candidates fetched initially, and number
# kept after reranking.
RAG_INITIAL_FETCH_K = _env_int("RAG_INITIAL_FETCH_K", 20)
RAG_RERANKER_K = _env_int("RAG_RERANKER_K", 5)

# Incremental update limit
RAG_MAX_FILES_FOR_INCREMENTAL = _env_int("RAG_MAX_FILES_FOR_INCREMENTAL", 50)

# Chunk configuration
RAG_CHUNK_SIZE = _env_int("RAG_CHUNK_SIZE", 1000)
RAG_CHUNK_OVERLAP = _env_int("RAG_CHUNK_OVERLAP", 150)

# Reranker configuration
RAG_RERANKER_MODEL_NAME = os.getenv("RAG_RERANKER_MODEL", "jinaai/jina-reranker-v2-base-multilingual")
RAG_RERANKER_ENABLED = _env_bool("RAG_RERANKER_ENABLED", True)

# GDrive configuration for RAG sources
GDRIVE_SOURCES_ENABLED = _env_bool("GDRIVE_SOURCES_ENABLED", False)
GDRIVE_FOLDER_ID_OR_URL = os.getenv("GDRIVE_FOLDER_URL")

# GDrive configuration for downloading a pre-built FAISS index
GDRIVE_INDEX_ENABLED = _env_bool("GDRIVE_INDEX_ENABLED", False)
GDRIVE_INDEX_ID_OR_URL = os.getenv("GDRIVE_INDEX_URL")

# GDrive configuration for downloading users.csv
GDRIVE_USERS_CSV_ENABLED = _env_bool("GDRIVE_USERS_CSV_ENABLED", False)
GDRIVE_USERS_CSV_ID_OR_URL = os.getenv("GDRIVE_USERS_CSV_URL")

# Detailed logging configuration
RAG_DETAILED_LOGGING = _env_bool("RAG_DETAILED_LOGGING", True)
# --- End of Configuration Constants ---


def _warn_on_missing_gdrive_targets() -> None:
    """Warn about each GDrive feature that is enabled without a URL/ID set.

    Mirrors the missing-API-key check: the inconsistency is reported at
    startup instead of surfacing later inside the download code.
    """
    checks = (
        ("GDRIVE_SOURCES_ENABLED", GDRIVE_SOURCES_ENABLED,
         "GDRIVE_FOLDER_URL", GDRIVE_FOLDER_ID_OR_URL),
        ("GDRIVE_INDEX_ENABLED", GDRIVE_INDEX_ENABLED,
         "GDRIVE_INDEX_URL", GDRIVE_INDEX_ID_OR_URL),
        ("GDRIVE_USERS_CSV_ENABLED", GDRIVE_USERS_CSV_ENABLED,
         "GDRIVE_USERS_CSV_URL", GDRIVE_USERS_CSV_ID_OR_URL),
    )
    for flag_name, enabled, url_name, url_value in checks:
        if enabled and not url_value:
            logger.warning(
                "%s is enabled but %s is not set; no download location is configured.",
                flag_name,
                url_name,
            )


_warn_on_missing_gdrive_targets()

# Startup summary of the effective configuration (lazy %-style arguments).
logger.info(
    "RAG Configuration Loaded - Chunk Size: %s, Chunk Overlap: %s",
    RAG_CHUNK_SIZE,
    RAG_CHUNK_OVERLAP,
)
logger.info("Embedding Model: %s", RAG_EMBEDDING_MODEL_NAME)
logger.info("Reranker Model: %s", RAG_RERANKER_MODEL_NAME)
logger.info(
    "Retrieval Pipeline: Initial Fetch K=%s, Reranker Final K=%s",
    RAG_INITIAL_FETCH_K,
    RAG_RERANKER_K,
)
logger.info("Detailed Logging: %s", _status(RAG_DETAILED_LOGGING))
logger.info("GDrive Sources Download: %s", _status(GDRIVE_SOURCES_ENABLED))
logger.info("GDrive Pre-built Index Download: %s", _status(GDRIVE_INDEX_ENABLED))
logger.info("GDrive users.csv Download: %s", _status(GDRIVE_USERS_CSV_ENABLED))