# pnp-chatbot-v1 / app.py
# FauziIsyrinApridal
# ..
# 0182950
import streamlit as st
import os
from dotenv import load_dotenv
from langsmith import traceable
from datetime import datetime
from typing import List, Dict, Optional
from app.chat import initialize_session_state, display_chat_history
from app.data_loader import get_data, list_all_files, load_docs
from app.document_processor import process_documents, save_vector_store_to_supabase, load_vector_store_from_supabase
from app.prompts import sahabat_prompt
from app.db import supabase
from langchain_community.llms import Replicate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_community.document_transformers import LongContextReorder
# Load environment variables via python-dotenv at import time, before the
# configuration and functions below are defined.
load_dotenv()
# ---------------------------------------------------------
# ⚡️ CONFIG
# ---------------------------------------------------------
# Supabase storage bucket used for listing, saving and loading the persisted
# vector store files.
BUCKET_NAME = "pnp-bot-storage-archive"
# File-name prefix used to recognise vector store files inside BUCKET_NAME.
VECTOR_STORE_PREFIX = "vector_store"
# ---------------------------------------------------------
# ⚡️ UTILITY
# ---------------------------------------------------------
def get_latest_data_timestamp_from_files(bucket_name: str) -> float:
    """Return the newest file timestamp found in a storage bucket.

    Every entry returned by ``list_all_files(bucket_name)`` contributes its
    ``updated_at`` value, falling back to ``created_at`` when that is missing
    or empty. Values are parsed as ISO-8601 (a trailing ``Z`` is rewritten to
    ``+00:00``) and converted to POSIX seconds.

    Args:
        bucket_name: Name of the bucket handed to ``list_all_files``.

    Returns:
        The largest parsed timestamp in seconds, or ``0.0`` when no entry
        carries a parseable timestamp.
    """
    # Seeding with 0.0 keeps the result at 0.0 when nothing could be parsed.
    seen = [0.0]
    for entry in list_all_files(bucket_name):
        raw = entry.get("updated_at") or entry.get("created_at")
        if not raw:
            continue
        try:
            moment = datetime.fromisoformat(raw.replace('Z', '+00:00'))
            seen.append(moment.timestamp())
        except Exception as exc:
            # Best effort: a bad timestamp on one file must not abort the scan.
            print(f"Gagal parsing waktu dari {entry.get('name')}: {exc}")
    return max(seen)
def get_supabase_vector_store_timestamp() -> Optional[str]:
    """Return the newest ``updated_at`` value of the stored vector store files.

    Lists ``BUCKET_NAME`` and keeps entries whose name starts with
    ``VECTOR_STORE_PREFIX`` and ends with ``.faiss`` or ``.pkl``.

    Returns:
        The maximum ``updated_at`` value among the matching entries when at
        least two of them exist; ``None`` when fewer than two match or when
        any error occurs while listing or inspecting the entries.
    """
    index_suffixes = (".faiss", ".pkl")
    try:
        listing = supabase.storage.from_(BUCKET_NAME).list()
        stamps = [
            item["updated_at"]
            for item in listing
            if item["name"].startswith(VECTOR_STORE_PREFIX)
            and item["name"].endswith(index_suffixes)
        ]
        # Fewer than two matching files is treated as "no usable vector store"
        # (presumably the .faiss index and its .pkl companion are both needed).
        if len(stamps) < 2:
            return None
        return max(stamps)
    except Exception as e:
        print(f"Error getting Supabase timestamp: {e}")
        return None
def vector_store_is_outdated(data_bucket: str = "pnp-bot-storage") -> bool:
    """Report whether the persisted vector store must be rebuilt.

    The stored vector store's timestamp (from
    ``get_supabase_vector_store_timestamp``) is compared with the newest file
    timestamp in the source-document bucket.

    Args:
        data_bucket: Bucket whose files are the source documents. Defaults to
            the bucket name this function previously hard-coded.

    Returns:
        ``True`` when no vector store timestamp is available, when that
        timestamp cannot be parsed, or when a source file is newer than the
        vector store; ``False`` otherwise.
    """
    supabase_timestamp = get_supabase_vector_store_timestamp()
    if supabase_timestamp is None:
        return True

    try:
        supabase_time = datetime.fromisoformat(
            supabase_timestamp.replace("Z", "+00:00")
        ).timestamp()
    except (AttributeError, TypeError, ValueError) as e:
        # Freshness cannot be verified, so fall back to rebuilding instead of
        # letting the parse error crash the app at start-up.
        print(f"Error parsing vector store timestamp {supabase_timestamp!r}: {e}")
        return True

    data_time = get_latest_data_timestamp_from_files(data_bucket)
    return data_time > supabase_time
def reorder_embedding(docs):
    """Pass ``docs`` through a fresh ``LongContextReorder`` transformer.

    Returns whatever ``LongContextReorder.transform_documents`` produces for
    the given documents.
    """
    return LongContextReorder().transform_documents(docs)
# ---------------------------------------------------------
# ⚡️ RAG CHAIN
# ---------------------------------------------------------
@traceable(name="Create RAG Conversational Chain")
def create_conversational_chain(vector_store):
    """Build the ConversationalRetrievalChain used to answer questions.

    The chain combines a Replicate-hosted LLM, a retriever over
    ``vector_store`` that fetches 6 documents per query, the project's
    ``sahabat_prompt`` for the combine-documents step, and a new
    ``ConversationBufferMemory`` stored under ``chat_history``.

    Args:
        vector_store: Object exposing ``as_retriever(search_kwargs=...)``.

    Returns:
        The configured chain; it also returns source documents with answers.
    """
    generation_settings = {"temperature": 0.1, "top_p": 0.9, "max_new_tokens": 10000}
    llm = Replicate(
        model="fauzi3007/sahabat-ai-replicate:c3fc398f441379bd3fb6a4498950f9302aa75b7a95e76978a689ceb5c4b4bf09",
        model_kwargs=generation_settings,
    )
    # output_key names which chain output ("answer") the memory records; the
    # chain also returns source documents.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
        output_key="answer",
    )
    retriever = vector_store.as_retriever(search_kwargs={"k": 6})
    return ConversationalRetrievalChain.from_llm(
        llm,
        retriever=retriever,
        combine_docs_chain_kwargs={"prompt": sahabat_prompt},
        return_source_documents=True,
        memory=memory,
    )
def get_rag_chain(vector_store):
    """Build and return a RAG chain for callers outside this module.

    Thin wrapper that delegates to ``create_conversational_chain``.
    """
    rag_chain = create_conversational_chain(vector_store)
    return rag_chain
# ---------------------------------------------------------
# ⚡️ MAIN FUNCTION
# ---------------------------------------------------------
def _rebuild_vector_store():
    """Rebuild the vector store from the source documents and upload it.

    Returns the newly built vector store, or ``None`` when ``load_docs()``
    yields no documents.
    """
    with st.spinner("Memuat dan memproses dokumen..."):
        get_data()
        docs = load_docs()
        if not docs:
            print("⚠️ Folder 'data/' kosong. Chatbot tetap bisa digunakan, tetapi tanpa konteks dokumen.")
            return None
        # NOTE(review): the reorder is applied to the raw documents before
        # indexing, not to retrieved results -- confirm this is intended.
        vector_store = process_documents(reorder_embedding(docs))
        with st.spinner("Mengunggah vector store ke Supabase..."):
            uploaded = save_vector_store_to_supabase(
                vector_store, supabase, BUCKET_NAME, VECTOR_STORE_PREFIX
            )
        if uploaded:
            print("✅ Vector store berhasil diunggah ke Supabase!")
        else:
            print("❌ Gagal mengunggah vector store ke Supabase.")
        # The in-memory store is still returned when the upload fails.
        return vector_store


def _load_vector_store():
    """Load the persisted vector store from Supabase and log the outcome.

    Returns whatever ``load_vector_store_from_supabase`` returns; a falsy
    result is logged as a failure.
    """
    with st.spinner("Memuat vector store dari Supabase..."):
        vector_store = load_vector_store_from_supabase(supabase, BUCKET_NAME, VECTOR_STORE_PREFIX)
    if vector_store:
        print("✅ Vector store berhasil dimuat dari Supabase!")
    else:
        print("❌ Gagal memuat vector store dari Supabase.")
    return vector_store


@traceable(name="Main Chatbot RAG App")
def main():
    """Run the chatbot page.

    Initialises session state and page config, obtains a vector store
    (rebuilding it when outdated, otherwise loading it from Supabase or
    reusing the one kept in session state), stores it in
    ``st.session_state["vector_store"]`` and renders the chat UI. When no
    vector store is available a warning is shown instead of a blank page.
    """
    initialize_session_state()
    st.set_page_config(
        page_title="PNP-Bot",
        page_icon="logo-pnp.ico",
    )

    if not st.session_state["history"]:
        # No conversation yet: make sure the vector store is up to date.
        if vector_store_is_outdated():
            vector_store = _rebuild_vector_store()
        else:
            vector_store = _load_vector_store()
    else:
        # Conversation in progress: reuse the stored vector store when there
        # is one, otherwise load it again.
        vector_store = st.session_state.get("vector_store") or load_vector_store_from_supabase(
            supabase, BUCKET_NAME, VECTOR_STORE_PREFIX
        )

    st.session_state["vector_store"] = vector_store

    if vector_store is None:
        # Previously this path rendered nothing at all; tell the user why.
        st.warning("Basis pengetahuan (vector store) tidak tersedia. Silakan coba lagi nanti.")
        return

    # NOTE(review): a new chain, and with it a new ConversationBufferMemory,
    # is created on every call of main(). If main() runs again on each user
    # interaction the memory will not carry over -- confirm this is intended.
    chain = create_conversational_chain(vector_store)
    display_chat_history(chain)
# Start the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()