# TANVEERMAKHDOOM's picture
# Update app.py
# f9a9ebf verified
import os
import gdown
import streamlit as st
from PyPDF2 import PdfReader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from groq import Groq
# Shared Groq API client used by query_groq. The key is read from the
# GROQ_API_KEY environment variable; a missing variable raises KeyError here.
_groq_api_key = os.environ["GROQ_API_KEY"]
client = Groq(api_key=_groq_api_key)
# Download and save PDF using gdown
def download_pdf(url):
    """Download the file at *url* with gdown and return its local path.

    The file is always written to ``/tmp/drive_doc.pdf``.

    Args:
        url: Link passed to ``gdown.download`` (fuzzy URL matching enabled).

    Returns:
        The output path on success, or ``None`` after the failure has been
        reported to the user through ``st.error``.
    """
    output_path = "/tmp/drive_doc.pdf"
    try:
        saved_path = gdown.download(url=url, output=output_path, quiet=True, fuzzy=True)
    except Exception as e:
        st.error(f"❌ Download failed: {e}")
        return None

    # Some gdown releases may report failure by returning None instead of
    # raising; without this check a missing or stale file from an earlier run
    # would be treated as a fresh, successful download.
    if not saved_path or not os.path.isfile(output_path):
        st.error("❌ Download failed: no file was retrieved from the given URL.")
        return None
    return output_path
# Extract text from PDF
def extract_text(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    Pages whose ``extract_text()`` result is falsy (``None`` or an empty
    string) are skipped. Page texts are joined with no separator.

    Args:
        pdf_path: Path handed to ``PdfReader``.

    Returns:
        A single string with the text of all pages, in page order.
    """
    reader = PdfReader(pdf_path)
    page_texts = (page.extract_text() for page in reader.pages)
    # One join instead of repeated ``+=`` avoids rebuilding the string per page.
    return "".join(content for content in page_texts if content)
# Split text into chunks
def chunk_text(text):
    """Split *text* into overlapping chunks for embedding.

    Uses a ``RecursiveCharacterTextSplitter`` configured with
    ``chunk_size=500`` and ``chunk_overlap=50`` and returns whatever its
    ``split_text`` produces for *text*.
    """
    return RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
    ).split_text(text)
# Create embeddings and store in FAISS
def build_vector_db(chunks):
    """Embed *chunks* and index them in a FAISS vector store.

    Args:
        chunks: Text chunks to embed with the
            ``sentence-transformers/all-MiniLM-L6-v2`` model.

    Returns:
        The vector store returned by ``FAISS.from_texts``.

    Raises:
        ValueError: If *chunks* is empty, e.g. because no text could be
            extracted from the document.
    """
    # Fail early with a readable message instead of letting the index builder
    # trip over an empty embedding list.
    if not chunks:
        raise ValueError("No text chunks to index; the PDF may contain no extractable text.")
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return FAISS.from_texts(chunks, embedding=embeddings)
# Query the vector DB and get response from Groq
def query_groq(query, vector_db, *, k=3, model="llama3-8b-8192"):
    """Answer *query* with Groq, using context retrieved from *vector_db*.

    Args:
        query: The user's question; also used as the similarity-search query.
        vector_db: Store exposing ``similarity_search(query, k=...)`` whose
            results carry a ``page_content`` attribute.
        k: Number of chunks to retrieve as context. Defaults to 3.
        model: Groq model identifier for the chat completion. Defaults to
            ``"llama3-8b-8192"``.

    Returns:
        The message content of the first completion choice.
    """
    docs = vector_db.similarity_search(query, k=k)
    context = "\n".join(doc.page_content for doc in docs)
    response = client.chat.completions.create(
        model=model,
        messages=[
            # Retrieved chunks go in the system message; the question stays
            # in the user message unchanged.
            {"role": "system", "content": f"Use the following context:\n{context}"},
            {"role": "user", "content": query},
        ],
    )
    return response.choices[0].message.content
# --- Streamlit App ---
st.title("📄 RAG QA from Google Drive PDF")

link = "https://drive.google.com/file/d/1SGXNLO841VyHnGiX81oo6x2RHIrTmP5S/view?usp=sharing"


class _DownloadFailed(Exception):
    """Signal a failed download so the cached loader stores no result."""


@st.cache_resource(show_spinner=False)
def _load_vector_db(url):
    """Download the PDF at *url* and build its vector store, cached per URL.

    Streamlit re-executes this script on every widget interaction; caching
    keeps each query from re-downloading and re-embedding the document.
    Failures are raised rather than returned so they are not cached and the
    next rerun tries again.
    """
    pdf_path = download_pdf(url)
    if not pdf_path:
        raise _DownloadFailed(url)
    return build_vector_db(chunk_text(extract_text(pdf_path)))


st.write("📥 Downloading and processing document...")
try:
    vector_db = _load_vector_db(link)
except _DownloadFailed:
    # download_pdf has already reported the failure to the user.
    vector_db = None
except Exception as e:
    st.error(f"❌ Error processing PDF: {e}")
    vector_db = None
else:
    st.success("✅ Document processed successfully.")

query = st.text_input("🔍 Enter your query:")
# Compare against None explicitly: readiness should not depend on how the
# vector store object evaluates in a boolean context.
if query and vector_db is not None:
    try:
        answer = query_groq(query, vector_db)
    except Exception as e:
        # Show API/network failures in the page instead of a raw traceback.
        st.error(f"❌ Query failed: {e}")
    else:
        st.subheader("💬 Answer:")
        st.write(answer)
elif query:
    st.warning("⚠️ Document not ready yet.")