shamilcoded committed on
Commit
c1a9c71
·
verified ·
1 Parent(s): aaf9746

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -17
app.py CHANGED
@@ -1,25 +1,21 @@
1
  import streamlit as st
2
  import os
3
  import tempfile
4
- import fitz # PyMuPDF for PDFs
5
  import docx
6
  import openpyxl
7
  import faiss
8
 
 
9
  from langchain_community.embeddings import HuggingFaceEmbeddings
10
- from langchain_community.llms import Groq
11
  from langchain.vectorstores import FAISS
12
  from langchain.text_splitter import RecursiveCharacterTextSplitter
13
  from langchain.docstore.document import Document
14
- from langchain.chains import RetrievalQA
15
 
16
- # Load LLM (API key from Hugging Face secrets)
17
- llm = Groq(
18
- model="llama3-8b-8192",
19
- api_key=os.getenv("GROQ_API_KEY")
20
- )
21
 
22
- # Embeddings model
23
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
24
 
25
  # File readers
@@ -58,30 +54,52 @@ def process_file(uploaded_file):
58
  else:
59
  return "Unsupported file type."
60
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  # Streamlit App
62
  st.set_page_config(page_title="DocuQuery AI", layout="centered")
63
  st.title("πŸ“„ DocuQuery AI")
64
- st.markdown("Upload a document (PDF, Word, or Excel) and ask questions about its content using LLaMA3.")
65
 
66
  uploaded_file = st.file_uploader("Upload your document", type=["pdf", "docx", "xlsx"])
67
 
68
  if uploaded_file:
69
  st.success("βœ… File uploaded successfully.")
70
- with st.spinner("Reading and processing file..."):
71
  raw_text = process_file(uploaded_file)
72
 
73
  splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
74
  docs = [Document(page_content=chunk) for chunk in splitter.split_text(raw_text)]
75
 
76
- with st.spinner("Indexing document with FAISS..."):
77
  db = FAISS.from_documents(docs, embedding_model)
78
  retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 4})
79
- qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
80
 
81
- st.success("πŸ“š Document indexed. Ask your question below!")
82
 
83
  user_query = st.text_input("❓ Ask something about the document:")
84
  if user_query:
85
- with st.spinner("Generating answer..."):
86
- response = qa_chain.run(user_query)
87
- st.markdown(f"**πŸ’¬ Answer:** {response}")
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import os
3
  import tempfile
4
+ import fitz
5
  import docx
6
  import openpyxl
7
  import faiss
8
 
9
+ from groq import Groq
10
  from langchain_community.embeddings import HuggingFaceEmbeddings
 
11
  from langchain.vectorstores import FAISS
12
  from langchain.text_splitter import RecursiveCharacterTextSplitter
13
  from langchain.docstore.document import Document
 
14
 
15
# Groq chat client — the API key is expected in the GROQ_API_KEY
# environment variable (e.g. a Hugging Face Spaces secret).
groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Sentence-transformer embeddings used to vectorize document chunks.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
20
 
21
  # File readers
 
54
  else:
55
  return "Unsupported file type."
56
 
57
# Prompt builder
def build_prompt(context, question):
    """Assemble the grounding prompt sent to the LLM.

    Joins the retrieved document *context* and the user's *question*
    into a single instruction that tells the model to answer strictly
    from the supplied context.
    """
    instruction = (
        "You are a helpful assistant. Answer the question based only "
        "on the context provided below."
    )
    return "\n".join([
        instruction,
        "",
        "Context:",
        context,
        "",
        "Question:",
        question,
        "",
        "Answer:",
    ])
68
+
69
# Streamlit App: upload a document, index it with FAISS, answer questions
# via retrieval-augmented generation against Groq's LLaMA-3.
st.set_page_config(page_title="DocuQuery AI", layout="centered")
st.title("📄 DocuQuery AI")
st.markdown("Upload a document and ask questions about it using LLaMA-3 from Groq.")

uploaded_file = st.file_uploader("Upload your document", type=["pdf", "docx", "xlsx"])

if uploaded_file:
    st.success("✅ File uploaded successfully.")
    with st.spinner("Processing file..."):
        raw_text = process_file(uploaded_file)

    # process_file signals failure by returning an error string, and a
    # readable file may still yield no extractable text — bail out early
    # instead of chunking and indexing that junk as document content.
    if raw_text == "Unsupported file type." or not raw_text.strip():
        st.error("❌ Could not extract any text from this file.")
        st.stop()

    # Split the raw text into overlapping chunks suitable for embedding.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    docs = [Document(page_content=chunk) for chunk in splitter.split_text(raw_text)]

    with st.spinner("Embedding & indexing..."):
        db = FAISS.from_documents(docs, embedding_model)
        retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 4})

    st.success("📚 Document indexed. Ask a question!")

    user_query = st.text_input("❓ Ask something about the document:")
    if user_query:
        with st.spinner("Generating response..."):
            # Retrieve the top-k most similar chunks and ground the
            # model's answer in them via the prompt template.
            retrieved_docs = retriever.get_relevant_documents(user_query)
            context = "\n".join(doc.page_content for doc in retrieved_docs)

            prompt = build_prompt(context, user_query)

            response = groq_client.chat.completions.create(
                model="llama3-8b-8192",
                messages=[{"role": "user", "content": prompt}],
            )

        st.markdown(f"**💬 Answer:** {response.choices[0].message.content}")