Talha812 committed on
Commit
ff8d801
·
verified ·
1 Parent(s): 43089b4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -18
app.py CHANGED
@@ -26,9 +26,12 @@ def chunk_text(text, chunk_size=500, chunk_overlap=50):
26
  return text_splitter.split_text(text)
27
 
28
  # Function to create embeddings and store them in FAISS
29
- def create_embeddings_and_store(chunks):
30
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
31
- vector_db = FAISS.from_texts(chunks, embedding=embeddings)
 
 
 
32
  return vector_db
33
 
34
  # Function to query the vector database and interact with Groq
@@ -48,27 +51,29 @@ def query_vector_db(query, vector_db):
48
  return chat_completion.choices[0].message.content
49
 
50
  # Streamlit app
51
- st.title("RAG-Based Application")
52
 
53
- # Upload PDF
54
- uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"], accept_multiple_files=True)
55
 
56
- if uploaded_file:
57
- with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
58
- temp_file.write(uploaded_file.read())
59
- pdf_path = temp_file.name
 
 
60
 
61
- # Extract text
62
- text = extract_text_from_pdf(pdf_path)
63
- st.write("PDF Text Extracted Successfully!")
64
 
65
- # Chunk text
66
- chunks = chunk_text(text)
67
- st.write("Text Chunked Successfully!")
68
 
69
- # Generate embeddings and store in FAISS
70
- vector_db = create_embeddings_and_store(chunks)
71
- st.write("Embeddings Generated and Stored Successfully!")
72
 
73
  # User query input
74
  user_query = st.text_input("Enter your query:")
 
26
  return text_splitter.split_text(text)
27
 
28
  # Function to create embeddings and store them in FAISS
29
+ def create_embeddings_and_store(chunks, vector_db=None):
30
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
31
+ if vector_db is None:
32
+ vector_db = FAISS.from_texts(chunks, embedding=embeddings)
33
+ else:
34
+ vector_db.add_texts(chunks)
35
  return vector_db
36
 
37
  # Function to query the vector database and interact with Groq
 
51
  return chat_completion.choices[0].message.content
52
 
53
  # Streamlit app
54
+ st.title("RAG-Based Application QA")
55
 
56
+ # Upload PDFs
57
+ uploaded_files = st.file_uploader("Upload PDF documents", type=["pdf"], accept_multiple_files=True)
58
 
59
+ if uploaded_files:
60
+ vector_db = None # Initialize an empty vector DB
61
+ for uploaded_file in uploaded_files:
62
+ with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
63
+ temp_file.write(uploaded_file.read())
64
+ pdf_path = temp_file.name
65
 
66
+ # Extract text
67
+ text = extract_text_from_pdf(pdf_path)
68
+ st.write(f"Text extracted from: {uploaded_file.name}")
69
 
70
+ # Chunk text
71
+ chunks = chunk_text(text)
72
+ st.write(f"Text chunked from: {uploaded_file.name}")
73
 
74
+ # Generate embeddings and store in FAISS
75
+ vector_db = create_embeddings_and_store(chunks, vector_db=vector_db)
76
+ st.write(f"Embeddings generated and stored for: {uploaded_file.name}")
77
 
78
  # User query input
79
  user_query = st.text_input("Enter your query:")