Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import streamlit as st
|
2 |
from PyPDF2 import PdfReader
|
3 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
4 |
import os
|
@@ -81,6 +81,112 @@ def user_input(user_question):
|
|
81 |
|
82 |
|
83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
def main():
|
85 |
st.set_page_config("Chat PDF")
|
86 |
st.header("Chat with PDF using Gemini💁")
|
|
|
1 |
+
# NOTE(review): a complete commented-out copy of an earlier version of this
# app (its imports through main()) previously lived here as a module-level
# triple-quoted string literal.  It was dead code — never executed, only
# evaluated and discarded at import time — and has been removed; the live
# implementation below supersedes it.  Recover it from version control if
# ever needed.
|
107 |
+
import streamlit as st
|
108 |
+
from PyPDF2 import PdfReader
|
109 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
110 |
+
import os
|
111 |
+
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
112 |
+
import google.generativeai as genai
|
113 |
+
from langchain.vectorstores import FAISS
|
114 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
115 |
+
from langchain.chains.question_answering import load_qa_chain
|
116 |
+
from langchain.prompts import PromptTemplate
|
117 |
+
from dotenv import load_dotenv
|
118 |
+
|
119 |
+
# Load environment variables from a local .env file so the Google API key
# is available without hard-coding it in the source.
load_dotenv()

# Configure the Gemini client with the key from the environment.
# NOTE(review): if GOOGLE_API_KEY is unset this passes None and later API
# calls will fail — consider failing fast here with a clear error message.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
|
122 |
+
|
123 |
+
|
124 |
+
|
125 |
+
|
126 |
+
|
127 |
+
|
128 |
+
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every uploaded PDF.

    Args:
        pdf_docs: iterable of file-like objects accepted by ``PyPDF2.PdfReader``
            (e.g. Streamlit ``UploadedFile`` instances from ``st.file_uploader``).

    Returns:
        A single string holding the text of all pages, in upload/page order.
        Pages with no extractable text contribute nothing.
    """
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() returns None for pages without extractable text
            # (e.g. scanned images); guard so `+=` doesn't raise TypeError.
            text += page.extract_text() or ""
    return text
|
135 |
+
|
136 |
+
|
137 |
+
|
138 |
+
def get_text_chunks(text):
    """Split *text* into overlapping chunks sized for embedding.

    Uses a recursive character splitter with 10,000-character chunks and a
    1,000-character overlap so context is preserved across chunk borders.

    Returns:
        A list of chunk strings.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
    return splitter.split_text(text)
|
142 |
+
|
143 |
+
|
144 |
+
def get_vector_store(text_chunks):
    """Embed *text_chunks* with Gemini embeddings and persist a FAISS index.

    The index is saved to the local directory ``"faiss_index"``, where
    ``user_input`` later reloads it for similarity search.
    """
    embedder = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    index = FAISS.from_texts(text_chunks, embedding=embedder)
    index.save_local("faiss_index")
|
148 |
+
|
149 |
+
|
150 |
+
def get_conversational_chain():
    """Build a "stuff"-type question-answering chain on the gemini-pro model.

    The prompt instructs the model to answer strictly from the supplied
    context and to say the answer is unavailable rather than guess.

    Returns:
        The loaded QA chain, callable with ``input_documents`` and ``question``.
    """
    template = """
    Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
    provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
    Context:\n {context}?\n
    Question: \n{question}\n

    Answer:
    """

    # Build the prompt first, then the model, then wire them into the chain.
    qa_prompt = PromptTemplate(template=template, input_variables=["context", "question"])
    llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    return load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)
|
168 |
+
|
169 |
+
|
170 |
+
|
171 |
+
def user_input(user_question):
    """Answer *user_question* from the persisted FAISS index via Streamlit.

    Loads the index written by ``get_vector_store`` from ``"faiss_index"``,
    retrieves the chunks most similar to the question, runs the QA chain,
    and writes the model's reply to the Streamlit page.

    Args:
        user_question: the question string typed by the user.
    """
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

    # SECURITY: FAISS.load_local deserializes a pickle file — only ever load
    # an index this app wrote itself.  NOTE(review): recent langchain
    # versions require allow_dangerous_deserialization=True here; confirm
    # against the installed version.
    new_db = FAISS.load_local("faiss_index", embeddings)
    docs = new_db.similarity_search(user_question)

    chain = get_conversational_chain()

    response = chain(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True,
    )

    # (leftover debug print(response) removed)
    st.write("Reply: ", response["output_text"])
|
186 |
+
|
187 |
+
|
188 |
+
|
189 |
+
|
190 |
def main():
|
191 |
st.set_page_config("Chat PDF")
|
192 |
st.header("Chat with PDF using Gemini💁")
|