Spaces:

Mattral
/

Organized-Data-Chat

Runtime error

App Files Community

Mattral committed on May 14, 2024

Commit

4c6bffd

verified ·

1 Parent(s): 973fa76

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -25

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import gradio as gr
-from gradio_pdf import PDF
 from qdrant_client import models, QdrantClient
 from sentence_transformers import SentenceTransformer
 from PyPDF2 import PdfReader
@@ -24,6 +23,10 @@ llm = AutoModelForCausalLM.from_pretrained(
 )
 print("LLM loaded...")
 def get_chunks(text):
     text_splitter = RecursiveCharacterTextSplitter(
         chunk_size=250,
@@ -34,13 +37,16 @@ def get_chunks(text):
 def setup_database(files):
     all_chunks = []
     for file in files:
         reader = PdfReader(file)
         text = "".join(page.extract_text() for page in reader.pages)
         chunks = get_chunks(text)
         all_chunks.extend(chunks)
-    client = QdrantClient(path="./db")
     client.recreate_collection(
         collection_name="my_facts",
         vectors_config=models.VectorParams(
@@ -48,12 +54,13 @@ def setup_database(files):
             distance=models.Distance.COSINE,
         ),
     )
     records = [
         models.Record(
             id=idx,
             vector=encoder.encode(chunk).tolist(),
-            payload={f"chunk_{idx}": chunk}
         ) for idx, chunk in enumerate(all_chunks)
     ]
@@ -61,16 +68,16 @@ def setup_database(files):
         collection_name="my_facts",
         records=records,
     )
-def answer_question(question):
-    client = QdrantClient(path="./db")
     hits = client.search(
         collection_name="my_facts",
         query_vector=encoder.encode(question).tolist(),
         limit=3
     )
-    context = " ".join(hit.payload[f"chunk_{hit.id}"] for hit in hits)
     system_prompt = """You are a helpful co-worker, you will use the provided context to answer user questions.
     Read the given context before answering questions and think step by step. If you cannot answer a user question based on
@@ -82,29 +89,36 @@ def answer_question(question):
     instruction = f"Context: {context}\nUser: {question}"
     prompt_template = f"{B_INST}{B_SYS}{system_prompt}{E_SYS}{instruction}{E_INST}"
-    response = llm(prompt_template)
-    return response
 def chat(messages, files):
     if files:
         setup_database(files)
-    if messages:
-        question = messages[-1]["text"]
-        answer = answer_question(question)
-        messages.append({"text": answer, "is_user": False})
     return messages
-interface = gr.Interface(
-    fn=chat,
-    inputs=[
-        gr.Chatbot(label="Chat"),
-        gr.File(label="Upload PDFs", file_count="multiple")
-    ],
-    outputs=gr.Chatbot(label="Chat"),
-    title="Q&A with PDFs 👩🏻‍💻📓✍🏻💡",
-    description="This app facilitates a conversation with PDFs uploaded💡",
-    theme="soft",
-    live=True,
-)
-interface.launch()

 import gradio as gr
 from qdrant_client import models, QdrantClient
 from sentence_transformers import SentenceTransformer
 from PyPDF2 import PdfReader
 )
 print("LLM loaded...")
+# Initialize QdrantClient
+client = QdrantClient(path="./db")
+print("DB created...")
 def get_chunks(text):
     text_splitter = RecursiveCharacterTextSplitter(
         chunk_size=250,
 def setup_database(files):
     all_chunks = []
     for file in files:
         reader = PdfReader(file)
         text = "".join(page.extract_text() for page in reader.pages)
         chunks = get_chunks(text)
         all_chunks.extend(chunks)
+    print(f"Total chunks: {len(all_chunks)}")
+    print("Chunks are ready...")
     client.recreate_collection(
         collection_name="my_facts",
         vectors_config=models.VectorParams(
             distance=models.Distance.COSINE,
         ),
     )
+    print("Collection created...")
     records = [
         models.Record(
             id=idx,
             vector=encoder.encode(chunk).tolist(),
+            payload={"text": chunk}
         ) for idx, chunk in enumerate(all_chunks)
     ]
         collection_name="my_facts",
         records=records,
     )
+    print("Records uploaded...")
+def answer(question):
     hits = client.search(
         collection_name="my_facts",
         query_vector=encoder.encode(question).tolist(),
         limit=3
     )
+    context = " ".join(hit.payload["text"] for hit in hits)
     system_prompt = """You are a helpful co-worker, you will use the provided context to answer user questions.
     Read the given context before answering questions and think step by step. If you cannot answer a user question based on
     instruction = f"Context: {context}\nUser: {question}"
     prompt_template = f"{B_INST}{B_SYS}{system_prompt}{E_SYS}{instruction}{E_INST}"
+    print(prompt_template)
+    result = llm(prompt_template)
+    return result
 def chat(messages, files):
+    """Index any uploaded PDFs, then answer the latest message in the history.
+
+    Args:
+        messages: Conversation so far, as a list of dicts carrying "role"
+            and "content" keys; may be empty or None.
+        files: Uploaded PDF files to index, or a falsy value when nothing
+            was uploaded.
+
+    Returns:
+        The conversation as a list of role/content dicts. When there was
+        no message to answer, a one-message list holding the notice.
+    """
     if files:
         setup_database(files)
+    if not messages:
+        # Return the notice as a one-message history rather than a bare
+        # string, so every path hands callers the same list-of-dicts shape.
+        return [
+            {
+                "role": "assistant",
+                "content": "Please upload PDF documents to initialize the database.",
+            }
+        ]
+    last_message = messages[-1]["content"]
+    response = answer(last_message)
+    messages.append({"role": "assistant", "content": response})
     return messages
+# Chat UI: conversation view, PDF upload, and a question box with a Send button.
+with gr.Blocks() as demo:
+    # NOTE(review): chat() exchanges role/content dicts; confirm this Chatbot
+    # is configured to accept that message format on the installed Gradio.
+    chatbot = gr.Chatbot()
+    file_input = gr.File(label="Upload PDFs", file_count="multiple")
+    with gr.Row():
+        # Integer scales that keep the original 85:15 width split.
+        with gr.Column(scale=17):
+            # `container` is passed to the constructor; the chained
+            # `.style()` call is the removed legacy styling API.
+            txt = gr.Textbox(show_label=False, placeholder="Enter your question here...", container=False)
+        with gr.Column(scale=3, min_width=0):
+            send_btn = gr.Button("Send")
+    def respond(messages, files, txt):
+        """Add the typed question to the history and fetch the reply.
+
+        Returns the updated history, None to clear the file input, and an
+        empty string to clear the textbox.
+        """
+        history = list(messages or [])
+        question = (txt or "").strip()
+        if not question and not files:
+            # Nothing to ask and nothing to index: leave the chat untouched.
+            return history, None, ""
+        if question:
+            # The textbox value was previously ignored, so the question
+            # never reached chat(); record it as the latest user message.
+            history.append({"role": "user", "content": question})
+        result = chat(history, files)
+        if isinstance(result, str):
+            # Tolerate a plain-string reply from chat() by wrapping it so
+            # the Chatbot output always receives a message list.
+            result = history + [{"role": "assistant", "content": result}]
+        return result, None, ""
+    send_btn.click(respond, [chatbot, file_input, txt], [chatbot, file_input, txt])
+    txt.submit(respond, [chatbot, file_input, txt], [chatbot, file_input, txt])
+demo.launch()