Spaces:

IProject-10
/

IOPL-Chatbot

Sleeping

App Files Files Community

IProject-10 committed 9 days ago

Commit

0872833

verified ·

1 Parent(s): 4866971

Update app.py

Browse files

Files changed (1) hide show

app.py +125 -189

app.py CHANGED Viewed

@@ -1,210 +1,146 @@
 # app.py
-import os
-import uuid
-import nltk
-import trafilatura
-import chromadb
-import tiktoken
-import gradio as gr
-from langchain_core.prompts import ChatPromptTemplate
-from langchain_core.runnables import RunnableLambda, RunnablePassthrough
-from langchain_core.output_parsers import StrOutputParser
-from langchain_together import ChatTogether
-from langchain_community.vectorstores import Chroma
-from sentence_transformers import SentenceTransformer
-from nltk.tokenize import sent_tokenize
-from langchain_huggingface import HuggingFaceEmbeddings
-# Download NLTK resources
-nltk.download('punkt')
-nltk.download('punkt_tab')
-# Initialize tokenizer
-tokenizer = tiktoken.get_encoding("cl100k_base")
-# Initialize embedding model
-embedding_model = SentenceTransformer("BAAI/bge-base-en-v1.5")
-embedding_function = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
-# Initialize ChromaDB
-chroma_client = chromadb.PersistentClient(path="./chroma_store")
-collection = chroma_client.get_or_create_collection(name="imageonline_chunks")
-# Sectioned URL List
-url_dict = {
-    "Website Designing": [
-        "https://www.imageonline.co.in/website-designing-mumbai.html",
-        "https://www.imageonline.co.in/domain-hosting-services-india.html",
-        "https://www.imageonline.co.in/best-seo-company-mumbai.html",
-        "https://www.imageonline.co.in/wordpress-blog-designing-india.html",
-        "https://www.imageonline.co.in/social-media-marketing-company-mumbai.html",
-        "https://www.imageonline.co.in/website-template-customization-india.html",
-        "https://www.imageonline.co.in/regular-website-maintanence-services.html",
-        "https://www.imageonline.co.in/mobile-app-designing-mumbai.html",
-        "https://www.imageonline.co.in/web-application-screen-designing.html"
-    ],
-    "Website Development": [
-        "https://www.imageonline.co.in/website-development-mumbai.html",
-        "https://www.imageonline.co.in/open-source-customization.html",
-        "https://www.imageonline.co.in/ecommerce-development-company-mumbai.html",
-        "https://www.imageonline.co.in/website-with-content-management-system.html",
-        "https://www.imageonline.co.in/web-application-development-india.html"
-    ],
-    "Mobile App Development": [
-        "https://www.imageonline.co.in/mobile-app-development-company-mumbai.html"
-    ],
-    "About Us": [
-        "https://www.imageonline.co.in/about-us.html",
-        "https://www.imageonline.co.in/vision.html",
-        "https://www.imageonline.co.in/team.html"
-    ],
-    "Testimonials": [
-        "https://www.imageonline.co.in/testimonial.html"
-    ]
-}
-# Helper functions
-def extract_clean_text(url):
-    try:
-        print(f"🔗 Fetching URL: {url}")
-        downloaded = trafilatura.fetch_url(url)
-        if downloaded:
-            content = trafilatura.extract(downloaded, include_comments=False, include_tables=False)
-            print(f"✅ Extracted text from {url}")
-            return content
-        else:
-            print(f"⚠️ Failed to fetch content from {url}")
-    except Exception as e:
-        print(f"❌ Error fetching {url}: {e}")
-    return None
-def chunk_text(text, max_tokens=400):
-    sentences = sent_tokenize(text)
-    chunks = []
-    current_chunk = []
-    for sentence in sentences:
-        current_chunk.append(sentence)
-        tokens = tokenizer.encode(" ".join(current_chunk))
-        if len(tokens) > max_tokens:
-            current_chunk.pop()
-            chunks.append(" ".join(current_chunk).strip())
-            current_chunk = [sentence]
-    if current_chunk:
-        chunks.append(" ".join(current_chunk).strip())
-    print(f"📄 Text split into {len(chunks)} chunks.")
-    return chunks
-# Check refresh override
-force_refresh = os.getenv("FORCE_REFRESH", "false").lower() == "true"
-# Load data into ChromaDB
-if collection.count() == 0 or force_refresh:
-    print("🔄 Loading documents into ChromaDB...")
-    for section, urls in url_dict.items():
-        for url in urls:
-            text = extract_clean_text(url)
-            if not text:
-                continue
-            chunks = chunk_text(text)
-            embeddings = embedding_model.encode(chunks, convert_to_numpy=True)
-            metadatas = [{"source": url, "section": section} for _ in chunks]
-            ids = [str(uuid.uuid4()) for _ in chunks]
-            collection.add(
-                documents=chunks,
-                embeddings=embeddings.tolist(),
-                metadatas=metadatas,
-                ids=ids
-            )
-    print("✅ Document loading complete.")
-else:
-    print("✅ Using existing ChromaDB collection.")
-# Vectorstore & Retriever
-vectorstore = Chroma(
-    client=chroma_client,
-    collection_name="imageonline_chunks",
-    embedding_function=embedding_function
-)
-retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
-# Together.ai LLM
-llm = ChatTogether(
     model="meta-llama/Llama-3-8b-chat-hf",
     temperature=0.3,
     max_tokens=1024,
-    top_p=0.7,
-    together_api_key=os.getenv("TOGETHER_API_KEY")
 )
-# Prompt template (refined)
-prompt = ChatPromptTemplate.from_template("""
-You are a helpful assistant for ImageOnline Web Solutions.
-Use ONLY the information provided in the context to answer the user's query.
-Context:
-{context}
-Question:
-{question}
-If the answer is not found in the context, say "I'm sorry, I don't have enough information to answer that."
-""")
-# Context retrieval
-def retrieve_and_format(query):
-    docs = retriever.get_relevant_documents(query)
-    context_strings = []
-    for doc in docs:
-        content = doc.page_content
-        metadata = doc.metadata
-        source = metadata.get("source", "")
-        section = metadata.get("section", "")
-        context_strings.append(f"[{section}] {content}\n(Source: {source})")
-    return "\n\n".join(context_strings)
-# RAG chain
-rag_chain = (
-    {"context": RunnableLambda(retrieve_and_format), "question": RunnablePassthrough()}
-    | prompt
-    | llm
-    | StrOutputParser()
 )
-# Gradio Interface
 def chat_interface(message, history):
     history = history or []
-    history.append(("🧑 You: " + message, "⏳ Generating response..."))
     try:
-        answer = rag_chain.invoke(message)
-        history[-1] = ("🧑 You: " + message, "🤖 Bot: " + answer)
     except Exception as e:
-        error_msg = f"⚠️ Error: {str(e)}"
-        history[-1] = ("🧑 You: " + message, f"🤖 Bot: {error_msg}")
-    return history, history
 def launch_gradio():
-    with gr.Blocks() as demo:
         gr.Markdown("# 💬 ImageOnline RAG Chatbot")
-        gr.Markdown("Ask about Website Designing, App Development, SEO, Hosting, etc.")
         chatbot = gr.Chatbot()
         state = gr.State([])
-        with gr.Row():
-            msg = gr.Textbox(placeholder="Ask your question here...", show_label=False, scale=8)
-            send_btn = gr.Button("📨 Send", scale=1)
-        msg.submit(chat_interface, inputs=[msg, state], outputs=[chatbot, state])
-        send_btn.click(chat_interface, inputs=[msg, state], outputs=[chatbot, state])
         with gr.Row():
             clear_btn = gr.Button("🧹 Clear Chat")
@@ -212,6 +148,6 @@ def launch_gradio():
     return demo
-if __name__ == "__main__":
-    demo = launch_gradio()
-    demo.launch()

 # app.py
+from llama_index.core import VectorStoreIndex, StorageContext, ServiceContext, Document
+from llama_index.vector_stores.qdrant import QdrantVectorStore
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+from llama_index.llms.together import TogetherLLM
+from llama_index.core import Settings
+from qdrant_client import QdrantClient
+# === Qdrant Config ===
+# Credentials are read from the environment (e.g. Space secrets) rather than
+# being committed to the repository. Any key that was ever committed in this
+# file should be treated as leaked and rotated.
+import os
+def _require_env(name: str) -> str:
+    """Return the value of environment variable *name*, or raise a clear error if unset/empty."""
+    value = os.getenv(name)
+    if not value:
+        raise RuntimeError(f"Missing required environment variable: {name}")
+    return value
+QDRANT_API_KEY = _require_env("QDRANT_API_KEY")
+# The cluster URL is not a secret; it can be overridden but defaults to the existing cluster.
+QDRANT_URL = os.getenv(
+    "QDRANT_URL",
+    "https://d36718f0-be68-4040-b276-f1f39bc1aeb9.us-east4-0.gcp.cloud.qdrant.io",
+)
+COLLECTION_NAME = "demo-chatbot"
+# === Embedding & LLM Setup ===
+embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")
+llm = TogetherLLM(
     model="meta-llama/Llama-3-8b-chat-hf",
+    api_key=_require_env("TOGETHER_API_KEY"),
     temperature=0.3,
     max_tokens=1024,
+    top_p=0.7
 )
+# Register the models globally so the index and query engine below pick them up.
+Settings.llm = llm
+Settings.embed_model = embed_model
+# === Qdrant Integration ===
+qdrant_client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
+vector_store = QdrantVectorStore(
+    client=qdrant_client,
+    collection_name=COLLECTION_NAME
 )
+# === Build Index ===
+# The index is built on top of the existing vector store; nothing is ingested here.
+index = VectorStoreIndex.from_vector_store(vector_store)
+query_engine = index.as_query_engine(similarity_top_k=5)
+# === Enhanced RAG Chain with References ===
+def rag_chain(query: str, include_sources: bool = True) -> str:
+    """Run *query* through the query engine and return the answer text.
+
+    When *include_sources* is true and the response yields any reference
+    links, a "Sources" section listing them is appended to the answer.
+    """
+    result = query_engine.query(query)
+    answer = str(result)
+    # Guard clauses: return the bare answer unless there are links to append.
+    if not include_sources:
+        return answer
+    refs = get_clickable_references_from_response(result)
+    if not refs:
+        return answer
+    return answer + "\n\n🔗 **Sources:**\n" + "\n".join(refs)
+# === Clickable Reference Links (top-2 from response nodes) ===
+def get_clickable_references_from_response(response, max_refs: int = 2):
+    """Return up to *max_refs* de-duplicated Markdown reference bullets.
+
+    Each entry in ``response.source_nodes`` contributes its ``section`` and
+    ``source`` metadata. Sources starting with "http" are rendered as
+    clickable Markdown links; anything else is rendered as plain text.
+
+    Args:
+        response: Object exposing ``source_nodes``; each item exposes
+            ``node.metadata`` (a mapping).
+        max_refs: Maximum number of bullets to return. Zero or a negative
+            value yields an empty list.
+
+    Returns:
+        A list of Markdown bullet strings, in source-node order.
+    """
+    links = []
+    # Check the limit up front so max_refs=0 does not still emit one link.
+    if max_refs <= 0:
+        return links
+    seen = set()
+    for node in response.source_nodes:
+        metadata = node.node.metadata or {}
+        # Missing, None or empty values fall back to "Unknown"; str() guards
+        # against non-string metadata breaking .startswith below.
+        section = str(metadata.get("section") or "Unknown")
+        source = str(metadata.get("source") or "Unknown")
+        key = (section, source)
+        if key in seen:
+            continue
+        seen.add(key)
+        if source.startswith("http"):
+            links.append(f"- [{section}]({source})")
+        else:
+            links.append(f"- {section}: {source}")
+        if len(links) >= max_refs:
+            break
+    return links
+from datetime import datetime
+import time
+import gradio as gr
+# Chat handler
 def chat_interface(message, history):
+    """Handle one chat turn and return the updated history.
+
+    Args:
+        message: Raw text from the input box.
+        history: List of (user_markdown, bot_markdown) tuples, or None.
+
+    Returns:
+        A ``(history, history, "")`` tuple: the history twice (chat display
+        and stored state) and an empty string for the input box.
+
+    Raises:
+        ValueError: If the message is empty or whitespace-only.
+    """
     history = history or []
+    # Treat a missing message like an empty one so it fails with ValueError below.
+    message = (message or "").strip()
+    if not message:
+        raise ValueError("Please enter a valid question.")
+    timestamp_user = datetime.now().strftime("%H:%M:%S")
+    user_msg = f"🧑 **You**\n{message}\n\n⏱️ {timestamp_user}"
+    # No placeholder entry or artificial delay: this function returns only once,
+    # so an interim "typing" message would never be displayed.
     try:
+        answer = rag_chain(message)  # already includes references
+        body = answer.strip()
     except Exception as e:
+        # Show the failure in the chat instead of crashing the UI callback.
+        body = f"⚠️ {str(e)}"
+    timestamp_bot = datetime.now().strftime("%H:%M:%S")
+    history.append((user_msg, f"🤖 **Bot**\n{body}\n\n⏱️ {timestamp_bot}"))
+    return history, history, ""
+# Gradio UI
 def launch_gradio():
+    """Build the chatbot's Gradio Blocks UI and return the ``gr.Blocks`` object."""
+    # The CSS passed to gr.Blocks sets an orange theme for the button and
+    # textbox selectors listed in the string.
+    with gr.Blocks(css="""
+    .gr-button {
+        background-color: orange !important;
+        color: white !important;
+        font-weight: bold;
+        border-radius: 6px !important;
+        border: 1px solid darkorange !important;
+    }
+    .gr-button:hover {
+        background-color: darkorange !important;
+    }
+    .gr-textbox textarea {
+        border: 2px solid orange !important;
+        border-radius: 6px !important;
+        padding: 0.75rem !important;
+        font-size: 1rem;
+    }
+    """) as demo:
         gr.Markdown("# 💬 ImageOnline RAG Chatbot")
+        gr.Markdown("Welcome! Ask about Website Designing, Web Development, App Development, About Us, Digital Marketing etc.")
         chatbot = gr.Chatbot()
+        # Holds the conversation history passed to and returned by chat_interface.
         state = gr.State([])
+        # Input row: question textbox and Send button side by side.
+        with gr.Row(equal_height=True):
+            msg = gr.Textbox(
+                placeholder="Ask your question here...",
+                show_label=False,
+                scale=9
+            )
+            send_btn = gr.Button("🚀 Send", scale=1)
+        # Submitting the textbox and clicking Send both call chat_interface; its three
+        # return values go to the chat display, the stored state, and the textbox.
+        msg.submit(chat_interface, inputs=[msg, state], outputs=[chatbot, state, msg])
+        send_btn.click(chat_interface, inputs=[msg, state], outputs=[chatbot, state, msg])
         with gr.Row():
             clear_btn = gr.Button("🧹 Clear Chat")
+            # NOTE(review): the diff hunk boundary falls right after this line, so any
+            # click handler wired to clear_btn is not visible in this view — confirm in the full file.
     return demo
+# Launch
+# Build the UI at import time so `demo` remains available as a module attribute,
+# but only start the server when this file is executed as a script.
+demo = launch_gradio()
+if __name__ == "__main__":
+    demo.launch()