Upload 3 files
- app.py +193 -0
- chroma_store.zip +3 -0
- requirements +12 -0
app.py
ADDED
@@ -0,0 +1,193 @@
import os
import time
import zipfile
from datetime import datetime

import chromadb
import gradio as gr
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_together import ChatTogether
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings

# Unzip the persisted Chroma store on first start
if not os.path.exists("chroma_store"):
    print("🔍 chroma_store folder not found. Attempting to unzip...")
    try:
        with zipfile.ZipFile("chroma_store.zip", "r") as zip_ref:
            zip_ref.extractall("chroma_store")
        print("✅ Successfully extracted chroma_store.zip.")
    except Exception as e:
        print(f"❌ Failed to unzip chroma_store.zip: {e}")
else:
    print("✅ chroma_store folder already exists. Skipping unzip.")

# ChromaDB setup
chroma_client = chromadb.PersistentClient(path="./chroma_store")
embedding_function = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
vectorstore = Chroma(
    client=chroma_client,
    collection_name="imageonline_chunks",
    embedding_function=embedding_function,
)

# Retriever setup (top 5 chunks, restricted to documents tagged site="imageonline")
retriever = vectorstore.as_retriever(search_kwargs={"k": 5, "filter": {"site": "imageonline"}})

# Retrieval: return the full concatenated context plus at most two unique references
def retrieve_with_metadata(query, max_refs=2):
    docs = retriever.get_relevant_documents(query)
    if not docs:
        return {"context": "No relevant context found.", "references": []}

    # Join all retrieved chunks for the LLM input
    context = "\n\n".join(doc.page_content for doc in docs)

    # Collect unique (section, source) references, capped at max_refs
    seen = set()
    references = []
    for doc in docs:
        source = doc.metadata.get("source", "Unknown")
        section = doc.metadata.get("section", "Unknown")
        key = (section, source)
        if key not in seen:
            seen.add(key)
            references.append({"section": section, "source": source})
            if len(references) >= max_refs:
                break

    return {"context": context, "references": references}

# LLM initialization; the Together API key is read from the environment (e.g. a Space secret)
llm = ChatTogether(
    model="meta-llama/Llama-3-8b-chat-hf",
    temperature=0.3,
    max_tokens=1024,
    top_p=0.7,
    together_api_key=os.environ["TOGETHER_API_KEY"],
)

# Prompt template
prompt = ChatPromptTemplate.from_template("""
You are a knowledgeable assistant for ImageOnline Pvt. Ltd.

Answer the user's query using ONLY the following context extracted from our official website.

If the answer is not clearly present in the context, say "I couldn't find the information on the site."

--------------------
{context}
--------------------

Query: {question}
""")

# RAG chain
rag_chain = (
    {
        "context": lambda x: retrieve_with_metadata(x)["context"],
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

# References for display
def get_references(query):
    return retrieve_with_metadata(query)["references"]

# Chat function
def chat_interface(message, history):
    history = history or []

    timestamp_user = datetime.now().strftime("%H:%M:%S")
    user_msg = f"🧑 **You**\n{message}\n\n<span style='font-size: 0.8em; color: gray;'>⏱️ {timestamp_user}</span>"

    bot_msg = "⏳ _Bot is typing..._"
    history.append((user_msg, bot_msg))

    try:
        time.sleep(0.5)
        answer = rag_chain.invoke(message)
        references = get_references(message)

        if references:
            ref_lines = "\n".join(f"{ref['section']} – {ref['source']}" for ref in references)
            ref_string = f"\n\n📚 **Reference(s):**\n{ref_lines}"
        else:
            ref_string = "\n\n📚 **Reference(s):**\n_None available_"

        full_response = answer.strip() + ref_string
        timestamp_bot = datetime.now().strftime("%H:%M:%S")
        bot_msg = f"🤖 **Bot**\n{full_response}\n\n<span style='font-size: 0.8em; color: gray;'>⏱️ {timestamp_bot}</span>"

        history[-1] = (user_msg, bot_msg)

    except Exception as e:
        timestamp_bot = datetime.now().strftime("%H:%M:%S")
        error_msg = f"🤖 **Bot**\n⚠️ {str(e)}\n\n<span style='font-size: 0.8em; color: gray;'>⏱️ {timestamp_bot}</span>"
        history[-1] = (user_msg, error_msg)

    return history, history, ""

# Gradio launcher
def launch_gradio():
    with gr.Blocks(css="""
    .gr-button {
        background-color: orange !important;
        color: white !important;
        font-weight: bold;
        border-radius: 6px !important;
        border: 1px solid darkorange !important;
    }
    .gr-button:hover {
        background-color: darkorange !important;
    }
    .gr-textbox textarea {
        border: 2px solid orange !important;
        border-radius: 6px !important;
        padding: 0.75rem !important;
        font-size: 1rem;
    }
    """) as demo:

        # Header and subtitle
        gr.Markdown("# 💬 ImageOnline RAG Chatbot")
        gr.Markdown("Welcome! Ask about Website Designing, Web Development, App Development, About Us, Digital Marketing, etc.")

        chatbot = gr.Chatbot()
        state = gr.State([])

        with gr.Row(equal_height=True):
            msg = gr.Textbox(
                placeholder="Ask your question here...",
                show_label=False,
                scale=9,
            )
            send_btn = gr.Button("🚀 Send", scale=1)

        msg.submit(chat_interface, inputs=[msg, state], outputs=[chatbot, state, msg])
        send_btn.click(chat_interface, inputs=[msg, state], outputs=[chatbot, state, msg])

        with gr.Row():
            clear_btn = gr.Button("🧹 Clear Chat")
            clear_btn.click(fn=lambda: ([], []), outputs=[chatbot, state])

    return demo

# Launch the app
demo = launch_gradio()
demo.launch()
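app.py only reads from the vector store; the code that built the imageonline_chunks collection is not part of this upload. The sketch below is a hypothetical reconstruction of that ingestion step, reusing the embedding model, collection name, and metadata keys (site, source, section) that app.py expects. The sample page data, chunk sizes, and splitter choice are assumptions.

import chromadb
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Placeholder crawl output; a real run would extract this text from the live site.
pages = [
    {
        "text": "ImageOnline Pvt. Ltd. designs responsive websites and web applications...",
        "source": "https://example.com/web-designing",  # placeholder URL
        "section": "Website Designing",
    },
]

# Split each page into overlapping chunks and attach the metadata app.py filters on
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
texts, metadatas = [], []
for page in pages:
    for chunk in splitter.split_text(page["text"]):
        texts.append(chunk)
        metadatas.append({
            "site": "imageonline",  # matches the retriever filter in app.py
            "source": page["source"],
            "section": page["section"],
        })

# Persist the chunks into the same store app.py opens at startup
client = chromadb.PersistentClient(path="./chroma_store")
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
Chroma.from_texts(
    texts=texts,
    embedding=embeddings,
    metadatas=metadatas,
    client=client,
    collection_name="imageonline_chunks",
)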
chroma_store.zip
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:66e8b84ee00bcc26367fe813d3759368e2a24d5509a2cc26c4cbf20c39dfed5d
size 822068
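This is a Git LFS pointer, so the repository stores only the hash and size (about 0.8 MB) of the actual archive. After app.py extracts it, a quick check along these lines can confirm the store contains the expected collection (a sketch; the path and collection name come from app.py):

import chromadb

# Open the extracted store and report how many chunks the collection holds
client = chromadb.PersistentClient(path="./chroma_store")
collection = client.get_collection("imageonline_chunks")
print(f"imageonline_chunks holds {collection.count()} chunks")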
requirements
ADDED
@@ -0,0 +1,12 @@
langchain
langchain-huggingface
langchain-together
langchain-community
chromadb
sentence-transformers
trafilatura
beautifulsoup4
nltk
tiktoken
gradio
together