IProject-10 committed on
Commit a855c64 · verified · 1 Parent(s): c5dfc96

Upload 3 files

Files changed (3)
  1. app.py +193 -0
  2. chroma_store.zip +3 -0
  3. requirements +12 -0
app.py ADDED
@@ -0,0 +1,193 @@
import os
import time
import zipfile
from datetime import datetime

import chromadb
import gradio as gr
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_together import ChatTogether
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings

# Check if chroma_store exists; unzip it if missing
if not os.path.exists("chroma_store"):
    print("🔍 chroma_store folder not found. Attempting to unzip...")
    try:
        with zipfile.ZipFile("chroma_store.zip", "r") as zip_ref:
            zip_ref.extractall("chroma_store")
        print("✅ Successfully extracted chroma_store.zip.")
    except Exception as e:
        print(f"❌ Failed to unzip chroma_store.zip: {e}")
else:
    print("✅ chroma_store folder already exists. Skipping unzip.")

# ChromaDB setup
chroma_client = chromadb.PersistentClient(path="./chroma_store")
embedding_function = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
vectorstore = Chroma(
    client=chroma_client,
    collection_name="imageonline_chunks",
    embedding_function=embedding_function
)

# Retriever setup (k=5, restricted to chunks tagged with site="imageonline")
retriever = vectorstore.as_retriever(search_kwargs={"k": 5, "filter": {"site": "imageonline"}})

# Retrieval logic: return the full concatenated context and the top 2 unique references
def retrieve_with_metadata(query, k=5, max_refs=2):
    docs = retriever.get_relevant_documents(query)
    if not docs:
        return {
            "context": "No relevant context found.",
            "references": []
        }

    # Join all documents for LLM input
    context = "\n\n".join(doc.page_content for doc in docs)

    # Unique references (max 2)
    seen = set()
    references = []
    for doc in docs:
        source = doc.metadata.get("source", "Unknown")
        section = doc.metadata.get("section", "Unknown")
        key = (section, source)
        if key not in seen:
            seen.add(key)
            references.append({"section": section, "source": source})
        if len(references) >= max_refs:
            break

    return {
        "context": context,
        "references": references
    }

# LLM initialization
llm = ChatTogether(
    model="meta-llama/Llama-3-8b-chat-hf",
    temperature=0.3,
    max_tokens=1024,
    top_p=0.7,
    # Read the Together API key from the environment rather than hardcoding it in source
    together_api_key=os.environ.get("TOGETHER_API_KEY")
)

# Prompt template
prompt = ChatPromptTemplate.from_template("""
You are a knowledgeable assistant for ImageOnline Pvt. Ltd.

Answer the user's query using ONLY the following context extracted from our official website.

If the answer is not clearly present in the context, say "I couldn't find the information on the site."

--------------------
{context}
--------------------

Query: {question}
""")

# RAG chain
rag_chain = (
    {
        "context": lambda x: retrieve_with_metadata(x)["context"],
        "question": RunnablePassthrough()
    }
    | prompt
    | llm
    | StrOutputParser()
)

# References for display
def get_references(query):
    return retrieve_with_metadata(query)["references"]

# Chat function
def chat_interface(message, history):
    history = history or []

    timestamp_user = datetime.now().strftime("%H:%M:%S")
    user_msg = f"🧑 **You**\n{message}\n\n<span style='font-size: 0.8em; color: gray;'>⏱️ {timestamp_user}</span>"

    bot_msg = "⏳ _Bot is typing..._"
    history.append((user_msg, bot_msg))

    try:
        time.sleep(0.5)
        answer = rag_chain.invoke(message)
        references = get_references(message)

        if references:
            ref_lines = "\n".join(f"{ref['section']} – {ref['source']}" for ref in references)
            ref_string = f"\n\n📚 **Reference(s):**\n{ref_lines}"
        else:
            ref_string = "\n\n📚 **Reference(s):**\n_None available_"

        full_response = answer.strip() + ref_string
        timestamp_bot = datetime.now().strftime("%H:%M:%S")
        bot_msg = f"🤖 **Bot**\n{full_response}\n\n<span style='font-size: 0.8em; color: gray;'>⏱️ {timestamp_bot}</span>"

        history[-1] = (user_msg, bot_msg)

    except Exception as e:
        timestamp_bot = datetime.now().strftime("%H:%M:%S")
        error_msg = f"🤖 **Bot**\n⚠️ {str(e)}\n\n<span style='font-size: 0.8em; color: gray;'>⏱️ {timestamp_bot}</span>"
        history[-1] = (user_msg, error_msg)

    return history, history, ""

# Gradio launcher
def launch_gradio():
    with gr.Blocks(css="""
        .gr-button {
            background-color: orange !important;
            color: white !important;
            font-weight: bold;
            border-radius: 6px !important;
            border: 1px solid darkorange !important;
        }

        .gr-button:hover {
            background-color: darkorange !important;
        }

        .gr-textbox textarea {
            border: 2px solid orange !important;
            border-radius: 6px !important;
            padding: 0.75rem !important;
            font-size: 1rem;
        }
    """) as demo:

        # Header and subtitle
        gr.Markdown("# 💬 ImageOnline RAG Chatbot")
        gr.Markdown("Welcome! Ask about Website Designing, Web Development, App Development, About Us, Digital Marketing, etc.")

        chatbot = gr.Chatbot()
        state = gr.State([])

        with gr.Row(equal_height=True):
            msg = gr.Textbox(
                placeholder="Ask your question here...",
                show_label=False,
                scale=9
            )
            send_btn = gr.Button("🚀 Send", scale=1)

        msg.submit(chat_interface, inputs=[msg, state], outputs=[chatbot, state, msg])
        send_btn.click(chat_interface, inputs=[msg, state], outputs=[chatbot, state, msg])

        with gr.Row():
            clear_btn = gr.Button("🧹 Clear Chat")
            clear_btn.click(fn=lambda: ([], []), outputs=[chatbot, state])

    return demo

# Launch the app
demo = launch_gradio()
demo.launch()
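
Note: the chain above can also be exercised without the Gradio UI. A minimal sketch, assuming the setup portion of app.py has already run (chroma_store extracted, TOGETHER_API_KEY set); the sample question is illustrative and not part of the commit:

# Hypothetical smoke test for the chain defined in app.py (question text is made up)
sample_question = "What web development services does ImageOnline offer?"
print(rag_chain.invoke(sample_question))    # plain-string answer via StrOutputParser
print(get_references(sample_question))      # up to two {"section": ..., "source": ...} dicts
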
chroma_store.zip ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:66e8b84ee00bcc26367fe813d3759368e2a24d5509a2cc26c4cbf20c39dfed5d
size 822068
requirements ADDED
@@ -0,0 +1,12 @@
langchain
langchain-huggingface
langchain-together
langchain-community
chromadb
sentence-transformers
trafilatura
beautifulsoup4
nltk
tiktoken
gradio
together
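
The scraping-related packages above (trafilatura, beautifulsoup4, nltk) suggest the persisted chroma_store was produced by a separate ingestion step that is not part of this commit. Purely as a hedged sketch of how chunks carrying the metadata app.py relies on ("site", "source", "section") could be written into the same collection; the chunk text, URL, and section label below are invented for illustration:

# Hypothetical ingestion sketch (not part of this commit); metadata keys mirror
# what app.py filters on and displays: "site", "source", "section".
import chromadb
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings

client = chromadb.PersistentClient(path="./chroma_store")
store = Chroma(
    client=client,
    collection_name="imageonline_chunks",
    embedding_function=HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5"),
)
store.add_texts(
    texts=["Example page chunk about website designing services..."],  # invented chunk text
    metadatas=[{
        "site": "imageonline",                          # must match the retriever filter in app.py
        "source": "https://example.com/web-designing",  # invented URL
        "section": "Website Designing",                 # invented section label
    }],
)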