Spaces:
Running
Running
Upload 3 files
Browse files
Initial Commits
- app.py +119 -0
- chroma_store.zip +3 -0
- requirements +12 -0
app.py
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import zipfile
|
3 |
+
import chromadb
|
4 |
+
import gradio as gr
|
5 |
+
from langchain_core.prompts import ChatPromptTemplate
|
6 |
+
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
|
7 |
+
from langchain_core.output_parsers import StrOutputParser
|
8 |
+
from langchain_together import ChatTogether
|
9 |
+
from langchain_community.vectorstores import Chroma
|
10 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
11 |
+
|
12 |
+
# Make sure the persisted Chroma data is unpacked before the client opens it.
# Extraction is best-effort: a failure is reported on stdout and startup
# continues with whatever is (or is not) on disk.
if os.path.exists("chroma_store"):
    print("β Successfully located chroma_store." if False else "β chroma_store folder already exists. Skipping unzip.")
else:
    print("π chroma_store folder not found. Attempting to unzip...")
    try:
        # The archive is unpacked *into* chroma_store/, so its members must be
        # the store's contents rather than a nested chroma_store/ directory.
        with zipfile.ZipFile("chroma_store.zip", "r") as zip_ref:
            zip_ref.extractall("chroma_store")
        print("β Successfully extracted chroma_store.zip.")
    except Exception as e:
        # Top-level boundary: report and carry on instead of aborting startup.
        print(f"β Failed to unzip chroma_store.zip: {e}")
|
23 |
+
|
24 |
+
# Open the on-disk Chroma database under ./chroma_store.
chroma_client = chromadb.PersistentClient(path="./chroma_store")

# Embedding model used by the vector store for similarity search.
embedding_function = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")

# LangChain wrapper around the "imageonline_chunks" collection of that client.
vectorstore = Chroma(
    collection_name="imageonline_chunks",
    embedding_function=embedding_function,
    client=chroma_client,
)

# Search settings handed to the retriever: k of 3 and a metadata filter
# on site == "imageonline".
_search_kwargs = {"k": 3, "filter": {"site": "imageonline"}}
retriever = vectorstore.as_retriever(search_kwargs=_search_kwargs)
|
36 |
+
|
37 |
+
# Retrieval logic
|
38 |
+
def retrieve_with_metadata(query, k=5):
    """Look up *query* with the module-level retriever and package the first hit.

    Args:
        query: The user's question, passed to the retriever unchanged.
        k: Accepted for backward compatibility but not used; how many
            documents are fetched is decided by the ``retriever`` object.

    Returns:
        A dict with two keys. ``"context"`` is the page content of the first
        returned document, or the sentence ``"No relevant context found."``
        when nothing was returned. ``"references"`` is a one-element list
        holding that document's ``section`` and ``source`` metadata (each
        falling back to ``"Unknown"``), or an empty list when nothing was
        returned.
    """
    # ``invoke`` is the supported entry point for retrievers;
    # ``get_relevant_documents`` is deprecated in langchain-core.
    docs = retriever.invoke(query)
    if not docs:
        return {"context": "No relevant context found.", "references": []}

    # Only the first returned document is used for context and citation.
    top_doc = docs[0]
    metadata = top_doc.metadata
    return {
        "context": top_doc.page_content,
        "references": [
            {
                "section": metadata.get("section", "Unknown"),
                "source": metadata.get("source", "Unknown"),
            }
        ],
    }
|
50 |
+
|
51 |
+
# LLM setup
|
52 |
+
# The Together API key is a credential and must not live in source control.
# It is read from the TOGETHER_API_KEY environment variable (on Hugging Face
# Spaces, configure it as a Space secret). The key that was previously
# hard-coded here has been published and should be revoked.
_together_api_key = os.environ.get("TOGETHER_API_KEY")
if not _together_api_key:
    raise RuntimeError(
        "TOGETHER_API_KEY is not set; export it (or add it as a Space secret) "
        "before starting the app."
    )

# Chat model that answers from the retrieved context.
llm = ChatTogether(
    model="meta-llama/Llama-3-8b-chat-hf",
    temperature=0.3,
    max_tokens=1024,
    top_p=0.7,
    together_api_key=_together_api_key,
)
|
59 |
+
|
60 |
+
# Prompt template
|
61 |
+
# Prompt sent to the model: the retrieved context first, then the user's query.
prompt = ChatPromptTemplate.from_template("""
You are an expert assistant for ImageOnline Web Solutions.

Answer the user's query based ONLY on the following context:

{context}

Query: {question}
""")

# The chain takes the raw question string. It fans out into the two prompt
# variables, fills the prompt, calls the LLM and parses the reply to a string.
_chain_inputs = {
    # Only the context text is needed here; the references are looked up
    # separately by the caller.
    "context": RunnableLambda(lambda x: retrieve_with_metadata(x)["context"]),
    "question": RunnablePassthrough(),
}
rag_chain = _chain_inputs | prompt | llm | StrOutputParser()
|
80 |
+
|
81 |
+
def get_references(query):
    """Return the ``"references"`` list that retrieval produces for *query*."""
    retrieval = retrieve_with_metadata(query)
    return retrieval["references"]
|
83 |
+
|
84 |
+
# Gradio UI
|
85 |
+
def chat_interface(message, history):
    """Answer *message* and record the exchange in *history*.

    Args:
        message: Text the user submitted.
        history: List of ``(user_text, bot_text)`` tuples from earlier turns.
            A falsy value starts a new list; a non-empty list is extended in
            place.

    Returns:
        The updated history twice, as a 2-tuple.
    """
    turns = history or []
    user_line = "π§ You: " + message
    # Placeholder for this turn; overwritten below by the answer or the error.
    turns.append((user_line, "β³ Generating response..."))

    try:
        reply = rag_chain.invoke(message)
        refs = get_references(message)
        if not refs:
            citation = "\n\nπ **Reference:**\n_None available_"
        else:
            first = refs[0]
            citation = f"\n\nπ **Reference:**\nSection: {first['section']}\nURL: {first['source']}"
        bot_line = "π€ Bot: " + (reply + citation)
    except Exception as e:
        # Show the failure in the chat instead of raising it to the caller.
        bot_line = f"π€ Bot: β οΈ {str(e)}"

    turns[-1] = (user_line, bot_line)
    return turns, turns
|
101 |
+
|
102 |
+
def launch_gradio():
    """Build the chat UI and return the ``gr.Blocks`` object without launching it.

    The layout is two Markdown headers, the chat display, a row with the
    question box and the send button, and a row with the clear button. The
    conversation is kept in a ``gr.State`` that starts as an empty list.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# π¬ ImageOnline RAG Chatbot")
        gr.Markdown("Ask about Website Designing, App Development, SEO, Hosting, etc.")
        chatbot = gr.Chatbot()
        state = gr.State([])

        with gr.Row():
            msg = gr.Textbox(placeholder="Ask your question here...", show_label=False, scale=8)
            send_btn = gr.Button("π¨ Send", scale=1)

        # Submitting the textbox and clicking the send button run the same
        # handler with the same inputs and outputs.
        for register in (msg.submit, send_btn.click):
            register(chat_interface, inputs=[msg, state], outputs=[chatbot, state])

        with gr.Row():
            clear_btn = gr.Button("π§Ή Clear Chat")

        # Clearing resets both the visible chat and the stored history.
        clear_btn.click(fn=lambda: ([], []), outputs=[chatbot, state])

    return demo


# Build the interface at import time and start serving it.
demo = launch_gradio()
demo.launch()
|
chroma_store.zip
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:72520924e337f73860723324147380883db5b55f036ff54171358444fe2603ce
|
3 |
+
size 398677
|
requirements
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
langchain
|
2 |
+
langchain-huggingface
|
3 |
+
langchain-together
|
4 |
+
langchain-community
|
5 |
+
chromadb
|
6 |
+
sentence-transformers
|
7 |
+
trafilatura
|
8 |
+
beautifulsoup4
|
9 |
+
nltk
|
10 |
+
tiktoken
|
11 |
+
gradio
|
12 |
+
together
|