IProject-10 committed on
Commit
74bd715
Β·
verified Β·
1 Parent(s): ce2644e

Upload 3 files

Browse files

Initial Commits

Files changed (3) hide show
  1. app.py +119 -0
  2. chroma_store.zip +3 -0
  3. requirements +12 -0
app.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import zipfile
import chromadb
import gradio as gr
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_together import ChatTogether
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings

# Bootstrap: make sure the persisted Chroma index exists on disk.
# On first run the bundled zip is extracted; afterwards the folder is reused.
if not os.path.exists("chroma_store"):
    print("🔍 chroma_store folder not found. Attempting to unzip...")
    try:
        with zipfile.ZipFile("chroma_store.zip", "r") as zip_ref:
            zip_ref.extractall("chroma_store")
        print("✅ Successfully extracted chroma_store.zip.")
    except Exception as e:
        # Best-effort: a failure here is only logged; it will surface again
        # when PersistentClient tries to open the (missing) store below.
        print(f"❌ Failed to unzip chroma_store.zip: {e}")
else:
    print("✅ chroma_store folder already exists. Skipping unzip.")

# Initialize ChromaDB client over the persisted store.
chroma_client = chromadb.PersistentClient(path="./chroma_store")

# Vector store and retriever.
# NOTE(review): the embedding model must match the one used when the
# "imageonline_chunks" collection was built — presumably bge-base-en-v1.5.
embedding_function = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
vectorstore = Chroma(
    client=chroma_client,
    collection_name="imageonline_chunks",
    embedding_function=embedding_function
)

# Top-3 nearest chunks, restricted to documents whose metadata has
# site == "imageonline".
retriever = vectorstore.as_retriever(search_kwargs={"k": 3, "filter": {"site": "imageonline"}})
36
+
37
# Retrieval logic
def retrieve_with_metadata(query, k=5):
    """Retrieve grounding context for *query* from the vector store.

    Args:
        query: The user's natural-language question.
        k: Upper bound on how many retrieved documents are considered.
           (The retriever is configured with its own k, so this only caps
           the result; previously this parameter was accepted but unused.)

    Returns:
        dict with:
          - "context": page content of the single best-matching document,
            or a fallback message when nothing was retrieved.
          - "references": one-element list of {"section", "source"} metadata
            for the top document, or an empty list when nothing was retrieved.
    """
    # Fix: actually honor `k` instead of silently ignoring it.
    docs = retriever.get_relevant_documents(query)[:k]
    if not docs:
        return {"context": "No relevant context found.", "references": []}
    top_doc = docs[0]
    return {
        "context": top_doc.page_content,
        "references": [{
            # Default to "Unknown" so downstream formatting never KeyErrors.
            "section": top_doc.metadata.get("section", "Unknown"),
            "source": top_doc.metadata.get("source", "Unknown")
        }]
    }
50
+
51
# LLM setup.
# SECURITY FIX: the Together API key was hard-coded here (and committed to a
# public repo — that key must be revoked/rotated). It is now read from the
# TOGETHER_API_KEY environment variable; set it in the deployment secrets.
llm = ChatTogether(
    model="meta-llama/Llama-3-8b-chat-hf",
    temperature=0.3,
    max_tokens=1024,
    top_p=0.7,
    together_api_key=os.environ.get("TOGETHER_API_KEY")
)

# Prompt template: grounds the model strictly in the retrieved context.
prompt = ChatPromptTemplate.from_template("""
You are an expert assistant for ImageOnline Web Solutions.

Answer the user's query based ONLY on the following context:

{context}

Query: {question}
""")

# RAG pipeline: retrieve context for the question, fill the prompt,
# call the LLM, and parse the reply down to a plain string.
rag_chain = (
    {
        "context": lambda x: retrieve_with_metadata(x)["context"],
        "question": RunnablePassthrough()
    }
    | prompt
    | llm
    | StrOutputParser()
)
80
+
81
def get_references(query):
    """Return the reference metadata entries for *query* (possibly empty)."""
    result = retrieve_with_metadata(query)
    return result["references"]
83
+
84
# Gradio UI
def chat_interface(message, history):
    """Handle one chat turn.

    Appends the user message with a placeholder reply, runs the RAG chain,
    then replaces the placeholder with the answer plus its top reference.
    Returns (history, history) to update both the Chatbot and the State.
    """
    history = history or []
    user_turn = "🧑 You: " + message
    history.append((user_turn, "⏳ Generating response..."))
    try:
        answer = rag_chain.invoke(message)
        references = get_references(message)
        if references:
            ref = references[0]
            ref_string = f"\n\n📚 **Reference:**\nSection: {ref['section']}\nURL: {ref['source']}"
        else:
            ref_string = "\n\n📚 **Reference:**\n_None available_"
        history[-1] = (user_turn, "🤖 Bot: " + answer + ref_string)
    except Exception as e:
        # Surface the error in the chat bubble rather than crashing the UI.
        history[-1] = (user_turn, f"🤖 Bot: ⚠️ {str(e)}")
    return history, history
101
+
102
def launch_gradio():
    """Build and return the Gradio Blocks app for the chatbot UI."""
    with gr.Blocks() as demo:
        gr.Markdown("# 💬 ImageOnline RAG Chatbot")
        gr.Markdown("Ask about Website Designing, App Development, SEO, Hosting, etc.")
        chatbot = gr.Chatbot()
        state = gr.State([])
        with gr.Row():
            msg = gr.Textbox(placeholder="Ask your question here...", show_label=False, scale=8)
            send_btn = gr.Button("📨 Send", scale=1)
        # Enter in the textbox and the Send button trigger the same handler.
        for trigger in (msg.submit, send_btn.click):
            trigger(chat_interface, inputs=[msg, state], outputs=[chatbot, state])
        with gr.Row():
            clear_btn = gr.Button("🧹 Clear Chat")
        clear_btn.click(fn=lambda: ([], []), outputs=[chatbot, state])
    return demo
117
+
118
# Build the app at module level so hosting platforms that import this file
# (e.g. Gradio-SDK Spaces) can find `demo` without launching a server twice.
demo = launch_gradio()

# Launch only when executed directly, not on import.
if __name__ == "__main__":
    demo.launch()
chroma_store.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72520924e337f73860723324147380883db5b55f036ff54171358444fe2603ce
3
+ size 398677
requirements ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ langchain-huggingface
3
+ langchain-together
4
+ langchain-community
5
+ chromadb
6
+ sentence-transformers
7
+ trafilatura
8
+ beautifulsoup4
9
+ nltk
10
+ tiktoken
11
+ gradio
12
+ together