Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -34,6 +34,7 @@ CHAT_VECTOR_MODE = "vector"
|
|
34 |
CHAT_GRAPH_MODE = "graph"
|
35 |
CHAT_FULLTEXT_MODE = "fulltext"
|
36 |
CHAT_HYBRID_MODE = "hybrid"
|
|
|
37 |
|
38 |
class Neo4jRAGSystem:
|
39 |
def __init__(self, openai_api_key: str = None):
|
@@ -133,31 +134,43 @@ class Neo4jRAGSystem:
|
|
133 |
"document_filter": True,
|
134 |
"use_vector_search": True,
|
135 |
"use_graph_search": False,
|
136 |
-
"
|
|
|
137 |
},
|
138 |
CHAT_GRAPH_MODE: {
|
139 |
"mode": "graph",
|
140 |
"document_filter": False,
|
141 |
"use_vector_search": False,
|
142 |
"use_graph_search": True,
|
143 |
-
"
|
|
|
144 |
},
|
145 |
CHAT_FULLTEXT_MODE: {
|
146 |
"mode": "fulltext",
|
147 |
"document_filter": True,
|
148 |
"use_vector_search": False,
|
149 |
"use_graph_search": False,
|
150 |
-
"
|
|
|
151 |
},
|
152 |
CHAT_HYBRID_MODE: {
|
153 |
"mode": "hybrid",
|
154 |
"document_filter": True,
|
155 |
"use_vector_search": True,
|
156 |
"use_graph_search": True,
|
|
|
157 |
"description": "Combined vector and graph search"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
158 |
}
|
159 |
}
|
160 |
-
return modes.get(mode, modes[CHAT_GRAPH_MODE])
|
161 |
|
162 |
def create_neo4j_chat_message_history(self, session_id: str, write_access: bool = True):
|
163 |
"""Create Neo4j chat message history"""
|
@@ -228,22 +241,145 @@ class Neo4jRAGSystem:
|
|
228 |
"user": "chatbot"
|
229 |
}
|
230 |
|
231 |
-
def
|
232 |
-
"""
|
233 |
try:
|
234 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
search_query = """
|
|
|
236 |
MATCH (n)
|
237 |
WHERE n.content IS NOT NULL
|
238 |
AND (toLower(n.content) CONTAINS toLower($question)
|
239 |
OR toLower(n.text) CONTAINS toLower($question)
|
240 |
OR toLower(n.title) CONTAINS toLower($question))
|
241 |
-
|
242 |
-
|
243 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
244 |
"""
|
245 |
|
246 |
-
results = self.graph.query(search_query, {"question": question})
|
247 |
|
248 |
context_parts = []
|
249 |
sources = []
|
@@ -252,18 +388,86 @@ class Neo4jRAGSystem:
|
|
252 |
content = result.get("content") or result.get("text") or ""
|
253 |
if content:
|
254 |
context_parts.append(content)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
255 |
sources.append({
|
256 |
-
"source": result.get("source", "
|
257 |
"title": result.get("title", "Unknown"),
|
258 |
-
"labels": result.get("labels", [])
|
|
|
|
|
259 |
})
|
260 |
|
261 |
return "\n\n".join(context_parts), sources
|
262 |
|
263 |
except Exception as e:
|
264 |
-
logger.
|
265 |
return "", []
|
266 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
267 |
def create_retriever(self, document_names: List[str], mode_settings: Dict):
|
268 |
"""Create retriever based on mode and documents"""
|
269 |
if mode_settings["use_vector_search"] and self.vector_store:
|
@@ -286,53 +490,82 @@ class Neo4jRAGSystem:
|
|
286 |
|
287 |
def process_chat_response(self, messages: List, history, question: str,
|
288 |
document_names: List[str], chat_mode_settings: Dict, session_id: str):
|
289 |
-
"""Process chat response for vector/hybrid modes"""
|
290 |
start_time = time.time()
|
291 |
|
292 |
try:
|
293 |
context = ""
|
294 |
sources = []
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
314 |
if not context:
|
315 |
context, fallback_sources = self.fallback_search(question, document_names)
|
316 |
sources.extend(fallback_sources)
|
|
|
317 |
|
318 |
-
# Create prompt template
|
319 |
-
if
|
320 |
-
|
321 |
-
|
322 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
323 |
Context:
|
324 |
{context}
|
325 |
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
|
|
|
|
330 |
else:
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
336 |
|
337 |
# Format chat history (exclude current question)
|
338 |
chat_history = messages[:-1]
|
@@ -347,7 +580,14 @@ If you cannot find the answer in the context, say so clearly. Always be accurate
|
|
347 |
"question": question
|
348 |
})
|
349 |
else:
|
350 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
351 |
"chat_history": chat_history,
|
352 |
"question": question
|
353 |
})
|
@@ -368,6 +608,7 @@ If you cannot find the answer in the context, say so clearly. Always be accurate
|
|
368 |
"total_tokens": 0,
|
369 |
"response_time": response_time,
|
370 |
"mode": chat_mode_settings["mode"],
|
|
|
371 |
"entities": [],
|
372 |
"metric_details": [],
|
373 |
},
|
@@ -386,12 +627,50 @@ If you cannot find the answer in the context, say so clearly. Always be accurate
|
|
386 |
"total_tokens": 0,
|
387 |
"response_time": 0,
|
388 |
"mode": chat_mode_settings["mode"],
|
|
|
389 |
"entities": [],
|
390 |
"metric_details": [],
|
391 |
},
|
392 |
"user": "chatbot"
|
393 |
}
|
394 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
395 |
def QA_RAG(self, question: str, document_names: str = "[]",
|
396 |
session_id: str = None, mode: str = CHAT_GRAPH_MODE, write_access: bool = True):
|
397 |
"""Main QA RAG function"""
|
@@ -521,13 +800,22 @@ def create_gradio_interface():
|
|
521 |
- Session ID: {result['session_id']}"""
|
522 |
|
523 |
if result['info']['sources']:
|
524 |
-
info_text += "\n\n**Sources:**"
|
525 |
for i, source in enumerate(result['info']['sources'], 1):
|
526 |
source_name = source.get('source', 'Unknown')
|
527 |
source_title = source.get('title', '')
|
528 |
-
|
529 |
-
|
|
|
|
|
530 |
info_text += f" - {source_title}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
531 |
|
532 |
status = "✅ Query completed successfully"
|
533 |
|
@@ -568,10 +856,16 @@ def create_gradio_interface():
|
|
568 |
|
569 |
with gr.Row():
|
570 |
mode_dropdown = gr.Dropdown(
|
571 |
-
choices=[
|
572 |
-
|
573 |
-
|
574 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
575 |
)
|
576 |
|
577 |
session_input = gr.Textbox(
|
@@ -624,13 +918,26 @@ def create_gradio_interface():
|
|
624 |
# Add examples
|
625 |
gr.Examples(
|
626 |
examples=[
|
627 |
-
["What information do you have about machine learning?", "[]", "
|
628 |
-
["Tell me about the documents in the database", "[]", "graph"],
|
|
|
629 |
["What are the main topics covered?", "[]", "vector"],
|
|
|
630 |
],
|
631 |
inputs=[question_input, document_input, mode_dropdown],
|
632 |
)
|
633 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
634 |
return demo
|
635 |
|
636 |
def main():
|
@@ -670,4 +977,18 @@ def QA_RAG_standalone(question: str, document_names: str = "[]",
|
|
670 |
}
|
671 |
|
672 |
if __name__ == "__main__":
|
673 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
CHAT_GRAPH_MODE = "graph"
|
35 |
CHAT_FULLTEXT_MODE = "fulltext"
|
36 |
CHAT_HYBRID_MODE = "hybrid"
|
37 |
+
CHAT_COMPREHENSIVE_MODE = "comprehensive" # Graph+Vector+Fulltext
|
38 |
|
39 |
class Neo4jRAGSystem:
|
40 |
def __init__(self, openai_api_key: str = None):
|
|
|
134 |
"document_filter": True,
|
135 |
"use_vector_search": True,
|
136 |
"use_graph_search": False,
|
137 |
+
"use_fulltext_search": False,
|
138 |
+
"description": "Vector similarity search using embeddings"
|
139 |
},
|
140 |
CHAT_GRAPH_MODE: {
|
141 |
"mode": "graph",
|
142 |
"document_filter": False,
|
143 |
"use_vector_search": False,
|
144 |
"use_graph_search": True,
|
145 |
+
"use_fulltext_search": False,
|
146 |
+
"description": "Graph-based knowledge retrieval using relationships"
|
147 |
},
|
148 |
CHAT_FULLTEXT_MODE: {
|
149 |
"mode": "fulltext",
|
150 |
"document_filter": True,
|
151 |
"use_vector_search": False,
|
152 |
"use_graph_search": False,
|
153 |
+
"use_fulltext_search": True,
|
154 |
+
"description": "Full-text keyword search"
|
155 |
},
|
156 |
CHAT_HYBRID_MODE: {
|
157 |
"mode": "hybrid",
|
158 |
"document_filter": True,
|
159 |
"use_vector_search": True,
|
160 |
"use_graph_search": True,
|
161 |
+
"use_fulltext_search": False,
|
162 |
"description": "Combined vector and graph search"
|
163 |
+
},
|
164 |
+
CHAT_COMPREHENSIVE_MODE: {
|
165 |
+
"mode": "comprehensive",
|
166 |
+
"document_filter": True,
|
167 |
+
"use_vector_search": True,
|
168 |
+
"use_graph_search": True,
|
169 |
+
"use_fulltext_search": True,
|
170 |
+
"description": "Graph+Vector+Fulltext - Complete search using all methods"
|
171 |
}
|
172 |
}
|
173 |
+
return modes.get(mode, modes[CHAT_GRAPH_MODE])
|
174 |
|
175 |
def create_neo4j_chat_message_history(self, session_id: str, write_access: bool = True):
|
176 |
"""Create Neo4j chat message history"""
|
|
|
241 |
"user": "chatbot"
|
242 |
}
|
243 |
|
244 |
+
def fulltext_search(self, question: str, document_names: List[str] = None, limit: int = 5):
    """Run a keyword search against the ``fulltext_content`` Neo4j index.

    Args:
        question: Free-text user question. Characters that are operators in
            Lucene query syntax are backslash-escaped so the text is matched
            literally (a trailing ``?`` would otherwise act as a wildcard and
            unbalanced quotes/parentheses would fail to parse).
        document_names: Optional list of names. When given, only nodes whose
            ``source`` property contains one of them are returned.
        limit: Maximum number of rows requested from Neo4j.

    Returns:
        A ``(context, sources)`` tuple. ``context`` is the matched text joined
        by blank lines; ``sources`` holds one dict per matched row with the
        keys ``source``, ``title``, ``score`` and ``search_type``. Any failure
        is logged as a warning and ``("", [])`` is returned.
    """
    # A blank query cannot match anything and is rejected by the query parser.
    if not question or not question.strip():
        return "", []

    try:
        # Create fulltext index if it doesn't exist
        self.ensure_fulltext_index()

        # Escape every Lucene operator character so user punctuation is inert.
        lucene_special = set('+-&|!(){}[]^"~*?:\\/')
        lucene_query = "".join(
            "\\" + ch if ch in lucene_special else ch for ch in question
        )

        params = {"question": lucene_query, "limit": limit}
        clauses = [
            "CALL db.index.fulltext.queryNodes('fulltext_content', $question)",
            "YIELD node, score",
        ]
        if document_names:
            clauses.append(
                "WHERE any(doc IN $doc_names WHERE node.source CONTAINS doc)"
            )
            params["doc_names"] = document_names
        clauses.extend([
            "RETURN node.content as content, node.text as text, node.title as title,",
            "       node.source as source, score, labels(node) as labels",
            "ORDER BY score DESC",
            "LIMIT $limit",
        ])
        search_query = "\n".join(clauses)

        results = self.graph.query(search_query, params)

        context_parts = []
        sources = []

        for result in results:
            content = result.get("content") or result.get("text") or ""
            # Rows without any text contribute neither context nor a source.
            if content:
                context_parts.append(content)
                sources.append({
                    # The query always returns these columns, so a missing
                    # property arrives as None; `or` makes the label apply.
                    "source": result.get("source") or "Fulltext Search",
                    "title": result.get("title") or "Unknown",
                    "score": result.get("score", 0),
                    "search_type": "fulltext"
                })

        return "\n\n".join(context_parts), sources

    except Exception as e:
        logger.warning(f"Fulltext search failed: {e}")
        return "", []
|
294 |
+
|
295 |
+
def ensure_fulltext_index(self):
    """Make sure the ``fulltext_content`` fulltext index exists.

    Looks the index up with ``SHOW INDEXES`` and creates it over the
    ``content``, ``text`` and ``title`` properties of ``Document``, ``Chunk``
    and ``Text`` nodes when it is missing. A successful check is remembered
    on the instance so that later calls return immediately instead of
    issuing another database round trip per search.

    Errors are logged as warnings and never raised; after a failure the
    check is retried on the next call.
    """
    # Already verified during this instance's lifetime: nothing to do.
    if getattr(self, "_fulltext_index_ready", False):
        return

    try:
        # Check if fulltext index exists
        check_query = """
        SHOW INDEXES YIELD name, type
        WHERE name = 'fulltext_content' AND type = 'FULLTEXT'
        RETURN count(*) as count
        """
        result = self.graph.query(check_query)

        if not result or result[0]["count"] == 0:
            # Create fulltext index
            create_index_query = """
            CREATE FULLTEXT INDEX fulltext_content IF NOT EXISTS
            FOR (n:Document|Chunk|Text)
            ON EACH [n.content, n.text, n.title]
            """
            self.graph.query(create_index_query)
            logger.info("✅ Created fulltext index")
        else:
            logger.info("✅ Fulltext index already exists")

        # Only reached when both queries succeeded.
        self._fulltext_index_ready = True

    except Exception as e:
        logger.warning(f"Could not create fulltext index: {e}")
|
320 |
+
|
321 |
+
def vector_search(self, question: str, document_names: List[str] = None, k: int = 5):
    """Perform a similarity search through ``self.vector_store``.

    Args:
        question: Text handed to the retriever's ``invoke``.
        document_names: Optional list of names; when given it is passed to
            the retriever as ``filter={"document_name": {"$in": names}}``.
        k: Number of documents requested from the retriever.

    Returns:
        A ``(context, sources)`` tuple. ``context`` is every document's
        ``page_content`` joined by blank lines; ``sources`` has one dict per
        document with ``source``, ``content`` (a preview cut to 200
        characters plus ``"..."``) and ``search_type``. Returns ``("", [])``
        when no vector store is configured or when anything fails (the
        failure is logged as a warning).
    """
    try:
        if not self.vector_store:
            return "", []

        search_kwargs = {"k": k}
        if document_names:
            search_kwargs["filter"] = {"document_name": {"$in": document_names}}

        retriever = self.vector_store.as_retriever(
            search_type="similarity",
            search_kwargs=search_kwargs
        )

        relevant_docs = retriever.invoke(question)

        context = "\n\n".join([doc.page_content for doc in relevant_docs])

        sources = []
        for doc in relevant_docs:
            text = doc.page_content
            preview = text[:200] + "..." if len(text) > 200 else text
            # Metadata may be absent or hold an explicit None for "source";
            # fall back to the generic label in both cases.
            metadata = doc.metadata or {}
            sources.append({
                "source": metadata.get("source") or "Vector Search",
                "content": preview,
                "search_type": "vector"
            })

        return context, sources

    except Exception as e:
        logger.warning(f"Vector search failed: {e}")
        return "", []
|
353 |
+
|
354 |
+
def graph_search(self, question: str, limit: int = 5):
|
355 |
+
"""Perform graph-based search using relationships"""
|
356 |
+
try:
|
357 |
+
# Search for connected information in the graph
|
358 |
search_query = """
|
359 |
+
// Find nodes matching the question
|
360 |
MATCH (n)
|
361 |
WHERE n.content IS NOT NULL
|
362 |
AND (toLower(n.content) CONTAINS toLower($question)
|
363 |
OR toLower(n.text) CONTAINS toLower($question)
|
364 |
OR toLower(n.title) CONTAINS toLower($question))
|
365 |
+
|
366 |
+
// Get connected nodes for additional context
|
367 |
+
OPTIONAL MATCH (n)-[r]-(connected)
|
368 |
+
WHERE connected.content IS NOT NULL OR connected.text IS NOT NULL
|
369 |
+
|
370 |
+
WITH n, collect(DISTINCT connected) as connected_nodes
|
371 |
+
|
372 |
+
RETURN n.content as content, n.text as text, n.title as title,
|
373 |
+
n.source as source, labels(n) as labels,
|
374 |
+
[node IN connected_nodes | {
|
375 |
+
content: coalesce(node.content, node.text),
|
376 |
+
title: node.title,
|
377 |
+
relationship: 'connected'
|
378 |
+
}][0..3] as connected_info
|
379 |
+
LIMIT $limit
|
380 |
"""
|
381 |
|
382 |
+
results = self.graph.query(search_query, {"question": question, "limit": limit})
|
383 |
|
384 |
context_parts = []
|
385 |
sources = []
|
|
|
388 |
content = result.get("content") or result.get("text") or ""
|
389 |
if content:
|
390 |
context_parts.append(content)
|
391 |
+
|
392 |
+
# Add connected context if available
|
393 |
+
connected_info = result.get("connected_info", [])
|
394 |
+
for conn in connected_info:
|
395 |
+
if conn and conn.get("content"):
|
396 |
+
context_parts.append(f"Related: {conn['content']}")
|
397 |
+
|
398 |
sources.append({
|
399 |
+
"source": result.get("source", "Graph Search"),
|
400 |
"title": result.get("title", "Unknown"),
|
401 |
+
"labels": result.get("labels", []),
|
402 |
+
"search_type": "graph",
|
403 |
+
"connected_nodes": len(connected_info)
|
404 |
})
|
405 |
|
406 |
return "\n\n".join(context_parts), sources
|
407 |
|
408 |
except Exception as e:
|
409 |
+
logger.warning(f"Graph search failed: {e}")
|
410 |
return "", []
|
411 |
|
412 |
+
def comprehensive_search(self, question: str, document_names: List[str] = None):
    """Combine vector, graph and fulltext search into one result.

    Each strategy is run in turn (vector with ``k=3``, graph and fulltext
    with ``limit=3``). A strategy that raises is logged as a warning and
    skipped, so one failing backend does not abort the others.

    Args:
        question: The user question forwarded to every strategy.
        document_names: Optional document filter, forwarded to the vector
            and fulltext strategies (the graph strategy takes no filter).

    Returns:
        A ``(context, sources)`` tuple. ``context`` concatenates the
        non-empty per-strategy contexts, each under a ``=== ... ===``
        heading. ``sources`` starts with a "Search Summary" entry carrying
        the per-strategy source counts (counted before de-duplication) and
        the number of unique sources, followed by the sources themselves,
        de-duplicated on their ``source``/``title`` pair.
    """
    # (breakdown key, label used in the log message, context heading, call)
    strategies = (
        ("vector", "Vector", "SEMANTIC SIMILARITY RESULTS",
         lambda: self.vector_search(question, document_names, k=3)),
        ("graph", "Graph", "GRAPH RELATIONSHIP RESULTS",
         lambda: self.graph_search(question, limit=3)),
        ("fulltext", "Fulltext", "KEYWORD SEARCH RESULTS",
         lambda: self.fulltext_search(question, document_names, limit=3)),
    )

    all_context = []
    all_sources = []
    search_results = {"vector": 0, "graph": 0, "fulltext": 0}

    for key, label, heading, run in strategies:
        try:
            context, sources = run()
            if context:
                all_context.append(f"=== {heading} ===\n{context}")
                all_sources.extend(sources)
                search_results[key] = len(sources)
        except Exception as e:
            logger.warning(f"{label} search in comprehensive mode failed: {e}")

    # Combine and deduplicate sources, keeping the first occurrence of each
    # source/title pair.
    unique_sources = []
    seen_sources = set()
    for source in all_sources:
        source_key = f"{source.get('source', '')}-{source.get('title', '')}"
        if source_key not in seen_sources:
            seen_sources.add(source_key)
            unique_sources.append(source)

    final_context = "\n\n".join(all_context)

    # The summary goes first so consumers can show the breakdown up front;
    # total_sources is computed before the summary itself is inserted.
    search_summary = {
        "source": "Search Summary",
        "title": "Combined Search Results",
        "search_type": "comprehensive",
        "results_breakdown": search_results,
        "total_sources": len(unique_sources)
    }
    unique_sources.insert(0, search_summary)

    return final_context, unique_sources
|
470 |
+
|
471 |
def create_retriever(self, document_names: List[str], mode_settings: Dict):
|
472 |
"""Create retriever based on mode and documents"""
|
473 |
if mode_settings["use_vector_search"] and self.vector_store:
|
|
|
490 |
|
491 |
def process_chat_response(self, messages: List, history, question: str,
|
492 |
document_names: List[str], chat_mode_settings: Dict, session_id: str):
|
493 |
+
"""Process chat response for vector/hybrid/comprehensive modes"""
|
494 |
start_time = time.time()
|
495 |
|
496 |
try:
|
497 |
context = ""
|
498 |
sources = []
|
499 |
+
search_method = "standard"
|
500 |
+
|
501 |
+
# Choose search strategy based on mode
|
502 |
+
if chat_mode_settings["mode"] == "comprehensive":
|
503 |
+
context, sources = self.comprehensive_search(question, document_names)
|
504 |
+
search_method = "comprehensive"
|
505 |
+
elif chat_mode_settings["use_vector_search"] and chat_mode_settings["use_graph_search"]:
|
506 |
+
# Hybrid mode: combine vector and graph
|
507 |
+
vector_context, vector_sources = self.vector_search(question, document_names, k=3)
|
508 |
+
graph_context, graph_sources = self.graph_search(question, limit=3)
|
509 |
+
|
510 |
+
context_parts = []
|
511 |
+
if vector_context:
|
512 |
+
context_parts.append(f"=== VECTOR SEARCH RESULTS ===\n{vector_context}")
|
513 |
+
if graph_context:
|
514 |
+
context_parts.append(f"=== GRAPH SEARCH RESULTS ===\n{graph_context}")
|
515 |
+
|
516 |
+
context = "\n\n".join(context_parts)
|
517 |
+
sources = vector_sources + graph_sources
|
518 |
+
search_method = "hybrid"
|
519 |
+
|
520 |
+
elif chat_mode_settings["use_vector_search"]:
|
521 |
+
context, sources = self.vector_search(question, document_names)
|
522 |
+
search_method = "vector"
|
523 |
+
elif chat_mode_settings["use_fulltext_search"]:
|
524 |
+
context, sources = self.fulltext_search(question, document_names)
|
525 |
+
search_method = "fulltext"
|
526 |
+
elif chat_mode_settings["use_graph_search"]:
|
527 |
+
context, sources = self.graph_search(question)
|
528 |
+
search_method = "graph"
|
529 |
+
|
530 |
+
# Fallback if no context found
|
531 |
if not context:
|
532 |
context, fallback_sources = self.fallback_search(question, document_names)
|
533 |
sources.extend(fallback_sources)
|
534 |
+
search_method += "_with_fallback"
|
535 |
|
536 |
+
# Create enhanced prompt template for comprehensive mode
|
537 |
+
if chat_mode_settings["mode"] == "comprehensive":
|
538 |
+
system_message = """You are a highly capable AI assistant with access to comprehensive search results from multiple sources:
|
539 |
+
|
540 |
+
π **SEARCH METHODS USED:**
|
541 |
+
- π **Vector Search**: Semantic similarity using embeddings
|
542 |
+
- 🕸️ **Graph Search**: Relationship-based knowledge traversal
|
543 |
+
- π **Fulltext Search**: Keyword and phrase matching
|
544 |
+
|
545 |
+
The context below contains results from all these search methods. Use this comprehensive information to provide the most accurate and complete answer possible.
|
546 |
+
|
547 |
Context:
|
548 |
{context}
|
549 |
|
550 |
+
**Instructions:**
|
551 |
+
- Synthesize information from all search methods
|
552 |
+
- Prioritize accuracy and completeness
|
553 |
+
- Mention when information comes from relationships vs. direct content
|
554 |
+
- If conflicting information exists, note the discrepancies
|
555 |
+
- Cite sources when possible"""
|
556 |
else:
|
557 |
+
system_message = """You are a helpful AI assistant. Use the following context to answer the user's question.
|
558 |
+
|
559 |
+
Context:
|
560 |
+
{context}
|
561 |
+
|
562 |
+
If you cannot find the answer in the context, say so clearly. Always be accurate and helpful."""
|
563 |
+
|
564 |
+
prompt_template = ChatPromptTemplate.from_messages([
|
565 |
+
("system", system_message),
|
566 |
+
MessagesPlaceholder(variable_name="chat_history"),
|
567 |
+
("human", "{question}")
|
568 |
+
])
|
569 |
|
570 |
# Format chat history (exclude current question)
|
571 |
chat_history = messages[:-1]
|
|
|
580 |
"question": question
|
581 |
})
|
582 |
else:
|
583 |
+
# No context found, use LLM knowledge
|
584 |
+
no_context_prompt = ChatPromptTemplate.from_messages([
|
585 |
+
("system", "You are a helpful AI assistant. The user is asking about information that may not be in the knowledge base. Answer based on your general knowledge while noting that you don't have specific context from their documents."),
|
586 |
+
MessagesPlaceholder(variable_name="chat_history"),
|
587 |
+
("human", "{question}")
|
588 |
+
])
|
589 |
+
no_context_chain = no_context_prompt | self.llm | StrOutputParser()
|
590 |
+
response = no_context_chain.invoke({
|
591 |
"chat_history": chat_history,
|
592 |
"question": question
|
593 |
})
|
|
|
608 |
"total_tokens": 0,
|
609 |
"response_time": response_time,
|
610 |
"mode": chat_mode_settings["mode"],
|
611 |
+
"search_method": search_method,
|
612 |
"entities": [],
|
613 |
"metric_details": [],
|
614 |
},
|
|
|
627 |
"total_tokens": 0,
|
628 |
"response_time": 0,
|
629 |
"mode": chat_mode_settings["mode"],
|
630 |
+
"search_method": "error",
|
631 |
"entities": [],
|
632 |
"metric_details": [],
|
633 |
},
|
634 |
"user": "chatbot"
|
635 |
}
|
636 |
|
637 |
+
def fallback_search(self, question: str, document_names: List[str] = None, limit: int = 5):
    """Fallback search using a direct substring match in Neo4j.

    Matches nodes that have a ``content`` property and whose ``content``,
    ``text`` or ``title`` contains the question text (case-insensitive).

    Args:
        question: Text looked for inside the node properties.
        document_names: Optional list of names. When given, only nodes whose
            ``source`` property contains one of them are returned — the same
            predicate ``fulltext_search`` applies — so the fallback does not
            return material from documents the caller excluded.
        limit: Maximum number of rows requested (defaults to 5).

    Returns:
        A ``(context, sources)`` tuple. ``context`` is the matched text
        joined by blank lines; ``sources`` holds one dict per matched row
        with ``source``, ``title``, ``labels`` and ``search_type``. Any
        failure is logged as an error and ``("", [])`` is returned.
    """
    try:
        params = {"question": question, "limit": limit}
        clauses = [
            "MATCH (n)",
            "WHERE n.content IS NOT NULL",
            "AND (toLower(n.content) CONTAINS toLower($question)",
            "     OR toLower(n.text) CONTAINS toLower($question)",
            "     OR toLower(n.title) CONTAINS toLower($question))",
        ]
        if document_names:
            clauses.append(
                "AND any(doc IN $doc_names WHERE n.source CONTAINS doc)"
            )
            params["doc_names"] = document_names
        clauses.extend([
            "RETURN n.content as content, n.text as text, n.title as title,",
            "       n.source as source, labels(n) as labels",
            "LIMIT $limit",
        ])
        search_query = "\n".join(clauses)

        results = self.graph.query(search_query, params)

        context_parts = []
        sources = []

        for result in results:
            content = result.get("content") or result.get("text") or ""
            # Rows without any text contribute neither context nor a source.
            if content:
                context_parts.append(content)
                sources.append({
                    # The query always returns these columns, so a missing
                    # property arrives as None; `or` makes the label apply.
                    "source": result.get("source") or "Neo4j Graph",
                    "title": result.get("title") or "Unknown",
                    "labels": result.get("labels") or [],
                    "search_type": "fallback"
                })

        return "\n\n".join(context_parts), sources

    except Exception as e:
        logger.error(f"Fallback search failed: {e}")
        return "", []
|
673 |
+
|
674 |
def QA_RAG(self, question: str, document_names: str = "[]",
|
675 |
session_id: str = None, mode: str = CHAT_GRAPH_MODE, write_access: bool = True):
|
676 |
"""Main QA RAG function"""
|
|
|
800 |
- Session ID: {result['session_id']}"""
|
801 |
|
802 |
if result['info']['sources']:
|
803 |
+
info_text += "\n\n**Sources & Search Methods:**"
|
804 |
for i, source in enumerate(result['info']['sources'], 1):
|
805 |
source_name = source.get('source', 'Unknown')
|
806 |
source_title = source.get('title', '')
|
807 |
+
search_type = source.get('search_type', 'unknown')
|
808 |
+
|
809 |
+
info_text += f"\n{i}. **{source_name}**"
|
810 |
+
if source_title and source_title != 'Unknown':
|
811 |
info_text += f" - {source_title}"
|
812 |
+
if search_type != 'unknown':
|
813 |
+
info_text += f" `({search_type})`"
|
814 |
+
|
815 |
+
# Add special info for comprehensive search summary
|
816 |
+
if source.get('results_breakdown'):
|
817 |
+
breakdown = source['results_breakdown']
|
818 |
+
info_text += f"\n π Results: Vector({breakdown['vector']}), Graph({breakdown['graph']}), Fulltext({breakdown['fulltext']})"
|
819 |
|
820 |
status = "✅ Query completed successfully"
|
821 |
|
|
|
856 |
|
857 |
with gr.Row():
|
858 |
mode_dropdown = gr.Dropdown(
|
859 |
+
choices=[
|
860 |
+
CHAT_COMPREHENSIVE_MODE,
|
861 |
+
CHAT_GRAPH_MODE,
|
862 |
+
CHAT_VECTOR_MODE,
|
863 |
+
CHAT_HYBRID_MODE,
|
864 |
+
CHAT_FULLTEXT_MODE
|
865 |
+
],
|
866 |
+
value=CHAT_COMPREHENSIVE_MODE,
|
867 |
+
label="Search Mode",
|
868 |
+
info="Comprehensive mode uses Graph+Vector+Fulltext for best results"
|
869 |
)
|
870 |
|
871 |
session_input = gr.Textbox(
|
|
|
918 |
# Add examples
|
919 |
gr.Examples(
|
920 |
examples=[
|
921 |
+
["What information do you have about machine learning?", "[]", "comprehensive"],
|
922 |
+
["Tell me about the documents in the database", "[]", "graph"],
|
923 |
+
["Search for specific keywords in the content", "[]", "fulltext"],
|
924 |
["What are the main topics covered?", "[]", "vector"],
|
925 |
+
["Find connections between different concepts", "[]", "hybrid"],
|
926 |
],
|
927 |
inputs=[question_input, document_input, mode_dropdown],
|
928 |
)
|
929 |
+
|
930 |
+
# Add mode descriptions
|
931 |
+
gr.Markdown("""
|
932 |
+
## π **Search Mode Descriptions:**
|
933 |
+
|
934 |
+
- **🎯 Comprehensive**: Uses Graph+Vector+Fulltext search for the most complete results
|
935 |
+
- **🕸️ Graph**: Explores relationships and connections in your knowledge graph
|
936 |
+
- **🧠 Vector**: Semantic similarity search using AI embeddings
|
937 |
+
- **π Hybrid**: Combines vector similarity with graph relationships
|
938 |
+
- **π Fulltext**: Traditional keyword and phrase search
|
939 |
+
""")
|
940 |
+
|
941 |
return demo
|
942 |
|
943 |
def main():
|
|
|
977 |
}
|
978 |
|
979 |
if __name__ == "__main__":
|
980 |
+
main()
|
981 |
+
|
982 |
+
# For Hugging Face Spaces deployment, you might also want to create requirements.txt:
|
983 |
+
"""
|
984 |
+
# requirements.txt content:
|
985 |
+
langchain==0.1.0
|
986 |
+
langchain-neo4j==0.0.5
|
987 |
+
langchain-openai==0.0.8
|
988 |
+
langchain-community==0.0.13
|
989 |
+
gradio==4.15.0
|
990 |
+
python-dotenv==1.0.0
|
991 |
+
neo4j==5.16.0
|
992 |
+
openai==1.10.0
|
993 |
+
tiktoken==0.5.2
|
994 |
+
"""
|