# NOTE(review): removed non-Python web-scrape artifacts ("Spaces:" / "Sleeping")
# that preceded the module source; they were not part of the program.
# rag_system.py
#
# Retrieval-augmented generation (RAG) pipeline for answering puppy-care
# questions from a document retriever, built on LangChain and LangGraph.

import logging
from typing import Dict, List, Optional, TypedDict

from langchain_openai import ChatOpenAI
from langchain_core.documents import Document
from langchain_core.messages import HumanMessage
from langchain.prompts import ChatPromptTemplate
# NOTE(review): `tool` is imported but never applied below — confirm whether
# ai_rag_tool in RAGSystem.create_rag_tool was meant to be wrapped with @tool.
from langchain_core.tools import tool
from langgraph.graph import StateGraph, START, END

# Module-wide logging: timestamped, level-tagged messages at INFO and above.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
# RAG prompt for puppy-related questions | |
# Prompt template for the RAG chain: constrains the model to answer
# puppy-care questions strictly from the retrieved context, and to admit
# ignorance rather than hallucinate. Placeholders: {question}, {context}.
RAG_PROMPT = """
You are an assistant specialized in puppy education and care.
Your role is to help new puppy owners by answering their questions with accuracy and kindness.
Use only the information provided in the context to formulate your answers.
If you cannot find the information in the context, just say "I don't know".
### Question
{question}
### Context
{context}
"""
class State(TypedDict):
    """Shared state flowing through the LangGraph RAG pipeline.

    Keys:
        question: The user's raw question.
        context:  Documents returned by the retriever for the question.
        response: The LLM's final answer text.
    """
    question: str
    context: List[Document]
    response: str
class RAGSystem:
    """Retrieval-augmented generation system for puppy-related questions.

    Wires a retriever and a chat model into a two-step LangGraph pipeline
    (retrieve -> generate) and exposes helpers to run a query and to
    package the pipeline as an agent tool.
    """

    def __init__(self, retriever, model_name: str = "gpt-4o-mini"):
        """
        Args:
            retriever: Any object exposing ``invoke(question) -> List[Document]``
                (e.g. a LangChain retriever).
            model_name: Name of the OpenAI chat model used for generation.
        """
        self.retriever = retriever
        self.llm = ChatOpenAI(model=model_name)
        self.rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
        self.graph_rag = self._build_graph()

    def _build_graph(self):
        """Builds and compiles the two-node RAG graph (retrieve -> generate)."""

        def retrieve(state):
            # Fetch documents relevant to the question; merged into state["context"].
            retrieved_docs = self.retriever.invoke(state["question"])
            return {"context": retrieved_docs}

        def generate(state):
            # Concatenate the retrieved chunks and ask the LLM,
            # grounded by RAG_PROMPT so it answers only from context.
            docs_content = "\n\n".join([doc.page_content for doc in state["context"]])
            messages = self.rag_prompt.format_messages(
                question=state["question"],
                context=docs_content
            )
            response = self.llm.invoke(messages)
            return {"response": response.content}

        # add_sequence registers both nodes and chains retrieve -> generate;
        # only the entry edge from START needs to be added explicitly.
        graph_builder = StateGraph(State).add_sequence([retrieve, generate])
        graph_builder.add_edge(START, "retrieve")
        return graph_builder.compile()

    def process_query(self, question: str) -> Dict:
        """Processes a query and returns the response with source details.

        Args:
            question: The user's question.

        Returns:
            Dict with keys:
              - "response": the generated answer text,
              - "context": the raw retrieved documents,
              - "sources_info": one provenance dict per chunk,
              - "total_chunks": number of retrieved documents.
        """
        result = self.graph_rag.invoke({"question": question})

        # Build a human-readable provenance entry for each retrieved chunk.
        sources_info = []
        for i, doc in enumerate(result["context"], 1):
            metadata = doc.metadata
            source_name = metadata.get('source', 'Unknown')
            page = metadata.get('page', 'N/A')
            chapter = metadata.get('chapter', '')

            # Chapter is optional metadata; mention it only when present.
            if chapter:
                source_desc = f"Chunk {i} - {source_name} (Chapter: {chapter}, Page: {page})"
            else:
                source_desc = f"Chunk {i} - {source_name} (Page: {page})"

            sources_info.append({
                'chunk_number': i,
                'description': source_desc,
                'source': source_name,
                'page': page,
                'chapter': chapter,
                # Truncate long chunks so the preview stays at most 100 chars + ellipsis.
                'content_preview': doc.page_content[:100] + "..." if len(doc.page_content) > 100 else doc.page_content
            })

        return {
            "response": result["response"],
            "context": result["context"],
            "sources_info": sources_info,
            "total_chunks": len(result["context"])
        }

    def create_rag_tool(self):
        """Creates a RAG tool (plain callable) for use by an agent."""
        # Capture the current instance so the closure can reach the graph.
        rag_system = self

        def ai_rag_tool(question: str) -> Dict:
            """MANDATORY for all questions about puppies, their behavior, education or training.
            This tool accesses a specialized knowledge base on puppies with expert and reliable information.
            Any question regarding puppy care, education, behavior or health MUST be processed by this tool.
            The input must be a complete question."""
            # Run the full RAG pipeline and repackage the result for the agent.
            result = rag_system.process_query(question)
            return {
                "messages": [HumanMessage(content=result["response"])],
                "context": result["context"],
                "sources_info": result["sources_info"],
                "total_chunks": result["total_chunks"]
            }

        return ai_rag_tool