# puppycompanion-v3 / rag_system.py
# Deploy Script
# Deploy PuppyCompanion FastAPI 2025-06-02 09:57:27
# b3b7a20
# rag_system.py
import logging
from typing import Dict, List, Optional, TypedDict
from langchain_openai import ChatOpenAI
from langchain_core.documents import Document
from langchain_core.messages import HumanMessage
from langchain.prompts import ChatPromptTemplate
from langchain_core.tools import tool
from langgraph.graph import StateGraph, START, END
# Logging configuration.
# Runs at import time: sets up logging through basicConfig at INFO level with
# a "timestamp - level - message" line format, then creates this module's logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
# RAG prompt for puppy-related questions.
# Template text with two placeholders: {question} receives the user's question
# and {context} receives the retrieved document text. The model is told to
# answer only from that context and to reply "I don't know" otherwise.
RAG_PROMPT = """
You are an assistant specialized in puppy education and care.
Your role is to help new puppy owners by answering their questions with accuracy and kindness.
Use only the information provided in the context to formulate your answers.
If you cannot find the information in the context, just say "I don't know".
### Question
{question}
### Context
{context}
"""
class State(TypedDict):
    """State dictionary shared by the nodes of the RAG graph."""
    # The user's question; the only key supplied when the graph is invoked.
    question: str
    # Documents returned by the retriever for the question.
    context: List[Document]
    # Content of the LLM reply generated from the question and the context.
    response: str
class RAGSystem:
    """Retrieval-augmented question answering for puppy-related questions.

    Combines a retriever and a chat model in a two-step graph (retrieve the
    documents, then generate the answer) and exposes that pipeline both as a
    plain method and as an agent tool.
    """

    def __init__(self, retriever, model_name: str = "gpt-4o-mini"):
        """Store the retriever, create the model and prompt, and build the graph.

        Args:
            retriever: Object whose ``invoke(question)`` returns the documents
                to use as context.
            model_name: Model name handed to ``ChatOpenAI``.
        """
        self.retriever = retriever
        self.llm = ChatOpenAI(model=model_name)
        self.rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
        self.graph_rag = self._build_graph()

    def _build_graph(self):
        """Assemble and compile the retrieve -> generate graph.

        Returns:
            The compiled graph; invoking it with ``{"question": ...}`` yields
            a state that also carries ``context`` and ``response``.
        """

        # NOTE: the node functions keep the names ``retrieve`` and
        # ``generate``; the entry edge below addresses the first node by the
        # string "retrieve" (presumably derived from the function name).
        def retrieve(state):
            # Look up the documents relevant to the question.
            return {"context": self.retriever.invoke(state["question"])}

        def generate(state):
            # Merge the retrieved passages into one context string, separated
            # by blank lines, and ask the model to answer from it.
            joined_context = "\n\n".join(
                passage.page_content for passage in state["context"]
            )
            prompt_messages = self.rag_prompt.format_messages(
                question=state["question"],
                context=joined_context,
            )
            return {"response": self.llm.invoke(prompt_messages).content}

        builder = StateGraph(State).add_sequence([retrieve, generate])
        builder.add_edge(START, "retrieve")
        return builder.compile()

    @staticmethod
    def _describe_source(position, doc):
        """Build the source-information entry for one retrieved chunk.

        Args:
            position: 1-based rank of the chunk among the retrieved documents.
            doc: Retrieved document exposing ``metadata`` and ``page_content``.

        Returns:
            Dict with the chunk number, a human-readable description, the
            source, page and chapter taken from the metadata, and a preview
            of the content limited to 100 characters.
        """
        details = doc.metadata
        origin = details.get('source', 'Unknown')
        page_ref = details.get('page', 'N/A')
        chapter_ref = details.get('chapter', '')

        # The chapter is only mentioned when the metadata provides one.
        if chapter_ref:
            location = f"Chapter: {chapter_ref}, Page: {page_ref}"
        else:
            location = f"Page: {page_ref}"

        # Long passages are cut to their first 100 characters plus an ellipsis.
        text = doc.page_content
        if len(text) > 100:
            preview = text[:100] + "..."
        else:
            preview = text

        return {
            'chunk_number': position,
            'description': f"Chunk {position} - {origin} ({location})",
            'source': origin,
            'page': page_ref,
            'chapter': chapter_ref,
            'content_preview': preview,
        }

    def process_query(self, question: str) -> Dict:
        """Run the RAG graph on a question and return the answer with its sources.

        Args:
            question: The question to answer.

        Returns:
            Dict with ``response`` (the generated answer), ``context`` (the
            retrieved documents), ``sources_info`` (one entry per document,
            numbered from 1) and ``total_chunks`` (number of documents).
        """
        final_state = self.graph_rag.invoke({"question": question})
        documents = final_state["context"]

        sources = [
            self._describe_source(rank, document)
            for rank, document in enumerate(documents, 1)
        ]

        return {
            "response": final_state["response"],
            "context": documents,
            "sources_info": sources,
            "total_chunks": len(documents),
        }

    def create_rag_tool(self):
        """Create the agent tool that answers questions through this RAG system.

        Returns:
            A tool named ``ai_rag_tool`` that takes a question and returns the
            answer wrapped in a message, together with the retrieved context
            and the source information.
        """

        # The tool closes over this instance, so it keeps using the same
        # retriever, model and graph.
        @tool
        def ai_rag_tool(question: str) -> Dict:
            """MANDATORY for all questions about puppies, their behavior, education or training.
            This tool accesses a specialized knowledge base on puppies with expert and reliable information.
            Any question regarding puppy care, education, behavior or health MUST be processed by this tool.
            The input must be a complete question."""
            outcome = self.process_query(question)
            passthrough_keys = ("context", "sources_info", "total_chunks")
            return {
                "messages": [HumanMessage(content=outcome["response"])],
                **{key: outcome[key] for key in passthrough_keys},
            }

        return ai_rag_tool