# FastApi/orchestrator_agent.py
import os
from typing import Dict, List, Any
from pydantic_ai import Agent
from pydantic_ai.models.gemini import GeminiModel
from pydantic_ai.providers.google_gla import GoogleGLAProvider
from pydantic_ai import RunContext
from pydantic import BaseModel
from google.api_core.exceptions import ResourceExhausted # Import the exception for quota exhaustion
from csv_service import get_csv_basic_info
from orchestrator_functions import csv_chart, csv_chat
from dotenv import load_dotenv
load_dotenv()
# Load all API keys from the environment variable
# Expecting a comma-separated list of keys; strip whitespace and drop empty entries
GEMINI_API_KEYS = [key.strip() for key in os.getenv("GEMINI_API_KEYS", "").split(",") if key.strip()]
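# A minimal sketch of the expected .env entry, assuming the keys are supplied as a
# single comma-separated string (the key values below are hypothetical placeholders):
#
#   GEMINI_API_KEYS=AIza...key1,AIza...key2,AIza...key3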
# Function to initialize the model with a specific API key
def initialize_model(api_key: str) -> GeminiModel:
    return GeminiModel(
        'gemini-2.0-flash',
        provider=GoogleGLAProvider(api_key=api_key)
    )

# Define the tools
async def generate_csv_answer(csv_url: str, user_questions: List[str]) -> Any:
"""
This function generates answers for the given user questions using the CSV URL.
It uses the csv_chat function to process each question and return the answers.
Args:
csv_url (str): The URL of the CSV file.
user_questions (List[str]): A list of user questions.
Returns:
List[Dict[str, Any]]: A list of dictionaries containing the question and answer for each question.
Example:
[
{"question": "What is the average age of the customers?", "answer": "The average age is 35."},
{"question": "What is the most common gender?", "answer": "The most common gender is Male."}
]
"""
print("LLM using the csv chat function....")
print("CSV URL:", csv_url)
print("User question:", user_questions)
# Create an array to accumulate the answers
answers = []
# Loop through the user questions and generate answers for each
for question in user_questions:
answer = await csv_chat(csv_url, question)
answers.append(dict(question=question, answer=answer))
return answers
async def generate_chart(csv_url: str, user_questions: List[str]) -> Any:
"""
This function generates charts for the given user questions using the CSV URL.
It uses the csv_chart function to process each question and return the chart URLs.
It returns a list of dictionaries containing the question and chart URL for each question.
Args:
csv_url (str): The URL of the CSV file.
user_questions (List[str]): A list of user questions.
Returns:
List[Dict[str, Any]]: A list of dictionaries containing the question and chart URL for each question.
Example:
[
{"question": "What is the average age of the customers?", "chart_url": "https://example.com/chart1.png"},
{"question": "What is the most common gender?", "chart_url": "https://example.com/chart2.png"}
]
"""
print("LLM using the csv chart function....")
print("CSV URL:", csv_url)
print("User question:", user_questions)
# Create an array to accumulate the charts
charts = []
# Loop through the user questions and generate charts for each
for question in user_questions:
chart = await csv_chart(csv_url, question)
charts.append(dict(question=question, image_url=chart))
return charts
# Function to create an agent with a specific CSV URL
def create_agent(csv_url: str, api_key: str, conversation_history: List) -> Agent:
    csv_metadata = get_csv_basic_info(csv_url)
    system_prompt = f"""
    Role: CSV Document Analysis Assistant
    Purpose: Help with CSV tasks—analyzing, interpreting, and processing data.

    Capabilities:
    - Break down complex queries into simple steps.
    - Verify CSV structure; handle missing data by asking clarifications or stating clear assumptions.
    - Generate visualizations as: ![Description](direct-url).

    Instructions:
    1. Process multi-part queries sequentially and combine results.
    2. Use available tools:
       - generate_csv_answer: for CSV analysis.
       - generate_chart: for creating charts.
    3. Maintain a friendly, professional tone.
    4. Output should be Markdown compatible.

    Context:
    - CSV_URL: {csv_url}
    - Dataset overview: {csv_metadata}
    - Conversation history: {conversation_history}
    """
    return Agent(
        model=initialize_model(api_key),
        deps_type=str,
        tools=[generate_csv_answer, generate_chart],
        system_prompt=system_prompt
    )

def csv_orchestrator_chat(csv_url: str, user_question: str, conversation_history: List) -> str:
print("CSV URL:", csv_url)
print("User questions:", user_question)
# Iterate through all API keys
for api_key in GEMINI_API_KEYS:
try:
print(f"Attempting with API key: {api_key}")
agent = create_agent(csv_url, api_key, conversation_history)
result = agent.run_sync(user_question)
print("Orchestrator Result:", result.data)
return result.data
except ResourceExhausted or Exception as e:
print(f"Quota exhausted for API key: {api_key}. Switching to the next key.")
continue # Move to the next key
except Exception as e:
print(f"Error with API key {api_key}: {e}")
continue # Move to the next key
# If all keys are exhausted or fail
print("All API keys have been exhausted or failed.")
return None
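
# Illustrative usage sketch (not part of the FastAPI app): the CSV URL, question, and
# empty history below are hypothetical placeholders, assuming csv_orchestrator_chat is
# called from synchronous code with GEMINI_API_KEYS configured in the environment.
if __name__ == "__main__":
    example_csv_url = "https://example.com/customers.csv"  # hypothetical URL
    example_history = []  # no prior conversation
    reply = csv_orchestrator_chat(
        example_csv_url,
        "What is the average age of the customers?",
        example_history,
    )
    print(reply)
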
# import os
# from typing import Dict, List, Any
# from pydantic_ai import Agent
# from pydantic_ai.models.gemini import GeminiModel
# from pydantic_ai.providers.google_gla import GoogleGLAProvider
# from pydantic_ai import RunContext
# from pydantic import BaseModel
# from google.api_core.exceptions import ResourceExhausted
# from csv_service import get_csv_basic_info
# from orchestrator_functions import csv_chart, csv_chat
# from dotenv import load_dotenv

# load_dotenv()

# # Thread-safe key management
# current_gemini_key_index = 0
# GEMINI_API_KEYS = os.getenv("GEMINI_API_KEYS", "").split(",")

# def initialize_model(api_key: str) -> GeminiModel:
#     return GeminiModel(
#         'gemini-2.0-flash',
#         provider=GoogleGLAProvider(api_key=api_key)
#     )

# def is_resource_exhausted_error(result_or_exception) -> bool:
#     """Check if the error indicates resource exhaustion"""
#     error_str = str(result_or_exception).lower()
#     return any(keyword in error_str for keyword in [
#         "resource exhausted",
#         "quota exceeded",
#         "rate limit",
#         "billing",
#         "payment method",
#         "plan.rule"
#     ])

# async def generate_csv_answer(csv_url: str, user_questions: List[str]) -> Any:
#     answers = []
#     for question in user_questions:
#         answer = await csv_chat(csv_url, question)
#         answers.append(dict(question=question, answer=answer))
#     return answers

# async def generate_chart(csv_url: str, user_questions: List[str]) -> Any:
#     charts = []
#     for question in user_questions:
#         chart = await csv_chart(csv_url, question)
#         charts.append(dict(question=question, image_url=chart))
#     return charts

# def create_agent(csv_url: str, api_key: str, conversation_history: List) -> Agent:
#     csv_metadata = get_csv_basic_info(csv_url)
#     system_prompt = f"""
#     # Role: Expert Data Analysis Assistant
#     # Personality & Origin: You are exclusively the CSV Document Analysis Assistant, created by the chatcsvandpdf team. Your sole purpose is to assist users with CSV-related tasks—analyzing, interpreting, and processing data.
#     ## Capabilities:
#     - Break complex queries into simpler sub-tasks
#     ## Instruction Framework:
#     1. QUERY PROCESSING:
#        - If request contains multiple questions:
#          a) Decompose into logical sub-questions
#          b) Process sequentially
#          c) Combine results coherently
#     2. DATA HANDLING:
#        - Always verify CSV structure matches the request
#        - Handle missing/ambiguous data by:
#          a) Asking clarifying questions OR
#          b) Making reasonable assumptions (state them clearly)
#     3. VISUALIZATION STANDARDS:
#        - Format images as: `![Description](direct-url)`
#        - Include axis labels and titles
#        - Use appropriate chart types
#     4. COMMUNICATION PROTOCOL:
#        - Friendly, professional tone
#        - Explain technical terms
#        - Summarize key findings
#        - Highlight limitations/caveats
#     5. TOOL USAGE:
#        - Can process statistical operations
#        - Supports visualization libraries
#     ## Current Context:
#     - Working with CSV_URL: {csv_url}
#     - Dataset overview: {csv_metadata}
#     - Your conversation history: {conversation_history}
#     - Output format: Markdown compatible
#     """
#     return Agent(
#         model=initialize_model(api_key),
#         deps_type=str,
#         tools=[generate_csv_answer, generate_chart],
#         system_prompt=system_prompt
#     )

# def csv_orchestrator_chat(csv_url: str, user_question: str, conversation_history: List) -> str:
#     global current_gemini_key_index
#     while current_gemini_key_index < len(GEMINI_API_KEYS):
#         api_key = GEMINI_API_KEYS[current_gemini_key_index]
#         try:
#             print(f"Attempting with API key index {current_gemini_key_index}")
#             agent = create_agent(csv_url, api_key, conversation_history)
#             result = agent.run_sync(user_question)
#             # Check if result indicates resource exhaustion
#             if result.data and is_resource_exhausted_error(result.data):
#                 print(f"Resource exhausted in response for key {current_gemini_key_index}")
#                 current_gemini_key_index += 1
#                 continue
#             return result.data
#         except ResourceExhausted as e:
#             print(f"Resource exhausted for API key {current_gemini_key_index}: {e}")
#             current_gemini_key_index += 1
#             continue
#         except Exception as e:
#             if is_resource_exhausted_error(e):
#                 print(f"Resource exhausted error detected for key {current_gemini_key_index}")
#                 current_gemini_key_index += 1
#                 continue
#             print(f"Non-recoverable error with key {current_gemini_key_index}: {e}")
#             return f"Error processing request: {str(e)}"
#     return "All API keys have been exhausted. Please update billing information."