Spaces:

Soumik555
/

FastApi

Running

App Files Files Community

FastApi / orchestrator_agent.py

Soumik555

Changed prompt only supports matplot and sns

ed7c9f7 5 months ago

raw

history blame

11.4 kB


	import os
	from typing import Any, Dict, List, Optional

	from dotenv import load_dotenv
	from google.api_core.exceptions import ResourceExhausted # Import the exception for quota exhaustion
	from pydantic import BaseModel
	from pydantic_ai import Agent
	from pydantic_ai import RunContext
	from pydantic_ai.models.gemini import GeminiModel
	from pydantic_ai.providers.google_gla import GoogleGLAProvider

	from csv_service import get_csv_basic_info
	from orchestrator_functions import csv_chart, csv_chat

	load_dotenv()


	# Load all API keys from the environment variable.
	# Expecting a comma-separated list of keys. Each entry is stripped and blank
	# entries are dropped: "".split(",") yields [""], so an unset variable or a
	# trailing comma would otherwise produce an empty "key" that gets tried.
	GEMINI_API_KEYS = [
	    key.strip()
	    for key in os.getenv("GEMINI_API_KEYS", "").split(",")
	    if key.strip()
	]

	# Build a Gemini model handle bound to one specific API key
	def initialize_model(api_key: str) -> GeminiModel:
	    """Return a ``gemini-2.0-flash`` model that authenticates with ``api_key``.

	    Args:
	        api_key: Key handed to the ``GoogleGLAProvider``.

	    Returns:
	        A ``GeminiModel`` configured with that provider.
	    """
	    gla_provider = GoogleGLAProvider(api_key=api_key)
	    return GeminiModel('gemini-2.0-flash', provider=gla_provider)

	# Define the tools
	async def generate_csv_answer(csv_url: str, user_questions: List[str]) -> Any:
	    """
	    Answer each of the given user questions against the CSV at the given URL.
	    Every question is handed to csv_chat in turn, one after another, and the
	    replies are collected in the same order as the questions.

	    Args:
	        csv_url (str): The URL of the CSV file.
	        user_questions (List[str]): A list of user questions.

	    Returns:
	        List[Dict[str, Any]]: One dictionary per question, holding the question and its answer.

	    Example:
	        [
	            {"question": "What is the average age of the customers?", "answer": "The average age is 35."},
	            {"question": "What is the most common gender?", "answer": "The most common gender is Male."}
	        ]
	    """
	    print("LLM using the csv chat function....")
	    print("CSV URL:", csv_url)
	    print("User question:", user_questions)

	    # Awaiting inside the comprehension keeps the csv_chat calls sequential.
	    return [
	        {"question": pending, "answer": await csv_chat(csv_url, pending)}
	        for pending in user_questions
	    ]

	async def generate_chart(csv_url: str, user_questions: List[str]) -> Any:
	    """
	    This function generates charts for the given user questions using the CSV URL.
	    It uses the csv_chart function to process each question, one after another,
	    and returns a list of dictionaries containing the question and the chart
	    result for each question under the "image_url" key.

	    Args:
	        csv_url (str): The URL of the CSV file.
	        user_questions (List[str]): A list of user questions.

	    Returns:
	        List[Dict[str, Any]]: A list of dictionaries containing the question and
	        the value returned by csv_chart (stored as "image_url") for each question.

	    Example:
	        [
	            {"question": "What is the average age of the customers?", "image_url": "https://example.com/chart1.png"},
	            {"question": "What is the most common gender?", "image_url": "https://example.com/chart2.png"}
	        ]
	    """
	    print("LLM using the csv chart function....")
	    print("CSV URL:", csv_url)
	    print("User question:", user_questions)

	    # Create an array to accumulate the charts
	    charts = []
	    # Loop through the user questions and generate charts for each
	    for question in user_questions:
	        chart = await csv_chart(csv_url, question)
	        # The key is "image_url" (not "chart_url"); the docstring above must
	        # stay in sync with it.
	        charts.append(dict(question=question, image_url=chart))

	    return charts

	# Assemble an agent whose system prompt is bound to one CSV and one API key
	def create_agent(csv_url: str, api_key: str, conversation_history: List) -> Agent:
	    """Build the orchestrator ``Agent`` for a single CSV conversation.

	    The system prompt embeds the CSV URL, the value returned by
	    ``get_csv_basic_info(csv_url)`` and the conversation history, each
	    formatted with its default string conversion.

	    Args:
	        csv_url: URL of the CSV the agent should work with.
	        api_key: Gemini API key passed to ``initialize_model``.
	        conversation_history: Prior exchange, interpolated into the prompt as-is.

	    Returns:
	        An ``Agent`` with the ``generate_csv_answer`` and ``generate_chart`` tools.
	    """
	    dataset_overview = get_csv_basic_info(csv_url)

	    prompt_text = f"""
	# Role: Expert Data Analysis Assistant
	# Personality & Origin: You are exclusively the CSV Document Analysis Assistant, created by the chatcsvandpdf team. Your sole purpose is to assist users with CSV-related tasks—analyzing, interpreting, and processing data.

	## Capabilities:
	- Break complex queries into simpler sub-tasks

	## Instruction Framework:
	1. QUERY PROCESSING:
	- If request contains multiple questions:
	a) Decompose into logical sub-questions
	b) Process sequentially
	c) Combine results coherently

	2. DATA HANDLING:
	- Always verify CSV structure matches the request
	- Handle missing/ambiguous data by:
	a) Asking clarifying questions OR
	b) Making reasonable assumptions (state them clearly)

	3. VISUALIZATION STANDARDS:
	- Format images as: `![Description](direct-url)`
	- Include axis labels and titles
	- Use appropriate chart types

	4. COMMUNICATION PROTOCOL:
	- Friendly, professional tone
	- Explain technical terms
	- Summarize key findings
	- Highlight limitations/caveats

	5. TOOL USAGE:
	- Can process statistical operations
	- Supported visualization libraries (matplotlib, seaborn)
	- Other chart libraries (e.g., plotly, bokeh etc.) not supported

	## Current Context:
	- Working with CSV_URL: {csv_url}
	- Dataset overview: {dataset_overview}
	- Your conversation history: {conversation_history}
	- Output format: Markdown compatible

	## Response Template:
	1. Confirm understanding of request
	2. Outline analysis approach
	3. Present results with visualizations (if applicable)
	4. Provide interpretation
	5. Offer next-step suggestions
	"""

	    # The model is created only after the prompt has been rendered.
	    gemini_model = initialize_model(api_key)
	    agent_tools = [generate_csv_answer, generate_chart]
	    return Agent(
	        model=gemini_model,
	        deps_type=str,
	        tools=agent_tools,
	        system_prompt=prompt_text,
	    )

	def csv_orchestrator_chat(csv_url: str, user_question: str, conversation_history: List) -> Optional[str]:
	    """Answer ``user_question`` about a CSV, trying each configured Gemini key in turn.

	    For every entry in ``GEMINI_API_KEYS`` an agent is created with
	    ``create_agent`` and run synchronously. The first successful run's
	    ``result.data`` is returned; any exception moves on to the next key.

	    Args:
	        csv_url: URL of the CSV file to analyse.
	        user_question: The user's request, passed to ``agent.run_sync``.
	        conversation_history: Prior exchange, forwarded to ``create_agent``.

	    Returns:
	        The agent's ``result.data`` from the first key that works, or ``None``
	        when there are no usable keys or every key failed.
	    """
	    print("CSV URL:", csv_url)
	    print("User questions:", user_question)

	    # Iterate through all API keys
	    for key_number, raw_key in enumerate(GEMINI_API_KEYS, start=1):
	        api_key = raw_key.strip()
	        if not api_key:
	            # Blank entries (unset variable, trailing comma) are not real keys.
	            continue
	        try:
	            # Log the key's position only; the key itself is a secret.
	            print(f"Attempting with API key #{key_number}")
	            agent = create_agent(csv_url, api_key, conversation_history)
	            result = agent.run_sync(user_question)
	            print("Orchestrator Result:", result.data)
	            return result.data
	        except ResourceExhausted:
	            # The original `except ResourceExhausted or Exception` evaluated the
	            # `or` first and therefore only ever named ResourceExhausted.
	            print(f"Quota exhausted for API key #{key_number}. Switching to the next key.")
	            continue # Move to the next key
	        except Exception as e:
	            # Deliberate best-effort: any other failure also falls through to
	            # the next key instead of aborting the request.
	            print(f"Error with API key #{key_number}: {e}")
	            continue # Move to the next key

	    # If all keys are exhausted or fail
	    print("All API keys have been exhausted or failed.")
	    return None











	# import os
	# from typing import Dict, List, Any
	# from pydantic_ai import Agent
	# from pydantic_ai.models.gemini import GeminiModel
	# from pydantic_ai.providers.google_gla import GoogleGLAProvider
	# from pydantic_ai import RunContext
	# from pydantic import BaseModel
	# from google.api_core.exceptions import ResourceExhausted
	# from csv_service import get_csv_basic_info
	# from orchestrator_functions import csv_chart, csv_chat
	# from dotenv import load_dotenv

	# load_dotenv()

	# # Key rotation state (module-level index shared by all calls; no lock guards it, so it is not actually thread-safe)
	# current_gemini_key_index = 0
	# GEMINI_API_KEYS = os.getenv("GEMINI_API_KEYS", "").split(",")

	# def initialize_model(api_key: str) -> GeminiModel:
	# return GeminiModel(
	# 'gemini-2.0-flash',
	# provider=GoogleGLAProvider(api_key=api_key)
	# )

	# def is_resource_exhausted_error(result_or_exception) -> bool:
	# """Check if the error indicates resource exhaustion"""
	# error_str = str(result_or_exception).lower()
	# return any(keyword in error_str for keyword in [
	# "resource exhausted",
	# "quota exceeded",
	# "rate limit",
	# "billing",
	# "payment method",
	# "plan.rule"
	# ])

	# async def generate_csv_answer(csv_url: str, user_questions: List[str]) -> Any:
	# answers = []
	# for question in user_questions:
	# answer = await csv_chat(csv_url, question)
	# answers.append(dict(question=question, answer=answer))
	# return answers

	# async def generate_chart(csv_url: str, user_questions: List[str]) -> Any:
	# charts = []
	# for question in user_questions:
	# chart = await csv_chart(csv_url, question)
	# charts.append(dict(question=question, image_url=chart))
	# return charts

	# def create_agent(csv_url: str, api_key: str, conversation_history: List) -> Agent:
	# csv_metadata = get_csv_basic_info(csv_url)

	# system_prompt = f"""
	# # Role: Expert Data Analysis Assistant
	# # Personality & Origin: You are exclusively the CSV Document Analysis Assistant, created by the chatcsvandpdf team. Your sole purpose is to assist users with CSV-related tasks—analyzing, interpreting, and processing data.

	# ## Capabilities:
	# - Break complex queries into simpler sub-tasks

	# ## Instruction Framework:
	# 1. QUERY PROCESSING:
	# - If request contains multiple questions:
	# a) Decompose into logical sub-questions
	# b) Process sequentially
	# c) Combine results coherently

	# 2. DATA HANDLING:
	# - Always verify CSV structure matches the request
	# - Handle missing/ambiguous data by:
	# a) Asking clarifying questions OR
	# b) Making reasonable assumptions (state them clearly)

	# 3. VISUALIZATION STANDARDS:
	# - Format images as: `![Description](direct-url)`
	# - Include axis labels and titles
	# - Use appropriate chart types

	# 4. COMMUNICATION PROTOCOL:
	# - Friendly, professional tone
	# - Explain technical terms
	# - Summarize key findings
	# - Highlight limitations/caveats

	# 5. TOOL USAGE:
	# - Can process statistical operations
	# - Supports visualization libraries

	# ## Current Context:
	# - Working with CSV_URL: {csv_url}
	# - Dataset overview: {csv_metadata}
	# - Your conversation history: {conversation_history}
	# - Output format: Markdown compatible
	# """

	# return Agent(
	# model=initialize_model(api_key),
	# deps_type=str,
	# tools=[generate_csv_answer, generate_chart],
	# system_prompt=system_prompt
	# )

	# def csv_orchestrator_chat(csv_url: str, user_question: str, conversation_history: List) -> str:
	# global current_gemini_key_index

	# while current_gemini_key_index < len(GEMINI_API_KEYS):
	# api_key = GEMINI_API_KEYS[current_gemini_key_index]

	# try:
	# print(f"Attempting with API key index {current_gemini_key_index}")
	# agent = create_agent(csv_url, api_key, conversation_history)
	# result = agent.run_sync(user_question)

	# # Check if result indicates resource exhaustion
	# if result.data and is_resource_exhausted_error(result.data):
	# print(f"Resource exhausted in response for key {current_gemini_key_index}")
	# current_gemini_key_index += 1
	# continue

	# return result.data

	# except ResourceExhausted as e:
	# print(f"Resource exhausted for API key {current_gemini_key_index}: {e}")
	# current_gemini_key_index += 1
	# continue

	# except Exception as e:
	# if is_resource_exhausted_error(e):
	# print(f"Resource exhausted error detected for key {current_gemini_key_index}")
	# current_gemini_key_index += 1
	# continue
	# print(f"Non-recoverable error with key {current_gemini_key_index}: {e}")
	# return f"Error processing request: {str(e)}"

	# return "All API keys have been exhausted. Please update billing information."