import os
from typing import Any, Dict, List, Optional

from dotenv import load_dotenv
from google.api_core.exceptions import ResourceExhausted  # Import the exception for quota exhaustion
from pydantic import BaseModel
from pydantic_ai import Agent
from pydantic_ai import RunContext
from pydantic_ai.models.gemini import GeminiModel
from pydantic_ai.providers.google_gla import GoogleGLAProvider

from csv_service import get_csv_basic_info
from orchestrator_functions import csv_chart, csv_chat

load_dotenv()
# Load all API keys from the environment variable.
# GEMINI_API_KEYS is expected to hold a comma-separated list of keys.
# str.split never returns an empty list ("".split(",") == [""]), so blank
# entries are dropped and surrounding whitespace is stripped; an unset or
# empty variable therefore yields [] instead of a single empty "key".
GEMINI_API_KEYS = [
    key.strip()
    for key in os.getenv("GEMINI_API_KEYS", "").split(",")
    if key.strip()
]
# Function to initialize the model with a specific API key
def initialize_model(api_key: str, model_name: str = 'gemini-2.0-flash') -> GeminiModel:
    """Build a Gemini model bound to a single API key.

    Args:
        api_key: Key handed to ``GoogleGLAProvider`` for this model instance.
        model_name: Model identifier passed to ``GeminiModel``. Defaults to
            ``'gemini-2.0-flash'``, the value that used to be hard-coded, so
            existing one-argument calls behave as before.

    Returns:
        A ``GeminiModel`` whose provider is a ``GoogleGLAProvider`` created
        with ``api_key``.
    """
    provider = GoogleGLAProvider(api_key=api_key)
    return GeminiModel(model_name, provider=provider)
# Define the tools
async def generate_csv_answer(csv_url: str, user_questions: List[str]) -> Any:
    """
    Answer each of the given user questions about the CSV at the given URL.

    Every question is handed to csv_chat together with the CSV URL, one at a
    time and in the order received, and the results are collected.

    Args:
        csv_url (str): The URL of the CSV file.
        user_questions (List[str]): A list of user questions.

    Returns:
        List[Dict[str, Any]]: One dictionary per question, holding the question and its answer.

    Example:
        [
            {"question": "What is the average age of the customers?", "answer": "The average age is 35."},
            {"question": "What is the most common gender?", "answer": "The most common gender is Male."}
        ]
    """
    print("LLM using the csv chat function....")
    print("CSV URL:", csv_url)
    print("User question:", user_questions)

    # Awaiting inside the comprehension keeps the csv_chat calls strictly
    # sequential: each one finishes before the next question is started.
    return [
        {"question": item, "answer": await csv_chat(csv_url, item)}
        for item in user_questions
    ]
async def generate_chart(csv_url: str, user_questions: List[str]) -> Any:
    """
    This function generates charts for the given user questions using the CSV URL.

    It uses the csv_chart function to process each question, one at a time and
    in the order given, and returns whatever csv_chart produced for each
    question under the "image_url" key.

    Args:
        csv_url (str): The URL of the CSV file.
        user_questions (List[str]): A list of user questions.

    Returns:
        List[Dict[str, Any]]: A list of dictionaries containing the question and
        the csv_chart result ("image_url") for each question.

    Example:
        [
            {"question": "What is the average age of the customers?", "image_url": "https://example.com/chart1.png"},
            {"question": "What is the most common gender?", "image_url": "https://example.com/chart2.png"}
        ]
    """
    print("LLM using the csv chart function....")
    print("CSV URL:", csv_url)
    print("User question:", user_questions)

    # The key is "image_url" (not "chart_url"); the docstring example above now
    # matches what is actually returned. Awaiting inside the comprehension keeps
    # the csv_chart calls sequential.
    return [
        {"question": question, "image_url": await csv_chart(csv_url, question)}
        for question in user_questions
    ]
# Function to create an agent with a specific CSV URL
def create_agent(csv_url: str, api_key: str, conversation_history: List) -> Agent:
    """Create an Agent configured for one CSV file and one API key.

    The CSV overview is obtained from ``get_csv_basic_info(csv_url)`` and is
    interpolated, together with the CSV URL and the conversation history, into
    the system prompt.

    Args:
        csv_url: URL of the CSV file; passed to ``get_csv_basic_info`` and
            embedded in the prompt.
        api_key: Key passed to ``initialize_model`` to build the model.
        conversation_history: Prior conversation; embedded in the prompt using
            its default string formatting.

    Returns:
        An ``Agent`` using the model from ``initialize_model(api_key)``, with
        ``generate_csv_answer`` and ``generate_chart`` registered as tools.
    """
    csv_metadata = get_csv_basic_info(csv_url)

    # NOTE(review): the image-format example in this prompt was an empty code
    # span (``), which gave the model nothing to follow. Markdown image syntax
    # is used here because the prompt declares the output Markdown compatible
    # and generate_chart returns an "image_url" -- confirm the intended template.
    system_prompt = f"""
# Role: Expert Data Analysis Assistant
# Personality & Origin: You are exclusively the CSV Document Analysis Assistant, created by the chatcsvandpdf team. Your sole purpose is to assist users with CSV-related tasks—analyzing, interpreting, and processing data.
## Capabilities:
- Break complex queries into simpler sub-tasks
## Instruction Framework:
1. QUERY PROCESSING:
   - If request contains multiple questions:
     a) Decompose into logical sub-questions
     b) Process sequentially
     c) Combine results coherently
2. DATA HANDLING:
   - Always verify CSV structure matches the request
   - Handle missing/ambiguous data by:
     a) Asking clarifying questions OR
     b) Making reasonable assumptions (state them clearly)
3. VISUALIZATION STANDARDS:
   - Format images as: `![Description](image_url)`
   - Include axis labels and titles
   - Use appropriate chart types
4. COMMUNICATION PROTOCOL:
   - Friendly, professional tone
   - Explain technical terms
   - Summarize key findings
   - Highlight limitations/caveats
5. TOOL USAGE:
   - Can process statistical operations
   - Supported visualization libraries (matplotlib, seaborn)
   - Other chart libraries (e.g., plotly, bokeh etc.) not supported
## Current Context:
- Working with CSV_URL: {csv_url}
- Dataset overview: {csv_metadata}
- Your conversation history: {conversation_history}
- Output format: Markdown compatible
## Response Template:
1. Confirm understanding of request
2. Outline analysis approach
3. Present results with visualizations (if applicable)
4. Provide interpretation
5. Offer next-step suggestions
"""
    return Agent(
        model=initialize_model(api_key),
        deps_type=str,
        tools=[generate_csv_answer, generate_chart],
        system_prompt=system_prompt,
    )
def csv_orchestrator_chat(csv_url: str, user_question: str, conversation_history: List) -> Optional[str]:
    """Answer a question about a CSV, trying each configured API key in turn.

    For every entry in ``GEMINI_API_KEYS`` a fresh agent is built with
    ``create_agent`` and run synchronously with the question. The first
    successful result is returned; any exception moves on to the next key.

    Args:
        csv_url: URL of the CSV file, forwarded to ``create_agent``.
        user_question: Question passed to the agent's ``run_sync``.
        conversation_history: Prior conversation, forwarded to ``create_agent``.

    Returns:
        ``result.data`` from the first key that succeeds, or ``None`` when no
        usable key is configured or every key failed.
    """
    print("CSV URL:", csv_url)
    print("User questions:", user_question)

    total_keys = len(GEMINI_API_KEYS)
    # Iterate through all API keys
    for position, api_key in enumerate(GEMINI_API_KEYS, start=1):
        # An unset/empty environment variable can produce blank entries;
        # calling the API with them can only fail, so skip them.
        if not api_key or not api_key.strip():
            continue

        # Identify the key by position only: the key itself is a secret and
        # must not be written to stdout/logs.
        key_label = f"#{position} of {total_keys}"
        try:
            print(f"Attempting with API key {key_label}")
            agent = create_agent(csv_url, api_key, conversation_history)
            result = agent.run_sync(user_question)
            # Read inside the try so a failure here also falls through to the
            # next key, as before.
            answer = result.data
        except ResourceExhausted:
            # The previous `except ResourceExhausted or Exception` evaluated to
            # `except ResourceExhausted` (`or` returns its first truthy
            # operand), so this clause states what was actually caught.
            print(f"Quota exhausted for API key {key_label}. Switching to the next key.")
        except Exception as e:
            # Deliberate best-effort boundary: any other failure with this key
            # is reported and the next key is tried.
            print(f"Error with API key {key_label}: {e}")
        else:
            print("Orchestrator Result:", answer)
            return answer

    # If all keys are exhausted or fail
    print("All API keys have been exhausted or failed.")
    return None
# import os | |
# from typing import Dict, List, Any | |
# from pydantic_ai import Agent | |
# from pydantic_ai.models.gemini import GeminiModel | |
# from pydantic_ai.providers.google_gla import GoogleGLAProvider | |
# from pydantic_ai import RunContext | |
# from pydantic import BaseModel | |
# from google.api_core.exceptions import ResourceExhausted | |
# from csv_service import get_csv_basic_info | |
# from orchestrator_functions import csv_chart, csv_chat | |
# from dotenv import load_dotenv | |
# load_dotenv() | |
# # Key-rotation state: a module-level index shared by all callers (no lock guards it, so it is not thread-safe)
# current_gemini_key_index = 0 | |
# GEMINI_API_KEYS = os.getenv("GEMINI_API_KEYS", "").split(",") | |
# def initialize_model(api_key: str) -> GeminiModel: | |
# return GeminiModel( | |
# 'gemini-2.0-flash', | |
# provider=GoogleGLAProvider(api_key=api_key) | |
# ) | |
# def is_resource_exhausted_error(result_or_exception) -> bool: | |
# """Check if the error indicates resource exhaustion""" | |
# error_str = str(result_or_exception).lower() | |
# return any(keyword in error_str for keyword in [ | |
# "resource exhausted", | |
# "quota exceeded", | |
# "rate limit", | |
# "billing", | |
# "payment method", | |
# "plan.rule" | |
# ]) | |
# async def generate_csv_answer(csv_url: str, user_questions: List[str]) -> Any: | |
# answers = [] | |
# for question in user_questions: | |
# answer = await csv_chat(csv_url, question) | |
# answers.append(dict(question=question, answer=answer)) | |
# return answers | |
# async def generate_chart(csv_url: str, user_questions: List[str]) -> Any: | |
# charts = [] | |
# for question in user_questions: | |
# chart = await csv_chart(csv_url, question) | |
# charts.append(dict(question=question, image_url=chart)) | |
# return charts | |
# def create_agent(csv_url: str, api_key: str, conversation_history: List) -> Agent: | |
# csv_metadata = get_csv_basic_info(csv_url) | |
# system_prompt = f""" | |
# # Role: Expert Data Analysis Assistant | |
# # Personality & Origin: You are exclusively the CSV Document Analysis Assistant, created by the chatcsvandpdf team. Your sole purpose is to assist users with CSV-related tasks—analyzing, interpreting, and processing data. | |
# ## Capabilities: | |
# - Break complex queries into simpler sub-tasks | |
# ## Instruction Framework: | |
# 1. QUERY PROCESSING: | |
# - If request contains multiple questions: | |
# a) Decompose into logical sub-questions | |
# b) Process sequentially | |
# c) Combine results coherently | |
# 2. DATA HANDLING: | |
# - Always verify CSV structure matches the request | |
# - Handle missing/ambiguous data by: | |
# a) Asking clarifying questions OR | |
# b) Making reasonable assumptions (state them clearly) | |
# 3. VISUALIZATION STANDARDS: | |
# - Format images as: `` | |
# - Include axis labels and titles | |
# - Use appropriate chart types | |
# 4. COMMUNICATION PROTOCOL: | |
# - Friendly, professional tone | |
# - Explain technical terms | |
# - Summarize key findings | |
# - Highlight limitations/caveats | |
# 5. TOOL USAGE: | |
# - Can process statistical operations | |
# - Supports visualization libraries | |
# ## Current Context: | |
# - Working with CSV_URL: {csv_url} | |
# - Dataset overview: {csv_metadata} | |
# - Your conversation history: {conversation_history} | |
# - Output format: Markdown compatible | |
# """ | |
# return Agent( | |
# model=initialize_model(api_key), | |
# deps_type=str, | |
# tools=[generate_csv_answer, generate_chart], | |
# system_prompt=system_prompt | |
# ) | |
# def csv_orchestrator_chat(csv_url: str, user_question: str, conversation_history: List) -> str: | |
# global current_gemini_key_index | |
# while current_gemini_key_index < len(GEMINI_API_KEYS): | |
# api_key = GEMINI_API_KEYS[current_gemini_key_index] | |
# try: | |
# print(f"Attempting with API key index {current_gemini_key_index}") | |
# agent = create_agent(csv_url, api_key, conversation_history) | |
# result = agent.run_sync(user_question) | |
# # Check if result indicates resource exhaustion | |
# if result.data and is_resource_exhausted_error(result.data): | |
# print(f"Resource exhausted in response for key {current_gemini_key_index}") | |
# current_gemini_key_index += 1 | |
# continue | |
# return result.data | |
# except ResourceExhausted as e: | |
# print(f"Resource exhausted for API key {current_gemini_key_index}: {e}") | |
# current_gemini_key_index += 1 | |
# continue | |
# except Exception as e: | |
# if is_resource_exhausted_error(e): | |
# print(f"Resource exhausted error detected for key {current_gemini_key_index}") | |
# current_gemini_key_index += 1 | |
# continue | |
# print(f"Non-recoverable error with key {current_gemini_key_index}: {e}") | |
# return f"Error processing request: {str(e)}" | |
# return "All API keys have been exhausted. Please update billing information." |