import os
from typing import Dict, List, Any
from pydantic_ai import Agent
from pydantic_ai.models.gemini import GeminiModel
from pydantic_ai.providers.google_gla import GoogleGLAProvider
from pydantic_ai import RunContext
from pydantic import BaseModel
from google.api_core.exceptions import ResourceExhausted  # Exception raised on quota exhaustion
from csv_service import get_csv_basic_info
from orchestrator_functions import csv_chart, csv_chat
from dotenv import load_dotenv

load_dotenv()

# Load all API keys from the environment variable (expects a comma-separated list of keys)
GEMINI_API_KEYS = os.getenv("GEMINI_API_KEYS", "").split(",")


# Initialize the model with a specific API key
def initialize_model(api_key: str) -> GeminiModel:
    return GeminiModel(
        'gemini-2.0-flash',
        provider=GoogleGLAProvider(api_key=api_key)
    )


# Define the tools
async def generate_csv_answer(csv_url: str, user_questions: List[str]) -> Any:
    """
    Generate answers for the given user questions using the CSV URL.
    Uses the csv_chat function to process each question and return the answers.

    Args:
        csv_url (str): The URL of the CSV file.
        user_questions (List[str]): A list of user questions.

    Returns:
        List[Dict[str, Any]]: A list of dictionaries containing the question and answer
        for each question.

    Example:
        [
            {"question": "What is the average age of the customers?", "answer": "The average age is 35."},
            {"question": "What is the most common gender?", "answer": "The most common gender is Male."}
        ]
    """
    print("LLM using the csv chat function....")
    print("CSV URL:", csv_url)
    print("User questions:", user_questions)

    # Accumulate the answers
    answers = []

    # Loop through the user questions and generate an answer for each
    for question in user_questions:
        answer = await csv_chat(csv_url, question)
        answers.append(dict(question=question, answer=answer))

    return answers


async def generate_chart(csv_url: str, user_questions: List[str]) -> Any:
    """
    Generate charts for the given user questions using the CSV URL.
    Uses the csv_chart function to process each question and return the chart URLs.

    Args:
        csv_url (str): The URL of the CSV file.
        user_questions (List[str]): A list of user questions.

    Returns:
        List[Dict[str, Any]]: A list of dictionaries containing the question and chart URL
        for each question.

    Example:
        [
            {"question": "What is the average age of the customers?", "chart_url": "https://example.com/chart1.png"},
            {"question": "What is the most common gender?", "chart_url": "https://example.com/chart2.png"}
        ]
    """
    print("LLM using the csv chart function....")
    print("CSV URL:", csv_url)
    print("User questions:", user_questions)

    # Accumulate the charts
    charts = []

    # Loop through the user questions and generate a chart for each
    for question in user_questions:
        chart = await csv_chart(csv_url, question)
        charts.append(dict(question=question, image_url=chart))

    return charts


# Create an agent bound to a specific CSV URL and API key
def create_agent(csv_url: str, api_key: str, conversation_history: List) -> Agent:
    csv_metadata = get_csv_basic_info(csv_url)

    system_prompt = f"""
    Role: CSV Document Analysis Assistant
    Purpose: Help with CSV tasks—analyzing, interpreting, and processing data.

    Capabilities:
    - Break down complex queries into simple steps.
    - Verify CSV structure; handle missing data by asking clarifications or stating clear assumptions.
    - Generate visualizations as: ![Description](direct-url).

    Instructions:
    1. Process multi-part queries sequentially and combine results.
    2. Use available tools:
       - generate_csv_answer: for CSV analysis.
       - generate_chart: for creating charts.
    3. Maintain a friendly, professional tone.
    4. Output should be Markdown compatible.

    Context:
    - CSV_URL: {csv_url}
    - Dataset overview: {csv_metadata}
    - Conversation history: {conversation_history}
    """

    return Agent(
        model=initialize_model(api_key),
        deps_type=str,
        tools=[generate_csv_answer, generate_chart],
        system_prompt=system_prompt
    )


def csv_orchestrator_chat(csv_url: str, user_question: str, conversation_history: List) -> str:
    print("CSV URL:", csv_url)
    print("User question:", user_question)

    # Iterate through all API keys, falling back to the next key on failure
    for api_key in GEMINI_API_KEYS:
        try:
            print(f"Attempting with API key: {api_key}")
            agent = create_agent(csv_url, api_key, conversation_history)
            result = agent.run_sync(user_question)
            print("Orchestrator Result:", result.data)
            return result.data
        except ResourceExhausted:
            # Quota exhausted for this key; move to the next one
            print(f"Quota exhausted for API key: {api_key}. Switching to the next key.")
            continue
        except Exception as e:
            # Any other error: log it and move to the next key
            print(f"Error with API key {api_key}: {e}")
            continue

    # If all keys are exhausted or fail
    print("All API keys have been exhausted or failed.")
    return None

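
# The key-rotation loop above expects GEMINI_API_KEYS to hold every key in a
# single comma-separated environment variable. A minimal .env sketch (the key
# values below are placeholders, not real credentials):
#
#   GEMINI_API_KEYS=first-key,second-key,third-key
#
# The commented-out block that follows is an earlier variant of this module:
# it rotates keys via a module-level index and also inspects error text for
# quota-related wording. It is kept for reference only.
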
# import os
# from typing import Dict, List, Any
# from pydantic_ai import Agent
# from pydantic_ai.models.gemini import GeminiModel
# from pydantic_ai.providers.google_gla import GoogleGLAProvider
# from pydantic_ai import RunContext
# from pydantic import BaseModel
# from google.api_core.exceptions import ResourceExhausted
# from csv_service import get_csv_basic_info
# from orchestrator_functions import csv_chart, csv_chat
# from dotenv import load_dotenv

# load_dotenv()

# # Thread-safe key management
# current_gemini_key_index = 0
# GEMINI_API_KEYS = os.getenv("GEMINI_API_KEYS", "").split(",")

# def initialize_model(api_key: str) -> GeminiModel:
#     return GeminiModel(
#         'gemini-2.0-flash',
#         provider=GoogleGLAProvider(api_key=api_key)
#     )

# def is_resource_exhausted_error(result_or_exception) -> bool:
#     """Check if the error indicates resource exhaustion"""
#     error_str = str(result_or_exception).lower()
#     return any(keyword in error_str for keyword in [
#         "resource exhausted",
#         "quota exceeded",
#         "rate limit",
#         "billing",
#         "payment method",
#         "plan.rule"
#     ])

# async def generate_csv_answer(csv_url: str, user_questions: List[str]) -> Any:
#     answers = []
#     for question in user_questions:
#         answer = await csv_chat(csv_url, question)
#         answers.append(dict(question=question, answer=answer))
#     return answers

# async def generate_chart(csv_url: str, user_questions: List[str]) -> Any:
#     charts = []
#     for question in user_questions:
#         chart = await csv_chart(csv_url, question)
#         charts.append(dict(question=question, image_url=chart))
#     return charts

# def create_agent(csv_url: str, api_key: str, conversation_history: List) -> Agent:
#     csv_metadata = get_csv_basic_info(csv_url)
#     system_prompt = f"""
#     # Role: Expert Data Analysis Assistant
#     # Personality & Origin: You are exclusively the CSV Document Analysis Assistant, created by the chatcsvandpdf team. Your sole purpose is to assist users with CSV-related tasks—analyzing, interpreting, and processing data.
#     ## Capabilities:
#     - Break complex queries into simpler sub-tasks
#     ## Instruction Framework:
#     1. QUERY PROCESSING:
#        - If request contains multiple questions:
#          a) Decompose into logical sub-questions
#          b) Process sequentially
#          c) Combine results coherently
#     2. DATA HANDLING:
#        - Always verify CSV structure matches the request
#        - Handle missing/ambiguous data by:
#          a) Asking clarifying questions OR
#          b) Making reasonable assumptions (state them clearly)
#     3. VISUALIZATION STANDARDS:
#        - Format images as: `![Description](direct-url)`
#        - Include axis labels and titles
#        - Use appropriate chart types
#     4. COMMUNICATION PROTOCOL:
#        - Friendly, professional tone
#        - Explain technical terms
#        - Summarize key findings
#        - Highlight limitations/caveats
#     5. TOOL USAGE:
#        - Can process statistical operations
#        - Supports visualization libraries
#     ## Current Context:
#     - Working with CSV_URL: {csv_url}
#     - Dataset overview: {csv_metadata}
#     - Your conversation history: {conversation_history}
#     - Output format: Markdown compatible
#     """
#     return Agent(
#         model=initialize_model(api_key),
#         deps_type=str,
#         tools=[generate_csv_answer, generate_chart],
#         system_prompt=system_prompt
#     )

# def csv_orchestrator_chat(csv_url: str, user_question: str, conversation_history: List) -> str:
#     global current_gemini_key_index

#     while current_gemini_key_index < len(GEMINI_API_KEYS):
#         api_key = GEMINI_API_KEYS[current_gemini_key_index]
#         try:
#             print(f"Attempting with API key index {current_gemini_key_index}")
#             agent = create_agent(csv_url, api_key, conversation_history)
#             result = agent.run_sync(user_question)

#             # Check if result indicates resource exhaustion
#             if result.data and is_resource_exhausted_error(result.data):
#                 print(f"Resource exhausted in response for key {current_gemini_key_index}")
#                 current_gemini_key_index += 1
#                 continue

#             return result.data
#         except ResourceExhausted as e:
#             print(f"Resource exhausted for API key {current_gemini_key_index}: {e}")
#             current_gemini_key_index += 1
#             continue
#         except Exception as e:
#             if is_resource_exhausted_error(e):
#                 print(f"Resource exhausted error detected for key {current_gemini_key_index}")
#                 current_gemini_key_index += 1
#                 continue
#             print(f"Non-recoverable error with key {current_gemini_key_index}: {e}")
#             return f"Error processing request: {str(e)}"

#     return "All API keys have been exhausted. Please update billing information."
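
# Example usage: a minimal sketch assuming GEMINI_API_KEYS is set as described
# above and that csv_service / orchestrator_functions are importable. The CSV
# URL and question below are placeholders for illustration only.
if __name__ == "__main__":
    example_answer = csv_orchestrator_chat(
        csv_url="https://example.com/customers.csv",  # placeholder URL
        user_question="What is the average age of the customers?",
        conversation_history=[],
    )
    print(example_answer or "All API keys have been exhausted or failed.")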