File size: 11,417 Bytes
4fbcf68
 
 
 
 
 
 
 
 
 
 
4f3a783
 
 
4fbcf68
 
 
 
 
 
 
 
 
 
 
 
 
 
bdba660
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4fbcf68
 
 
 
 
 
 
 
 
 
 
 
 
bdba660
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4fbcf68
 
 
 
 
 
 
 
 
 
 
 
 
 
48e6960
4fbcf68
 
d3c4ed6
 
da0456f
d3c4ed6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a000f1e
e453453
ed7c9f7
d3c4ed6
 
 
 
48e6960
a000f1e
d3c4ed6
 
 
 
 
 
 
 
a000f1e
 
4fbcf68
 
 
 
 
 
48e6960
4fbcf68
 
 
 
 
 
 
48e6960
4fbcf68
 
 
 
 
 
 
 
 
 
 
 
 
30e7daa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320

import os
from typing import Dict, List, Any
from pydantic_ai import Agent
from pydantic_ai.models.gemini import GeminiModel
from pydantic_ai.providers.google_gla import GoogleGLAProvider
from pydantic_ai import RunContext
from pydantic import BaseModel
from google.api_core.exceptions import ResourceExhausted  # Import the exception for quota exhaustion
from csv_service import get_csv_basic_info
from orchestrator_functions import csv_chart, csv_chat
from dotenv import load_dotenv

load_dotenv()


# Load all Gemini API keys from the environment.
# GEMINI_API_KEYS is expected to be a comma-separated list of keys.
# Strip surrounding whitespace and drop empty entries so that an unset or
# blank variable yields [] instead of [""] (the old code produced [""],
# causing the key-rotation loop to attempt a request with an empty key).
GEMINI_API_KEYS: List[str] = [
    key.strip()
    for key in os.getenv("GEMINI_API_KEYS", "").split(",")
    if key.strip()
]

# Build a Gemini model instance bound to one specific API key.
def initialize_model(api_key: str) -> GeminiModel:
    """Return a ``gemini-2.0-flash`` GeminiModel using *api_key* for auth."""
    provider = GoogleGLAProvider(api_key=api_key)
    return GeminiModel('gemini-2.0-flash', provider=provider)

# Define the tools
async def generate_csv_answer(csv_url: str, user_questions: List[str]) -> Any:
    """
    Answer each user question against the CSV at *csv_url* via ``csv_chat``.

    Args:
        csv_url (str): The URL of the CSV file.
        user_questions (List[str]): A list of user questions.

    Returns:
        List[Dict[str, Any]]: One ``{"question": ..., "answer": ...}`` dict
        per input question, in the same order as *user_questions*.

    Example:
        [
            {"question": "What is the average age of the customers?", "answer": "The average age is 35."},
            {"question": "What is the most common gender?", "answer": "The most common gender is Male."}
        ]
    """
    print("LLM using the csv chat function....")
    print("CSV URL:", csv_url)
    print("User question:", user_questions)

    # Resolve the questions one at a time, collecting question/answer pairs.
    results: List[Dict[str, Any]] = []
    for question in user_questions:
        results.append({"question": question, "answer": await csv_chat(csv_url, question)})
    return results

async def generate_chart(csv_url: str, user_questions: List[str]) -> Any:
    """
    Produce a chart for each user question against the CSV at *csv_url*
    via ``csv_chart``.

    Args:
        csv_url (str): The URL of the CSV file.
        user_questions (List[str]): A list of user questions.

    Returns:
        List[Dict[str, Any]]: One ``{"question": ..., "image_url": ...}`` dict
        per input question, in the same order as *user_questions*.

    Example:
        [
            {"question": "What is the average age of the customers?", "chart_url": "https://example.com/chart1.png"},
            {"question": "What is the most common gender?", "chart_url": "https://example.com/chart2.png"}
        ]
    """
    print("LLM using the csv chart function....")
    print("CSV URL:", csv_url)
    print("User question:", user_questions)

    # Render the questions one at a time, collecting question/chart pairs.
    results: List[Dict[str, Any]] = []
    for question in user_questions:
        results.append({"question": question, "image_url": await csv_chart(csv_url, question)})
    return results

# Function to create an agent with a specific CSV URL
def create_agent(csv_url: str, api_key: str, conversation_history: List) -> Agent:
    """Build a pydantic_ai Agent configured for one CSV file and API key.

    The CSV's basic metadata and the prior conversation are interpolated
    directly into the system prompt; the agent is given the two async tools
    defined above (``generate_csv_answer`` and ``generate_chart``).

    Args:
        csv_url: URL of the CSV file to analyze.
        api_key: Gemini API key used to initialize the underlying model.
        conversation_history: Prior conversation turns, embedded verbatim
            in the system prompt.

    Returns:
        A configured ``Agent`` ready for ``run_sync``.
    """
    # NOTE(review): get_csv_basic_info presumably fetches/parses the CSV here,
    # so agent creation does network/IO work per call — confirm if this should
    # be cached across retries with different API keys.
    csv_metadata = get_csv_basic_info(csv_url)
    
    system_prompt = f"""
# Role: Expert Data Analysis Assistant
# Personality & Origin: You are exclusively the CSV Document Analysis Assistant, created by the chatcsvandpdf team. Your sole purpose is to assist users with CSV-related tasks—analyzing, interpreting, and processing data.

## Capabilities:
- Break complex queries into simpler sub-tasks

## Instruction Framework:
1. QUERY PROCESSING:
   - If request contains multiple questions:
     a) Decompose into logical sub-questions
     b) Process sequentially
     c) Combine results coherently

2. DATA HANDLING:
   - Always verify CSV structure matches the request
   - Handle missing/ambiguous data by:
     a) Asking clarifying questions OR
     b) Making reasonable assumptions (state them clearly)

3. VISUALIZATION STANDARDS:
   - Format images as: `![Description](direct-url)`
   - Include axis labels and titles
   - Use appropriate chart types

4. COMMUNICATION PROTOCOL:
   - Friendly, professional tone
   - Explain technical terms
   - Summarize key findings
   - Highlight limitations/caveats

5. TOOL USAGE:
   - Can process statistical operations
   - Supported visualization libraries (matplotlib, seaborn)
   - Other chart libraries (e.g., plotly, bokeh etc.) not supported

## Current Context:
- Working with CSV_URL: {csv_url}
- Dataset overview: {csv_metadata}
- Your conversation history: {conversation_history}
- Output format: Markdown compatible

## Response Template:
1. Confirm understanding of request
2. Outline analysis approach
3. Present results with visualizations (if applicable)
4. Provide interpretation
5. Offer next-step suggestions
"""
    
    return Agent(
        model=initialize_model(api_key),
        deps_type=str,
        tools=[generate_csv_answer, generate_chart],
        system_prompt=system_prompt
    )

def csv_orchestrator_chat(csv_url: str, user_question: str, conversation_history: List) -> str:
    """Answer *user_question* about the CSV, rotating API keys on failure.

    Tries each key in ``GEMINI_API_KEYS`` in order: quota exhaustion or any
    other per-key error moves on to the next key.

    Args:
        csv_url: URL of the CSV file under analysis.
        user_question: The user's question (may contain several sub-questions).
        conversation_history: Prior conversation turns passed to the agent.

    Returns:
        The agent's answer text, or ``None`` when every key fails.
    """
    print("CSV URL:", csv_url)
    print("User questions:", user_question)

    # Iterate through all API keys until one succeeds
    for api_key in GEMINI_API_KEYS:
        try:
            # TODO(security): this logs the raw API key; consider masking it.
            print(f"Attempting with API key: {api_key}")
            agent = create_agent(csv_url, api_key, conversation_history)
            result = agent.run_sync(user_question)
            print("Orchestrator Result:", result.data)
            return result.data
        # Bug fix: the original clause was `except ResourceExhausted or Exception as e:`,
        # a boolean expression that evaluates to just ResourceExhausted — the `or Exception`
        # did nothing. Catch the specific exception explicitly instead.
        except ResourceExhausted:
            print(f"Quota exhausted for API key: {api_key}. Switching to the next key.")
            continue  # Move to the next key
        except Exception as e:
            # Any other failure with this key: log it and try the next one.
            print(f"Error with API key {api_key}: {e}")
            continue  # Move to the next key

    # If all keys are exhausted or fail
    print("All API keys have been exhausted or failed.")
    return None











# import os
# from typing import Dict, List, Any
# from pydantic_ai import Agent
# from pydantic_ai.models.gemini import GeminiModel
# from pydantic_ai.providers.google_gla import GoogleGLAProvider
# from pydantic_ai import RunContext
# from pydantic import BaseModel
# from google.api_core.exceptions import ResourceExhausted
# from csv_service import get_csv_basic_info
# from orchestrator_functions import csv_chart, csv_chat
# from dotenv import load_dotenv

# load_dotenv()

# # Thread-safe key management
# current_gemini_key_index = 0
# GEMINI_API_KEYS = os.getenv("GEMINI_API_KEYS", "").split(",")

# def initialize_model(api_key: str) -> GeminiModel:
#     return GeminiModel(
#         'gemini-2.0-flash',
#         provider=GoogleGLAProvider(api_key=api_key)
#     )

# def is_resource_exhausted_error(result_or_exception) -> bool:
#     """Check if the error indicates resource exhaustion"""
#     error_str = str(result_or_exception).lower()
#     return any(keyword in error_str for keyword in [
#         "resource exhausted",
#         "quota exceeded",
#         "rate limit",
#         "billing",
#         "payment method",
#         "plan.rule"
#     ])

# async def generate_csv_answer(csv_url: str, user_questions: List[str]) -> Any:
#     answers = []
#     for question in user_questions:
#         answer = await csv_chat(csv_url, question)
#         answers.append(dict(question=question, answer=answer))
#     return answers

# async def generate_chart(csv_url: str, user_questions: List[str]) -> Any:
#     charts = []
#     for question in user_questions:
#         chart = await csv_chart(csv_url, question)
#         charts.append(dict(question=question, image_url=chart))
#     return charts

# def create_agent(csv_url: str, api_key: str, conversation_history: List) -> Agent:
#     csv_metadata = get_csv_basic_info(csv_url)
    
#     system_prompt = f"""
# # Role: Expert Data Analysis Assistant
# # Personality & Origin: You are exclusively the CSV Document Analysis Assistant, created by the chatcsvandpdf team. Your sole purpose is to assist users with CSV-related tasks—analyzing, interpreting, and processing data.

# ## Capabilities:
# - Break complex queries into simpler sub-tasks

# ## Instruction Framework:
# 1. QUERY PROCESSING:
#    - If request contains multiple questions:
#      a) Decompose into logical sub-questions
#      b) Process sequentially
#      c) Combine results coherently

# 2. DATA HANDLING:
#    - Always verify CSV structure matches the request
#    - Handle missing/ambiguous data by:
#      a) Asking clarifying questions OR
#      b) Making reasonable assumptions (state them clearly)

# 3. VISUALIZATION STANDARDS:
#    - Format images as: `![Description](direct-url)`
#    - Include axis labels and titles
#    - Use appropriate chart types

# 4. COMMUNICATION PROTOCOL:
#    - Friendly, professional tone
#    - Explain technical terms
#    - Summarize key findings
#    - Highlight limitations/caveats

# 5. TOOL USAGE:
#    - Can process statistical operations
#    - Supports visualization libraries

# ## Current Context:
# - Working with CSV_URL: {csv_url}
# - Dataset overview: {csv_metadata}
# - Your conversation history: {conversation_history}
# - Output format: Markdown compatible
# """
    
#     return Agent(
#         model=initialize_model(api_key),
#         deps_type=str,
#         tools=[generate_csv_answer, generate_chart],
#         system_prompt=system_prompt
#     )

# def csv_orchestrator_chat(csv_url: str, user_question: str, conversation_history: List) -> str:
#     global current_gemini_key_index
    
#     while current_gemini_key_index < len(GEMINI_API_KEYS):
#         api_key = GEMINI_API_KEYS[current_gemini_key_index]
        
#         try:
#             print(f"Attempting with API key index {current_gemini_key_index}")
#             agent = create_agent(csv_url, api_key, conversation_history)
#             result = agent.run_sync(user_question)
            
#             # Check if result indicates resource exhaustion
#             if result.data and is_resource_exhausted_error(result.data):
#                 print(f"Resource exhausted in response for key {current_gemini_key_index}")
#                 current_gemini_key_index += 1
#                 continue
                
#             return result.data
            
#         except ResourceExhausted as e:
#             print(f"Resource exhausted for API key {current_gemini_key_index}: {e}")
#             current_gemini_key_index += 1
#             continue
            
#         except Exception as e:
#             if is_resource_exhausted_error(e):
#                 print(f"Resource exhausted error detected for key {current_gemini_key_index}")
#                 current_gemini_key_index += 1
#                 continue
#             print(f"Non-recoverable error with key {current_gemini_key_index}: {e}")
#             return f"Error processing request: {str(e)}"

#     return "All API keys have been exhausted. Please update billing information."