Soumik555 commited on
Commit
30e7daa
·
1 Parent(s): ee5c938

openai key rotate

Browse files
gemini_langchain_agent.py CHANGED
@@ -18,12 +18,17 @@ matplotlib.use('Agg')
18
 
19
  load_dotenv()
20
  model_name = 'gemini-2.0-flash' # Specify the model name
21
- google_api_keys = list(reversed(os.getenv("GEMINI_API_KEYS").split(",")))
22
- current_key_index = 0 # Global index for API keys
 
 
 
 
 
 
23
 
24
  def create_agent(llm, data, tools):
25
  """Create agent with tool names"""
26
-
27
  return create_pandas_dataframe_agent(
28
  llm,
29
  data,
@@ -34,69 +39,62 @@ def create_agent(llm, data, tools):
34
  return_intermediate_steps=True
35
  )
36
 
37
-
38
  def _prompt_generator(question: str, chart_required: bool):
39
-
40
  chat_prompt = f"""You are a senior data analyst working with CSV data. Adhere strictly to the following guidelines:
41
 
42
- 1. **Data Verification:** Always inspect the data with `.sample(5).to_dict()` before performing any analysis.
43
- 2. **Data Integrity:** Ensure proper handling of null values to maintain accuracy and reliability.
44
- 3. **Communication:** Provide concise, professional, and well-structured responses.
45
- 4. Avoid including any internal processing details or references to the methods used to generate your response (ex: based on the tool call, using the function -> These types of phrases.)
46
 
47
- **Query:** {question}
48
-
49
- """
50
 
51
  chart_prompt = f"""You are a senior data analyst working with CSV data. Follow these rules STRICTLY:
52
 
53
- 1. Generate ONE unique identifier FIRST using: unique_id = uuid.uuid4().hex
54
- 2. Visualization requirements:
55
- - Adjust font sizes, rotate labels (45° if needed), truncate for readability
56
- - Figure size: (12, 6)
57
- - Descriptive titles (fontsize=14)
58
- - Colorblind-friendly palettes
59
- 3. File handling rules:
60
- - Create MAXIMUM 2 charts if absolutely necessary
61
- - For multiple charts:
62
- * Arrange in grid format (2x1 vertical layout preferred)
63
- * Use SAME unique_id with suffixes:
64
- - f"{{unique_id}}_1.png"
65
- - f"{{unique_id}}_2.png"
66
- - Save EXCLUSIVELY to "generated_charts" folder
67
- - File naming: f"chart_{{unique_id}}.png" (for single chart)
68
- 4. FINAL OUTPUT MUST BE:
69
- - For single chart: f"generated_charts/chart_{{unique_id}}.png"
70
- - For multiple charts: f"generated_charts/chart_{{unique_id}}.png" (combined grid image)
71
- - **ONLY return this full path string, nothing else**
72
-
73
- **Query:** {question}
74
-
75
- IMPORTANT:
76
- - Generate the unique_id FIRST before any operations
77
- - Use THE SAME unique_id throughout entire process
78
- - NEVER generate new UUIDs after initial creation
79
- - Return EXACT filepath string of the final saved chart
80
- """
81
 
82
-
83
  if chart_required:
84
  return ChatPromptTemplate.from_template(chart_prompt)
85
  else:
86
  return ChatPromptTemplate.from_template(chat_prompt)
87
 
88
  def langchain_gemini_csv_handler(csv_url: str, question: str, chart_required: bool):
89
- global current_key_index
90
  data = pd.read_csv(csv_url)
91
 
92
- attempts = 0
93
- total_keys = len(google_api_keys)
94
- while attempts < total_keys:
95
  try:
96
- api_key = google_api_keys[current_key_index]
97
- print(f"Using API key index {current_key_index}")
98
-
99
- llm = ChatGoogleGenerativeAI(model=model_name, api_key=api_key)
100
 
101
  # Create tool with validated name
102
  tool = PythonAstREPLTool(
@@ -113,15 +111,222 @@ def langchain_gemini_csv_handler(csv_url: str, question: str, chart_required: bo
113
  )
114
 
115
  agent = create_agent(llm, data, [tool])
116
-
117
  prompt = _prompt_generator(question, chart_required)
118
  result = agent.invoke({"input": prompt})
119
- return result.get("output")
 
 
 
 
 
120
 
121
  except Exception as e:
122
- print(f"Error using API key index {current_key_index}: {e}")
123
- current_key_index = (current_key_index + 1) % total_keys
124
- attempts += 1
125
 
126
- print("All API keys have been exhausted.")
127
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  load_dotenv()
20
  model_name = 'gemini-2.0-flash' # Specify the model name
21
+ google_api_keys = os.getenv("GEMINI_API_KEYS").split(",")
22
+
23
+ # Create pre-initialized LLM instances
24
+ llm_instances = [
25
+ ChatGoogleGenerativeAI(model=model_name, api_key=key)
26
+ for key in google_api_keys
27
+ ]
28
+ current_instance_index = 0 # Track current instance being used
29
 
30
  def create_agent(llm, data, tools):
31
  """Create agent with tool names"""
 
32
  return create_pandas_dataframe_agent(
33
  llm,
34
  data,
 
39
  return_intermediate_steps=True
40
  )
41
 
 
42
  def _prompt_generator(question: str, chart_required: bool):
 
43
  chat_prompt = f"""You are a senior data analyst working with CSV data. Adhere strictly to the following guidelines:
44
 
45
+ 1. **Data Verification:** Always inspect the data with `.sample(5).to_dict()` before performing any analysis.
46
+ 2. **Data Integrity:** Ensure proper handling of null values to maintain accuracy and reliability.
47
+ 3. **Communication:** Provide concise, professional, and well-structured responses.
48
+ 4. Avoid including any internal processing details or references to the methods used to generate your response (ex: based on the tool call, using the function -> These types of phrases.)
49
 
50
+ **Query:** {question}
51
+ """
 
52
 
53
  chart_prompt = f"""You are a senior data analyst working with CSV data. Follow these rules STRICTLY:
54
 
55
+ 1. Generate ONE unique identifier FIRST using: unique_id = uuid.uuid4().hex
56
+ 2. Visualization requirements:
57
+ - Adjust font sizes, rotate labels (45° if needed), truncate for readability
58
+ - Figure size: (12, 6)
59
+ - Descriptive titles (fontsize=14)
60
+ - Colorblind-friendly palettes
61
+ 3. File handling rules:
62
+ - Create MAXIMUM 2 charts if absolutely necessary
63
+ - For multiple charts:
64
+ * Arrange in grid format (2x1 vertical layout preferred)
65
+ * Use SAME unique_id with suffixes:
66
+ - f"{{unique_id}}_1.png"
67
+ - f"{{unique_id}}_2.png"
68
+ - Save EXCLUSIVELY to "generated_charts" folder
69
+ - File naming: f"chart_{{unique_id}}.png" (for single chart)
70
+ 4. FINAL OUTPUT MUST BE:
71
+ - For single chart: f"generated_charts/chart_{{unique_id}}.png"
72
+ - For multiple charts: f"generated_charts/chart_{{unique_id}}.png" (combined grid image)
73
+ - **ONLY return this full path string, nothing else**
74
+
75
+ **Query:** {question}
76
+
77
+ IMPORTANT:
78
+ - Generate the unique_id FIRST before any operations
79
+ - Use THE SAME unique_id throughout entire process
80
+ - NEVER generate new UUIDs after initial creation
81
+ - Return EXACT filepath string of the final saved chart
82
+ """
83
 
 
84
  if chart_required:
85
  return ChatPromptTemplate.from_template(chart_prompt)
86
  else:
87
  return ChatPromptTemplate.from_template(chat_prompt)
88
 
89
  def langchain_gemini_csv_handler(csv_url: str, question: str, chart_required: bool):
90
+ global current_instance_index
91
  data = pd.read_csv(csv_url)
92
 
93
+ # Try all available instances
94
+ while current_instance_index < len(llm_instances):
 
95
  try:
96
+ llm = llm_instances[current_instance_index]
97
+ print(f"Using LLM instance index {current_instance_index}")
 
 
98
 
99
  # Create tool with validated name
100
  tool = PythonAstREPLTool(
 
111
  )
112
 
113
  agent = create_agent(llm, data, [tool])
 
114
  prompt = _prompt_generator(question, chart_required)
115
  result = agent.invoke({"input": prompt})
116
+ output = result.get("output")
117
+
118
+ if output is None:
119
+ raise ValueError("Received None response from agent")
120
+
121
+ return output
122
 
123
  except Exception as e:
124
+ print(f"Error using LLM instance index {current_instance_index}: {e}")
125
+ current_instance_index += 1
 
126
 
127
+ print("All LLM instances have been exhausted.")
128
  return None
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+
137
+
138
+
139
+
140
+
141
+
142
+
143
+ # import os
144
+ # import re
145
+ # import uuid
146
+ # from langchain_google_genai import ChatGoogleGenerativeAI
147
+ # import pandas as pd
148
+ # from langchain_core.prompts import ChatPromptTemplate
149
+ # from langchain_experimental.tools import PythonAstREPLTool
150
+ # from langchain_experimental.agents import create_pandas_dataframe_agent
151
+ # from dotenv import load_dotenv
152
+ # import numpy as np
153
+ # import matplotlib.pyplot as plt
154
+ # import matplotlib
155
+ # import seaborn as sns
156
+ # import datetime as dt
157
+
158
+ # # Set the backend for matplotlib to 'Agg' to avoid GUI issues
159
+ # matplotlib.use('Agg')
160
+
161
+ # load_dotenv()
162
+ # model_name = 'gemini-2.0-flash' # Specify the model name
163
+ # google_api_keys = os.getenv("GEMINI_API_KEYS").split(",")
164
+
165
+ # # Create pre-initialized LLM instances
166
+ # llm_instances = [
167
+ # ChatGoogleGenerativeAI(model=model_name, api_key=key)
168
+ # for key in google_api_keys
169
+ # ]
170
+ # current_instance_index = 0 # Track current instance being used
171
+
172
+ # def is_retryable_error(error: Exception) -> bool:
173
+ # """Check if the error should trigger a retry with next instance"""
174
+ # error_str = str(error).lower()
175
+
176
+ # retry_conditions = [
177
+ # # Rate limiting and quota errors
178
+ # '429' in error_str,
179
+ # 'quota' in error_str,
180
+ # 'rate limit' in error_str,
181
+ # 'resource exhausted' in error_str,
182
+ # 'exceeded' in error_str,
183
+ # 'limit reached' in error_str,
184
+
185
+ # # Authentication and permission errors
186
+ # 'permission denied' in error_str,
187
+ # 'invalid api key' in error_str,
188
+ # 'authentication' in error_str,
189
+
190
+ # # Server errors
191
+ # '500' in error_str,
192
+ # '503' in error_str,
193
+ # 'service unavailable' in error_str,
194
+
195
+ # # Connection issues
196
+ # 'timeout' in error_str,
197
+ # 'connection' in error_str,
198
+
199
+ # # Content policy
200
+ # 'content policy' in error_str,
201
+ # 'safety' in error_str,
202
+ # 'blocked' in error_str
203
+ # ]
204
+
205
+ # return any(retry_conditions)
206
+
207
+ # def create_agent(llm, data, tools):
208
+ # """Create agent with tool names"""
209
+ # return create_pandas_dataframe_agent(
210
+ # llm,
211
+ # data,
212
+ # agent_type="tool-calling",
213
+ # verbose=True,
214
+ # allow_dangerous_code=True,
215
+ # extra_tools=tools,
216
+ # return_intermediate_steps=True
217
+ # )
218
+
219
+ # def _prompt_generator(question: str, chart_required: bool):
220
+ # chat_prompt = f"""You are a senior data analyst working with CSV data. Adhere strictly to the following guidelines:
221
+
222
+ # 1. **Data Verification:** Always inspect the data with `.sample(5).to_dict()` before performing any analysis.
223
+ # 2. **Data Integrity:** Ensure proper handling of null values to maintain accuracy and reliability.
224
+ # 3. **Communication:** Provide concise, professional, and well-structured responses.
225
+ # 4. Avoid including any internal processing details or references to the methods used to generate your response (ex: based on the tool call, using the function -> These types of phrases.)
226
+
227
+ # **Query:** {question}
228
+ # """
229
+
230
+ # chart_prompt = f"""You are a senior data analyst working with CSV data. Follow these rules STRICTLY:
231
+
232
+ # 1. Generate ONE unique identifier FIRST using: unique_id = uuid.uuid4().hex
233
+ # 2. Visualization requirements:
234
+ # - Adjust font sizes, rotate labels (45° if needed), truncate for readability
235
+ # - Figure size: (12, 6)
236
+ # - Descriptive titles (fontsize=14)
237
+ # - Colorblind-friendly palettes
238
+ # 3. File handling rules:
239
+ # - Create MAXIMUM 2 charts if absolutely necessary
240
+ # - For multiple charts:
241
+ # * Arrange in grid format (2x1 vertical layout preferred)
242
+ # * Use SAME unique_id with suffixes:
243
+ # - f"{{unique_id}}_1.png"
244
+ # - f"{{unique_id}}_2.png"
245
+ # - Save EXCLUSIVELY to "generated_charts" folder
246
+ # - File naming: f"chart_{{unique_id}}.png" (for single chart)
247
+ # 4. FINAL OUTPUT MUST BE:
248
+ # - For single chart: f"generated_charts/chart_{{unique_id}}.png"
249
+ # - For multiple charts: f"generated_charts/chart_{{unique_id}}.png" (combined grid image)
250
+ # - **ONLY return this full path string, nothing else**
251
+
252
+ # **Query:** {question}
253
+
254
+ # IMPORTANT:
255
+ # - Generate the unique_id FIRST before any operations
256
+ # - Use THE SAME unique_id throughout entire process
257
+ # - NEVER generate new UUIDs after initial creation
258
+ # - Return EXACT filepath string of the final saved chart
259
+ # """
260
+
261
+ # if chart_required:
262
+ # return ChatPromptTemplate.from_template(chart_prompt)
263
+ # else:
264
+ # return ChatPromptTemplate.from_template(chat_prompt)
265
+
266
+ # def langchain_gemini_csv_handler(csv_url: str, question: str, chart_required: bool):
267
+ # global current_instance_index
268
+ # data = pd.read_csv(csv_url)
269
+
270
+ # # Track first error in case all instances fail
271
+ # first_error = None
272
+
273
+ # while current_instance_index < len(llm_instances):
274
+ # try:
275
+ # llm = llm_instances[current_instance_index]
276
+ # print(f"Attempting with LLM instance {current_instance_index + 1}/{len(llm_instances)}")
277
+
278
+ # # Create tool with validated name
279
+ # tool = PythonAstREPLTool(
280
+ # locals={
281
+ # "df": data,
282
+ # "pd": pd,
283
+ # "np": np,
284
+ # "plt": plt,
285
+ # "sns": sns,
286
+ # "matplotlib": matplotlib,
287
+ # "uuid": uuid,
288
+ # "dt": dt
289
+ # },
290
+ # )
291
+
292
+ # agent = create_agent(llm, data, [tool])
293
+ # prompt = _prompt_generator(question, chart_required)
294
+ # result = agent.invoke({"input": prompt})
295
+ # output = result.get("output")
296
+
297
+ # if output is None:
298
+ # raise ValueError("Received None response from agent")
299
+
300
+ # if isinstance(output, str) and any(err in output.lower() for err in ['quota', 'limit', 'exhausted']):
301
+ # raise ValueError(f"API limitation detected in response: {output}")
302
+
303
+ # return output
304
+
305
+ # except Exception as e:
306
+ # error_msg = f"Error with instance {current_instance_index}: {str(e)}"
307
+ # print(error_msg)
308
+
309
+ # # Store first error if not set
310
+ # if first_error is None:
311
+ # first_error = error_msg
312
+
313
+ # # Check if we should try next instance
314
+ # if is_retryable_error(e):
315
+ # current_instance_index += 1
316
+ # continue
317
+ # else:
318
+ # # Non-retryable error - return immediately
319
+ # return {
320
+ # "error": "Non-retryable error occurred",
321
+ # "details": str(e),
322
+ # "instance": current_instance_index
323
+ # }
324
+
325
+ # # All instances exhausted
326
+ # error_response = {
327
+ # "error": "All API instances failed",
328
+ # "details": first_error or "Unknown error",
329
+ # "attempted_instances": current_instance_index
330
+ # }
331
+ # print(error_response)
332
+ # return error_response
gemini_report_generator.py CHANGED
@@ -364,3 +364,413 @@ async def generate_csv_report(csv_url: str, query: str) -> FileBoxProps:
364
  # result = gemini_llm_chat("./documents/enterprise_sales_data.csv",
365
  # "Generate a detailed sales report of the last 6 months from all the aspects and include a bar chart showing the sales by region.")
366
  # print(json.dumps(result, indent=2))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364
  # result = gemini_llm_chat("./documents/enterprise_sales_data.csv",
365
  # "Generate a detailed sales report of the last 6 months from all the aspects and include a bar chart showing the sales by region.")
366
  # print(json.dumps(result, indent=2))
367
+
368
+
369
+
370
+
371
+ # import json
372
+ # import numpy as np
373
+ # import pandas as pd
374
+ # import re
375
+ # import os
376
+ # import uuid
377
+ # import logging
378
+ # from io import StringIO
379
+ # import sys
380
+ # import traceback
381
+ # from typing import Optional, Dict, Any, List, Tuple
382
+ # from pydantic import BaseModel, Field
383
+ # from google.api_core import exceptions as google_exceptions
384
+ # from google.generativeai import GenerativeModel, configure
385
+ # from dotenv import load_dotenv
386
+ # import seaborn as sns
387
+ # import datetime as dt
388
+ # from supabase_service import upload_file_to_supabase
389
+
390
+ # pd.set_option('display.max_columns', None)
391
+ # pd.set_option('display.max_rows', None)
392
+ # pd.set_option('display.max_colwidth', None)
393
+
394
+ # load_dotenv()
395
+
396
+ # API_KEYS = os.getenv("GEMINI_API_KEYS", "").split(",")
397
+ # MODEL_NAME = 'gemini-2.0-flash'
398
+
399
+ # class FileProps(BaseModel):
400
+ # fileName: str
401
+ # filePath: str
402
+ # fileType: str # 'csv' | 'image'
403
+
404
+ # class Files(BaseModel):
405
+ # csv_files: List[FileProps]
406
+ # image_files: List[FileProps]
407
+
408
+ # class FileBoxProps(BaseModel):
409
+ # files: Files
410
+
411
+ # os.environ['MPLBACKEND'] = 'agg'
412
+ # import matplotlib.pyplot as plt
413
+ # plt.show = lambda: None
414
+
415
+ # logging.basicConfig(
416
+ # level=logging.INFO,
417
+ # format='%(asctime)s - %(levelname)s - %(message)s'
418
+ # )
419
+ # logger = logging.getLogger(__name__)
420
+
421
+ # class GeminiInstance:
422
+ # """Wrapper for a single Gemini API instance"""
423
+
424
+ # def __init__(self, api_key: str):
425
+ # self.api_key = api_key
426
+ # self.model = None
427
+ # self.active = False
428
+ # self.failure_count = 0
429
+ # self.last_error = None
430
+
431
+ # def initialize(self) -> bool:
432
+ # try:
433
+ # configure(api_key=self.api_key)
434
+ # self.model = GenerativeModel(MODEL_NAME)
435
+ # self.active = True
436
+ # logger.info(f"Initialized Gemini instance with key: {self._mask_key()}")
437
+ # return True
438
+ # except Exception as e:
439
+ # self.last_error = str(e)
440
+ # self.failure_count += 1
441
+ # logger.error(f"Failed to initialize Gemini instance: {self._mask_key()}. Error: {str(e)}")
442
+ # return False
443
+
444
+ # def _mask_key(self) -> str:
445
+ # return f"{self.api_key[:8]}...{self.api_key[-4:]}" if self.api_key else "None"
446
+
447
+ # def generate_content(self, prompt: str) -> Tuple[Optional[str], Optional[Exception]]:
448
+ # try:
449
+ # response = self.model.generate_content(prompt)
450
+ # return response.text, None
451
+ # except Exception as e:
452
+ # self.last_error = str(e)
453
+ # self.failure_count += 1
454
+ # return None, e
455
+
456
+ # class GeminiPool:
457
+ # """Pool of Gemini API instances with automatic failover"""
458
+
459
+ # def __init__(self, api_keys: List[str]):
460
+ # self.instances = [GeminiInstance(key) for key in api_keys]
461
+ # self.current_index = 0
462
+ # self.total_attempts = 0
463
+
464
+ # def get_active_instance(self) -> Optional[GeminiInstance]:
465
+ # """Get next available instance with automatic rotation"""
466
+ # if not self.instances:
467
+ # return None
468
+
469
+ # for _ in range(len(self.instances)):
470
+ # instance = self.instances[self.current_index]
471
+ # self.current_index = (self.current_index + 1) % len(self.instances)
472
+ # self.total_attempts += 1
473
+
474
+ # if instance.active or instance.initialize():
475
+ # return instance
476
+
477
+ # return None
478
+
479
+ # def should_retry(self, error: Exception) -> bool:
480
+ # """Determine if the error is retryable"""
481
+ # if isinstance(error, google_exceptions.ResourceExhausted):
482
+ # return True
483
+ # if isinstance(error, google_exceptions.TooManyRequests):
484
+ # return True
485
+ # if isinstance(error, google_exceptions.ServiceUnavailable):
486
+ # return True
487
+
488
+ # error_str = str(error).lower()
489
+ # retry_phrases = [
490
+ # 'quota',
491
+ # 'limit',
492
+ # 'exhausted',
493
+ # 'retry',
494
+ # 'unavailable',
495
+ # 'overloaded',
496
+ # '429',
497
+ # '503'
498
+ # ]
499
+ # return any(phrase in error_str for phrase in retry_phrases)
500
+
501
+ # class PythonREPL:
502
+ # """Secure Python REPL with file generation tracking"""
503
+
504
+ # def __init__(self, df: pd.DataFrame):
505
+ # self.df = df
506
+ # self.output_dir = os.path.abspath(f'generated_outputs/{uuid.uuid4()}')
507
+ # os.makedirs(self.output_dir, exist_ok=True)
508
+ # self.local_env = {
509
+ # "pd": pd,
510
+ # "df": self.df.copy(),
511
+ # "plt": plt,
512
+ # "os": os,
513
+ # "uuid": uuid,
514
+ # "sns": sns,
515
+ # "json": json,
516
+ # "dt": dt,
517
+ # "output_dir": self.output_dir
518
+ # }
519
+
520
+ # def execute(self, code: str) -> Dict[str, Any]:
521
+ # old_stdout = sys.stdout
522
+ # sys.stdout = mystdout = StringIO()
523
+ # file_tracker = {
524
+ # 'csv_files': set(),
525
+ # 'image_files': set()
526
+ # }
527
+
528
+ # try:
529
+ # code = f"""
530
+ # import matplotlib.pyplot as plt
531
+ # plt.switch_backend('agg')
532
+ # {code}
533
+ # plt.close('all')
534
+ # """
535
+ # exec(code, self.local_env)
536
+ # self.df = self.local_env.get('df', self.df)
537
+
538
+ # # Track generated files
539
+ # for fname in os.listdir(self.output_dir):
540
+ # if fname.endswith('.csv'):
541
+ # file_tracker['csv_files'].add(fname)
542
+ # elif fname.lower().endswith(('.png', '.jpg', '.jpeg')):
543
+ # file_tracker['image_files'].add(fname)
544
+
545
+ # error = False
546
+ # error_msg = None
547
+ # except Exception as e:
548
+ # error_msg = traceback.format_exc()
549
+ # error = True
550
+ # finally:
551
+ # sys.stdout = old_stdout
552
+
553
+ # return {
554
+ # "output": mystdout.getvalue(),
555
+ # "error": error,
556
+ # "error_message": error_msg if error else None,
557
+ # "df": self.local_env.get('df', self.df),
558
+ # "output_dir": self.output_dir,
559
+ # "files": {
560
+ # "csv": [os.path.join(self.output_dir, f) for f in file_tracker['csv_files']],
561
+ # "images": [os.path.join(self.output_dir, f) for f in file_tracker['image_files']]
562
+ # }
563
+ # }
564
+
565
+ # class RethinkAgent(BaseModel):
566
+ # df: pd.DataFrame
567
+ # max_retries: int = Field(default=5, ge=1)
568
+ # current_retry: int = Field(default=0, ge=0)
569
+ # repl: Optional[PythonREPL] = None
570
+ # gemini_pool: Optional[GeminiPool] = None
571
+
572
+ # class Config:
573
+ # arbitrary_types_allowed = True
574
+
575
+ # def _extract_code(self, response: str) -> str:
576
+ # code_match = re.search(r'```python(.*?)```', response, re.DOTALL)
577
+ # return code_match.group(1).strip() if code_match else response.strip()
578
+
579
+ # def _generate_initial_prompt(self, query: str) -> str:
580
+ # return f"""Generate DIRECT EXECUTION CODE (no functions, no explanations) following STRICT RULES:
581
+
582
+ # MANDATORY REQUIREMENTS:
583
+ # 1. Operate directly on existing 'df' variable
584
+ # 2. Save ALL final DataFrames to CSV using: df.to_csv(f'{{output_dir}}/descriptive_name.csv')
585
+ # 3. For visualizations: plt.savefig(f'{{output_dir}}/chart_name.png')
586
+ # 4. Use EXACTLY this structure:
587
+ # # Data processing
588
+ # df_processed = df[...] # filtering/grouping
589
+ # # Save results
590
+ # df_processed.to_csv(f'{{output_dir}}/result.csv')
591
+ # # Visualizations (if needed)
592
+ # plt.figure()
593
+ # ... plotting code ...
594
+ # plt.savefig(f'{{output_dir}}/chart.png')
595
+ # plt.close()
596
+
597
+ # FORBIDDEN:
598
+ # - Function definitions
599
+ # - Dummy data creation
600
+ # - Any code blocks besides pandas operations and matplotlib
601
+ # - Print statements showing dataframes
602
+
603
+ # DATAFRAME COLUMNS: {', '.join(self.df.columns)}
604
+ # DATAFRAME'S FIRST FIVE ROWS: {self.df.head().to_dict('records')}
605
+ # USER QUERY: {query}
606
+
607
+ # EXAMPLE RESPONSE FOR "Sales by region":
608
+ # # Data processing
609
+ # sales_by_region = df.groupby('region')['sales'].sum().reset_index()
610
+ # # Save results
611
+ # sales_by_region.to_csv(f'{{output_dir}}/sales_by_region.csv')
612
+ # """
613
+
614
+ # def _generate_retry_prompt(self, query: str, error: str, code: str) -> str:
615
+ # return f"""FIX THIS CODE (failed with: {error}) by STRICTLY FOLLOWING:
616
+
617
+ # 1. REMOVE ALL FUNCTION DEFINITIONS
618
+ # 2. ENSURE DIRECT DF OPERATIONS
619
+ # 3. USE EXPLICIT output_dir PATHS
620
+ # 4. ADD NECESSARY IMPORTS IF MISSING
621
+ # 5. VALIDATE COLUMN NAMES EXIST
622
+
623
+ # BAD CODE:
624
+ # {code}
625
+
626
+ # CORRECTED CODE:"""
627
+
628
+ # def initialize_pool(self) -> bool:
629
+ # self.gemini_pool = GeminiPool(API_KEYS)
630
+ # return True
631
+
632
+ # def generate_code(self, query: str, error: Optional[str] = None, previous_code: Optional[str] = None) -> str:
633
+ # prompt = self._generate_retry_prompt(query, error, previous_code) if error else self._generate_initial_prompt(query)
634
+
635
+ # instance = self.gemini_pool.get_active_instance()
636
+ # if not instance:
637
+ # raise RuntimeError("No available Gemini instances")
638
+
639
+ # response_text, error = instance.generate_content(prompt)
640
+
641
+ # if error:
642
+ # if self.gemini_pool.should_retry(error):
643
+ # logger.warning(f"Retryable error from Gemini: {str(error)}")
644
+ # return self.generate_code(query, error, previous_code)
645
+ # raise error
646
+
647
+ # return self._extract_code(response_text)
648
+
649
+ # def execute_query(self, query: str) -> Dict[str, Any]:
650
+ # self.repl = PythonREPL(self.df)
651
+ # result = None
652
+
653
+ # while self.current_retry < self.max_retries:
654
+ # try:
655
+ # code = self.generate_code(query,
656
+ # result["error_message"] if result else None,
657
+ # result["code"] if result else None)
658
+ # execution_result = self.repl.execute(code)
659
+
660
+ # if execution_result["error"]:
661
+ # self.current_retry += 1
662
+ # result = {
663
+ # "error_message": execution_result["error_message"],
664
+ # "code": code
665
+ # }
666
+ # else:
667
+ # return {
668
+ # "text": execution_result["output"],
669
+ # "csv_files": execution_result["files"]["csv"],
670
+ # "image_files": execution_result["files"]["images"]
671
+ # }
672
+ # except Exception as e:
673
+ # return {
674
+ # "error": f"Critical failure: {str(e)}",
675
+ # "csv_files": [],
676
+ # "image_files": []
677
+ # }
678
+
679
+ # return {
680
+ # "error": f"Failed after {self.max_retries} retries",
681
+ # "csv_files": [],
682
+ # "image_files": []
683
+ # }
684
+
685
+ # def gemini_llm_chat(csv_url: str, query: str) -> Dict[str, Any]:
686
+ # try:
687
+ # df = pd.read_csv(csv_url)
688
+ # agent = RethinkAgent(df=df)
689
+
690
+ # if not agent.initialize_pool():
691
+ # return {"error": "API pool initialization failed"}
692
+
693
+ # result = agent.execute_query(query)
694
+
695
+ # if "error" in result:
696
+ # return result
697
+
698
+ # return {
699
+ # "message": result["text"],
700
+ # "csv_files": result["csv_files"],
701
+ # "image_files": result["image_files"]
702
+ # }
703
+ # except Exception as e:
704
+ # logger.error(f"Processing failed: {str(e)}", exc_info=True)
705
+ # return {
706
+ # "error": f"Processing error: {str(e)}",
707
+ # "csv_files": [],
708
+ # "image_files": []
709
+ # }
710
+
711
+ # async def generate_csv_report(csv_url: str, query: str) -> FileBoxProps:
712
+ # try:
713
+ # result = gemini_llm_chat(csv_url, query)
714
+ # logger.info(f"Raw result from gemini_llm_chat: {result}")
715
+
716
+ # csv_files = []
717
+ # image_files = []
718
+
719
+ # if isinstance(result, dict) and 'csv_files' in result and 'image_files' in result:
720
+ # # Process CSV files
721
+ # for csv_path in result['csv_files']:
722
+ # if os.path.exists(csv_path):
723
+ # file_name = os.path.basename(csv_path)
724
+ # try:
725
+ # unique_file_name = f"{uuid.uuid4()}_{file_name}"
726
+ # public_url = await upload_file_to_supabase(
727
+ # file_path=csv_path,
728
+ # file_name=unique_file_name
729
+ # )
730
+ # csv_files.append(FileProps(
731
+ # fileName=file_name,
732
+ # filePath=public_url,
733
+ # fileType="csv"
734
+ # ))
735
+ # os.remove(csv_path)
736
+ # except Exception as upload_error:
737
+ # logger.error(f"Failed to upload CSV {file_name}: {str(upload_error)}")
738
+ # continue
739
+
740
+ # # Process image files
741
+ # for img_path in result['image_files']:
742
+ # if os.path.exists(img_path):
743
+ # file_name = os.path.basename(img_path)
744
+ # try:
745
+ # unique_file_name = f"{uuid.uuid4()}_{file_name}"
746
+ # public_url = await upload_file_to_supabase(
747
+ # file_path=img_path,
748
+ # file_name=unique_file_name
749
+ # )
750
+ # image_files.append(FileProps(
751
+ # fileName=file_name,
752
+ # filePath=public_url,
753
+ # fileType="image"
754
+ # ))
755
+ # os.remove(img_path)
756
+ # except Exception as upload_error:
757
+ # logger.error(f"Failed to upload image {file_name}: {str(upload_error)}")
758
+ # continue
759
+
760
+ # return FileBoxProps(
761
+ # files=Files(
762
+ # csv_files=csv_files,
763
+ # image_files=image_files
764
+ # )
765
+ # )
766
+ # else:
767
+ # raise ValueError("Unexpected response format from gemini_llm_chat")
768
+
769
+ # except Exception as e:
770
+ # logger.error(f"Report generation failed: {str(e)}", exc_info=True)
771
+ # return FileBoxProps(
772
+ # files=Files(
773
+ # csv_files=[],
774
+ # image_files=[]
775
+ # )
776
+ # )
groq_chart.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from util_service import process_answer
2
+ import os
3
+ import threading
4
+ import uuid
5
+ from dotenv import load_dotenv
6
+ from langchain_groq import ChatGroq
7
+ import pandas as pd
8
+ from pandasai import SmartDataframe
9
+ import numpy as np
10
+ import logging
11
+ from csv_service import clean_data
12
+ from util_service import handle_out_of_range_float
13
+
14
+ load_dotenv()
15
+
16
+ # Thread-safe key management for langchain_csv_chat
17
+ current_langchain_key_index = 0
18
+ current_langchain_key_lock = threading.Lock()
19
+
20
+ # Load environment variables
21
+ groq_api_keys = os.getenv("GROQ_API_KEYS").split(",")
22
+ model_name = os.getenv("GROQ_LLM_MODEL")
23
+
24
+ # Set up logging
25
+ logging.basicConfig(level=logging.INFO)
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
+ instructions = """
30
+
31
+ - Please ensure that each value is clearly visible, You may need to adjust the font size, rotate the labels, or use truncation to improve readability (if needed).
32
+ - For multiple charts, arrange them in a grid format (2x2, 3x3, etc.)
33
+ - Use colorblind-friendly palette
34
+ - Read above instructions and follow them.
35
+
36
+ """
37
+
38
+ # Thread-safe configuration for chart endpoints
39
+ current_groq_chart_key_index = 0
40
+ current_groq_chart_lock = threading.Lock()
41
+
42
+ def model():
43
+ global current_groq_chart_key_index, current_groq_chart_lock
44
+ with current_groq_chart_lock:
45
+ if current_groq_chart_key_index >= len(groq_api_keys):
46
+ raise Exception("All API keys exhausted for chart generation")
47
+ api_key = groq_api_keys[current_groq_chart_key_index]
48
+ return ChatGroq(model=model_name, api_key=api_key)
49
+
50
+ def groq_chart(csv_url: str, question: str):
51
+ global current_groq_chart_key_index, current_groq_chart_lock
52
+
53
+ for attempt in range(len(groq_api_keys)):
54
+ try:
55
+ # Clean cache before processing
56
+ cache_db_path = "/workspace/cache/cache_db_0.11.db"
57
+ if os.path.exists(cache_db_path):
58
+ try:
59
+ os.remove(cache_db_path)
60
+ except Exception as e:
61
+ logger.info(f"Cache cleanup error: {e}")
62
+
63
+ data = clean_data(csv_url)
64
+ with current_groq_chart_lock:
65
+ current_api_key = groq_api_keys[current_groq_chart_key_index]
66
+
67
+ llm = ChatGroq(model=model_name, api_key=current_api_key)
68
+
69
+ # Generate unique filename using UUID
70
+ chart_filename = f"chart_{uuid.uuid4()}.png"
71
+ chart_path = os.path.join("generated_charts", chart_filename)
72
+
73
+ # Configure SmartDataframe with chart settings
74
+ df = SmartDataframe(
75
+ data,
76
+ config={
77
+ 'llm': llm,
78
+ 'save_charts': True, # Enable chart saving
79
+ 'open_charts': False,
80
+ 'save_charts_path': os.path.dirname(chart_path), # Directory to save
81
+ 'custom_chart_filename': chart_filename # Unique filename
82
+ }
83
+ )
84
+
85
+ answer = df.chat(question + instructions)
86
+
87
+ if process_answer(answer):
88
+ return "Chart not generated"
89
+ return answer
90
+
91
+ except Exception as e:
92
+ error = str(e)
93
+ if "429" in error:
94
+ with current_groq_chart_lock:
95
+ current_groq_chart_key_index = (current_groq_chart_key_index + 1) % len(groq_api_keys)
96
+ else:
97
+ logger.info(f"Chart generation error: {error}")
98
+ return {"error": error}
99
+
100
+ logger.info("All API keys exhausted for chart generation")
101
+ return None
groq_chat.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import threading
3
+ import uuid
4
+ from dotenv import load_dotenv
5
+ from langchain_groq import ChatGroq
6
+ import pandas as pd
7
+ from pandasai import SmartDataframe
8
+ import numpy as np
9
+ import logging
10
+ from csv_service import clean_data
11
+ from util_service import handle_out_of_range_float
12
+
13
+ load_dotenv()
14
+
15
+ # Thread-safe key management for langchain_csv_chat
16
+ current_groq_key_index = 0
17
+ current_groq_key_lock = threading.Lock()
18
+
19
+ # Load environment variables
20
+ groq_api_keys = os.getenv("GROQ_API_KEYS").split(",")
21
+ model_name = os.getenv("GROQ_LLM_MODEL")
22
+
23
+ # Set up logging
24
+ logging.basicConfig(level=logging.INFO)
25
+ logger = logging.getLogger(__name__)
26
+
27
+ def groq_chat(csv_url: str, question: str):
28
+ global current_groq_key_index, current_groq_key_lock
29
+
30
+ while True:
31
+ with current_groq_key_lock:
32
+ if current_groq_key_index >= len(groq_api_keys):
33
+ return {"error": "All API keys exhausted."}
34
+ current_api_key = groq_api_keys[current_groq_key_index]
35
+
36
+ try:
37
+ # Delete cache file if exists
38
+ cache_db_path = "/workspace/cache/cache_db_0.11.db"
39
+ if os.path.exists(cache_db_path):
40
+ try:
41
+ os.remove(cache_db_path)
42
+ except Exception as e:
43
+ logger.info(f"Error deleting cache DB file: {e}")
44
+
45
+ data = clean_data(csv_url)
46
+ llm = ChatGroq(model=model_name, api_key=current_api_key)
47
+ # Generate unique filename using UUID
48
+ chart_filename = f"chart_{uuid.uuid4()}.png"
49
+ chart_path = os.path.join("generated_charts", chart_filename)
50
+
51
+ # Configure SmartDataframe with chart settings
52
+ df = SmartDataframe(
53
+ data,
54
+ config={
55
+ 'llm': llm,
56
+ 'save_charts': True, # Enable chart saving
57
+ 'open_charts': False,
58
+ 'save_charts_path': os.path.dirname(chart_path), # Directory to save
59
+ 'custom_chart_filename': chart_filename # Unique filename
60
+ }
61
+ )
62
+
63
+ answer = df.chat(question)
64
+
65
+ # Process different response types
66
+ if isinstance(answer, pd.DataFrame):
67
+ processed = answer.apply(handle_out_of_range_float).to_dict(orient="records")
68
+ elif isinstance(answer, pd.Series):
69
+ processed = answer.apply(handle_out_of_range_float).to_dict()
70
+ elif isinstance(answer, list):
71
+ processed = [handle_out_of_range_float(item) for item in answer]
72
+ elif isinstance(answer, dict):
73
+ processed = {k: handle_out_of_range_float(v) for k, v in answer.items()}
74
+ else:
75
+ processed = {"answer": str(handle_out_of_range_float(answer))}
76
+
77
+ return processed
78
+
79
+ except Exception as e:
80
+ error_message = str(e)
81
+ if "429" in error_message:
82
+ with current_groq_key_lock:
83
+ current_groq_key_index += 1
84
+ if current_groq_key_index >= len(groq_api_keys):
85
+ logger.info("All API keys exhausted.")
86
+ return None
87
+ else:
88
+ logger.info(f"Error with API key index {current_groq_key_index}: {error_message}")
89
+ return None
lc_groq_chart.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ import threading
4
+ import uuid
5
+ from dotenv import load_dotenv
6
+ from langchain_groq import ChatGroq
7
+ from matplotlib import pyplot as plt
8
+ import matplotlib
9
+ import numpy as np
10
+ import pandas as pd
11
+ from csv_service import clean_data, extract_chart_filenames
12
+ from langchain_experimental.tools import PythonAstREPLTool
13
+ from langchain_experimental.agents import create_pandas_dataframe_agent
14
+ from util_service import _prompt_generator
15
+ import seaborn as sns
16
+
17
+ load_dotenv()
18
+
19
+ # Thread-safe key management for langchain_csv_chat
20
+ current_langchain_key_index = 0
21
+ current_langchain_key_lock = threading.Lock()
22
+
23
+ # Load environment variables
24
+ groq_api_keys = os.getenv("GROQ_API_KEYS").split(",")
25
+ model_name = os.getenv("GROQ_LLM_MODEL")
26
+
27
+ # Set up logging
28
+ logging.basicConfig(level=logging.INFO)
29
+ logger = logging.getLogger(__name__)
30
+
31
+ current_langchain_chart_key_index = 0
32
+ current_langchain_chart_lock = threading.Lock()
33
+
34
+ def langchain_csv_chart(csv_url: str, question: str, chart_required: bool):
35
+ global current_langchain_chart_key_index, current_langchain_chart_lock
36
+
37
+ data = clean_data(csv_url)
38
+
39
+ for attempt in range(len(groq_api_keys)):
40
+ try:
41
+ with current_langchain_chart_lock:
42
+ api_key = groq_api_keys[current_langchain_chart_key_index]
43
+ current_key = current_langchain_chart_key_index
44
+ current_langchain_chart_key_index = (current_langchain_chart_key_index + 1) % len(groq_api_keys)
45
+
46
+ llm = ChatGroq(model=model_name, api_key=api_key)
47
+ tool = PythonAstREPLTool(locals={
48
+ "df": data,
49
+ "pd": pd,
50
+ "np": np,
51
+ "plt": plt,
52
+ "sns": sns,
53
+ "matplotlib": matplotlib,
54
+ "uuid": uuid
55
+ })
56
+
57
+ agent = create_pandas_dataframe_agent(
58
+ llm,
59
+ data,
60
+ agent_type="tool-calling",
61
+ verbose=True,
62
+ allow_dangerous_code=True,
63
+ extra_tools=[tool],
64
+ return_intermediate_steps=True
65
+ )
66
+
67
+ result = agent.invoke({"input": _prompt_generator(f"{question} and use this csv_url: {csv_url} to read the csv file", True)})
68
+ output = result.get("output", "")
69
+
70
+ # Verify chart file creation
71
+ chart_files = extract_chart_filenames(output)
72
+ if len(chart_files) > 0:
73
+ return chart_files
74
+
75
+ if attempt < len(groq_api_keys) - 1:
76
+ logger.info(f"Langchain chart error (key {current_key}): {output}")
77
+
78
+ except Exception as e:
79
+ logger.info(f"Langchain chart error (key {current_key}): {str(e)}")
80
+
81
+ logger.info("All API keys exhausted for chart generation")
82
+ return None
lc_groq_chat.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ import threading
4
+ from langchain_groq import ChatGroq
5
+ from matplotlib import pyplot as plt
6
+ import matplotlib
7
+ import numpy as np
8
+ import pandas as pd
9
+ from dotenv import load_dotenv
10
+ from csv_service import clean_data
11
+ import seaborn as sns
12
+ from langchain_experimental.tools import PythonAstREPLTool
13
+ from langchain_experimental.agents import create_pandas_dataframe_agent
14
+ from util_service import _prompt_generator
15
+
16
+ load_dotenv()
17
+
18
+ # Thread-safe key management for langchain_csv_chat
19
+ current_langchain_key_index = 0
20
+ current_langchain_key_lock = threading.Lock()
21
+
22
+ # Load environment variables
23
+ groq_api_keys = os.getenv("GROQ_API_KEYS").split(",")
24
+ model_name = os.getenv("GROQ_LLM_MODEL")
25
+
26
+ # Set up logging
27
+ logging.basicConfig(level=logging.INFO)
28
+ logger = logging.getLogger(__name__)
29
+
30
+ def langchain_csv_chat(csv_url: str, question: str, chart_required: bool):
31
+ global current_langchain_key_index, current_langchain_key_lock
32
+
33
+ data = clean_data(csv_url)
34
+ attempts = 0
35
+
36
+ while attempts < len(groq_api_keys):
37
+ with current_langchain_key_lock:
38
+ if current_langchain_key_index >= len(groq_api_keys):
39
+ current_langchain_key_index = 0
40
+ api_key = groq_api_keys[current_langchain_key_index]
41
+ current_key = current_langchain_key_index
42
+ current_langchain_key_index += 1
43
+ attempts += 1
44
+
45
+ try:
46
+ llm = ChatGroq(model=model_name, api_key=api_key)
47
+ tool = PythonAstREPLTool(locals={
48
+ "df": data,
49
+ "pd": pd,
50
+ "np": np,
51
+ "plt": plt,
52
+ "sns": sns,
53
+ "matplotlib": matplotlib
54
+ })
55
+
56
+ agent = create_pandas_dataframe_agent(
57
+ llm,
58
+ data,
59
+ agent_type="tool-calling",
60
+ verbose=True,
61
+ allow_dangerous_code=True,
62
+ extra_tools=[tool],
63
+ return_intermediate_steps=True
64
+ )
65
+
66
+ prompt = _prompt_generator(question, chart_required)
67
+ result = agent.invoke({"input": prompt})
68
+ return result.get("output")
69
+
70
+ except Exception as e:
71
+ logger.info(f"Error with key index {current_key}: {str(e)}")
72
+
73
+ # If all keys are exhausted, return None
74
+ logger.info("All API keys have been exhausted.")
75
+ return None
orchestrator_agent.py CHANGED
@@ -142,51 +142,6 @@ def create_agent(csv_url: str, api_key: str, conversation_history: List) -> Agen
142
  5. Offer next-step suggestions
143
  """
144
 
145
- # system_prompt = (
146
- # "You are a data analyst. "
147
- # "You have all the tools you need to answer any question. "
148
- # "If the user asks for multiple answers or charts, break the question into several well-defined questions. "
149
- # "Pass the CSV URL or file path along with the questions to the tools to generate the answer. "
150
- # "The tools are actually LLMs with Python code execution capabilities. "
151
- # "Modify the query if needed to make it simpler for the LLM to understand. "
152
- # "Answer in a friendly and helpful manner. "
153
- # "**Format images** in Markdown: `![alt_text](direct-image-url)`. "
154
- # f"Your CSV URL is {csv_url}. "
155
- # f"Your CSV metadata is {csv_metadata}."
156
- # )
157
-
158
-
159
- # system_prompt = (
160
- # "You are a data analyst assistant with limited tool capabilities. "
161
- # "Available tools can only handle simple data queries: "
162
- # "- Count rows/columns\n- Calculate basic stats (avg, sum, min/max)\n"
163
- # "- Create simple visualizations (pie charts, bar graphs)\n"
164
- # "- Show column names/types\n\n"
165
-
166
- # "Query Handling Rules:\n"
167
- # "1. If query is complex, ambiguous, or exceeds tool capabilities:\n"
168
- # " - Break into simpler sub-questions\n"
169
- # " - Ask for clarification\n"
170
- # " - Rephrase to nearest simple query\n"
171
- # "2. For 'full report' requests:\n"
172
- # " - Outline possible analysis steps\n"
173
- # " - Ask user to select one component at a time\n\n"
174
-
175
- # "Examples:\n"
176
- # "- Bad query: 'Show me everything'\n"
177
- # " Response: 'I can show row count (10), columns (5: Name, Age...), "
178
- # "or a pie chart of categories. Which would you like?'\n"
179
- # "- Bad query: 'Analyze trends'\n"
180
- # " Response: 'For trend analysis, I can show monthly averages or "
181
- # "year-over-year comparisons. Please specify time period and metric.'\n\n"
182
-
183
- # "Current CSV Context:\n"
184
- # f"- URL: {csv_url}\n"
185
- # f"- Metadata: {csv_metadata}\n\n"
186
-
187
- # "Always format images as: ![Chart Description](direct-image-url)"
188
- # )
189
-
190
  return Agent(
191
  model=initialize_model(api_key),
192
  deps_type=str,
@@ -216,3 +171,149 @@ def csv_orchestrator_chat(csv_url: str, user_question: str, conversation_history
216
  # If all keys are exhausted or fail
217
  print("All API keys have been exhausted or failed.")
218
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  5. Offer next-step suggestions
143
  """
144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  return Agent(
146
  model=initialize_model(api_key),
147
  deps_type=str,
 
171
  # If all keys are exhausted or fail
172
  print("All API keys have been exhausted or failed.")
173
  return None
174
+
175
+
176
+
177
+
178
+
179
+
180
+
181
+
182
+
183
+
184
+
185
+ # import os
186
+ # from typing import Dict, List, Any
187
+ # from pydantic_ai import Agent
188
+ # from pydantic_ai.models.gemini import GeminiModel
189
+ # from pydantic_ai.providers.google_gla import GoogleGLAProvider
190
+ # from pydantic_ai import RunContext
191
+ # from pydantic import BaseModel
192
+ # from google.api_core.exceptions import ResourceExhausted
193
+ # from csv_service import get_csv_basic_info
194
+ # from orchestrator_functions import csv_chart, csv_chat
195
+ # from dotenv import load_dotenv
196
+
197
+ # load_dotenv()
198
+
199
+ # # Thread-safe key management
200
+ # current_gemini_key_index = 0
201
+ # GEMINI_API_KEYS = os.getenv("GEMINI_API_KEYS", "").split(",")
202
+
203
+ # def initialize_model(api_key: str) -> GeminiModel:
204
+ # return GeminiModel(
205
+ # 'gemini-2.0-flash',
206
+ # provider=GoogleGLAProvider(api_key=api_key)
207
+ # )
208
+
209
+ # def is_resource_exhausted_error(result_or_exception) -> bool:
210
+ # """Check if the error indicates resource exhaustion"""
211
+ # error_str = str(result_or_exception).lower()
212
+ # return any(keyword in error_str for keyword in [
213
+ # "resource exhausted",
214
+ # "quota exceeded",
215
+ # "rate limit",
216
+ # "billing",
217
+ # "payment method",
218
+ # "plan.rule"
219
+ # ])
220
+
221
+ # async def generate_csv_answer(csv_url: str, user_questions: List[str]) -> Any:
222
+ # answers = []
223
+ # for question in user_questions:
224
+ # answer = await csv_chat(csv_url, question)
225
+ # answers.append(dict(question=question, answer=answer))
226
+ # return answers
227
+
228
+ # async def generate_chart(csv_url: str, user_questions: List[str]) -> Any:
229
+ # charts = []
230
+ # for question in user_questions:
231
+ # chart = await csv_chart(csv_url, question)
232
+ # charts.append(dict(question=question, image_url=chart))
233
+ # return charts
234
+
235
+ # def create_agent(csv_url: str, api_key: str, conversation_history: List) -> Agent:
236
+ # csv_metadata = get_csv_basic_info(csv_url)
237
+
238
+ # system_prompt = f"""
239
+ # # Role: Expert Data Analysis Assistant
240
+ # # Personality & Origin: You are exclusively the CSV Document Analysis Assistant, created by the chatcsvandpdf team. Your sole purpose is to assist users with CSV-related tasks—analyzing, interpreting, and processing data.
241
+
242
+ # ## Capabilities:
243
+ # - Break complex queries into simpler sub-tasks
244
+
245
+ # ## Instruction Framework:
246
+ # 1. QUERY PROCESSING:
247
+ # - If request contains multiple questions:
248
+ # a) Decompose into logical sub-questions
249
+ # b) Process sequentially
250
+ # c) Combine results coherently
251
+
252
+ # 2. DATA HANDLING:
253
+ # - Always verify CSV structure matches the request
254
+ # - Handle missing/ambiguous data by:
255
+ # a) Asking clarifying questions OR
256
+ # b) Making reasonable assumptions (state them clearly)
257
+
258
+ # 3. VISUALIZATION STANDARDS:
259
+ # - Format images as: `![Description](direct-url)`
260
+ # - Include axis labels and titles
261
+ # - Use appropriate chart types
262
+
263
+ # 4. COMMUNICATION PROTOCOL:
264
+ # - Friendly, professional tone
265
+ # - Explain technical terms
266
+ # - Summarize key findings
267
+ # - Highlight limitations/caveats
268
+
269
+ # 5. TOOL USAGE:
270
+ # - Can process statistical operations
271
+ # - Supports visualization libraries
272
+
273
+ # ## Current Context:
274
+ # - Working with CSV_URL: {csv_url}
275
+ # - Dataset overview: {csv_metadata}
276
+ # - Your conversation history: {conversation_history}
277
+ # - Output format: Markdown compatible
278
+ # """
279
+
280
+ # return Agent(
281
+ # model=initialize_model(api_key),
282
+ # deps_type=str,
283
+ # tools=[generate_csv_answer, generate_chart],
284
+ # system_prompt=system_prompt
285
+ # )
286
+
287
+ # def csv_orchestrator_chat(csv_url: str, user_question: str, conversation_history: List) -> str:
288
+ # global current_gemini_key_index
289
+
290
+ # while current_gemini_key_index < len(GEMINI_API_KEYS):
291
+ # api_key = GEMINI_API_KEYS[current_gemini_key_index]
292
+
293
+ # try:
294
+ # print(f"Attempting with API key index {current_gemini_key_index}")
295
+ # agent = create_agent(csv_url, api_key, conversation_history)
296
+ # result = agent.run_sync(user_question)
297
+
298
+ # # Check if result indicates resource exhaustion
299
+ # if result.data and is_resource_exhausted_error(result.data):
300
+ # print(f"Resource exhausted in response for key {current_gemini_key_index}")
301
+ # current_gemini_key_index += 1
302
+ # continue
303
+
304
+ # return result.data
305
+
306
+ # except ResourceExhausted as e:
307
+ # print(f"Resource exhausted for API key {current_gemini_key_index}: {e}")
308
+ # current_gemini_key_index += 1
309
+ # continue
310
+
311
+ # except Exception as e:
312
+ # if is_resource_exhausted_error(e):
313
+ # print(f"Resource exhausted error detected for key {current_gemini_key_index}")
314
+ # current_gemini_key_index += 1
315
+ # continue
316
+ # print(f"Non-recoverable error with key {current_gemini_key_index}: {e}")
317
+ # return f"Error processing request: {str(e)}"
318
+
319
+ # return "All API keys have been exhausted. Please update billing information."
orchestrator_functions.py CHANGED
@@ -612,7 +612,6 @@ async def csv_chart(csv_url: str, query: str):
612
 
613
  except Exception as openai_error:
614
  logger.info(f"OpenAI failed ({str(openai_error)}), trying raw Groq...")
615
- return 'Sorry, I could not generate a chart...'
616
  # --- 2. Second Attempt: Raw Groq ---
617
  try:
618
  groq_result = await asyncio.to_thread(groq_chart, csv_url, query)
 
612
 
613
  except Exception as openai_error:
614
  logger.info(f"OpenAI failed ({str(openai_error)}), trying raw Groq...")
 
615
  # --- 2. Second Attempt: Raw Groq ---
616
  try:
617
  groq_result = await asyncio.to_thread(groq_chart, csv_url, query)