Soumik555 committed
Commit da3abe4 · 1 Parent(s): a20826a
.env ADDED
@@ -0,0 +1,22 @@
+ GOOGLE_GENERATIVE_AI_API_KEYS=AIzaSyC6CuXP7oMHbQymv5desJ7HJypSPisYN7s,AIzaSyAzV1YGajXhC2N8n8b3bgU1PHUXNWdZiUk,AIzaSyAYvv5urC0lhzNYYO1f4a4EYqTsZrmubrM,AIzaSyD7VsIKjtNBlQUWXQ_bIFbl240f2AUT7nc,AIzaSyCFnFsqplkNeQFjRh2EhkK90t48wkmyJQU
+ GOOGLE_GENERATIVE_AI_MODEL=gemini-2.0-flash-lite-preview-02-05
+
+ # Pandas API keys
+ PANDASAI_API_KEYS=$2a$10$VVwPEnzFxnEnJhk2u5ef1ewTuT3rNK59QpYQWAhUY29FHH4b7fwNC,$2a$10$5ikmN9RtNWHvP8aLnHfm.epO/XhVF1Pvk1Chy2Fqa.4x232a374xK,$2a$10$aAvr1DH3Pt3KLPDYa.JED..d83Pl6M4xnQd6uY8fadNkqSEv9KaYK,$2a$10$tJkqyS9Us36ernP1N4/8dOE088rCm7MC3gIj2RQMlaalY34EkkDHy,$2a$10$V0tThT/XnmlHbJucM00yN.hxz9r3ZwVqe0sQRQwDZAGHmhMq81D7O,$2a$10$d9vj8iPtD/L/i2B5AhiKTexOSpZ52XZTRUDkZa4p0vnI6RCj7f0K2,$2a$10$PZdCvVJB8301iDIZrZ2z8uB9d68kaBeOjaOIbbXGgqlZ2frbTm0eG,$2a$10$SHK.YTrTQcol/yM/RD8tZOcIF2fUTXtaETpDo8G0At90NxQ1HGk.C,$2a$10$QYz2Fp2fFZNq80HjAC/Okuy/PZFMgGgpPuQAyFDVtvB0G9bCn8Cee,$2a$10$SGY3HoCX0jbBXHSbpwGH1OEC/yPwT5792MjSZeWYVLew52pE4gR0y,$2a$10$QHPpvXwCXhHtKyx4jWMTh.8Mz1azTEQbDdDMpmikOzdgKtFfOq3FG,$2a$10$KoTsqdLPNIBiLRHWUg/6guqxNrB4ByljnMDTN0HJXmGl.PagdxpGm,$2a$10$ERsxnbIwk0LOMqmFX1SfjuMSXzh5gsBqm1BnYXFNEBAS3J1AfK24m,$2a$10$zwX4F0/pxXgmuAfDteFlHeXswX8cvVAvkv8mBAJ4WLvAEaUM3v266,$2a$10$LPA4FUIjg6CbZYEhi3NLRuY2Yar5SbT9gYoQ/oZuPaFUxNUyaJ/ii,$2a$10$kLDISr9ivaqcYiAZ1TmBOeclXK0C5a/LPPB3Rsxme19NwVPhznQya,$2a$10$qpoxy4k4sQya0tY7/lSEkuEuwVQGEl757A.jVPGNEh6p5tN6Yofyq,$2a$10$TDndpw.NWwx2k5X.9eI30uAaga8pbYO/erUEblVGcj6ydzSgzdVde,$2a$10$TtZtCWXgVSUhaNMMsuOjLuC6tCY1GTzUR/PvIUdowXYQdmefgpvbW,$2a$10$Orj1ZiURJkREK30gdwEYLeV7mY657jJhif8SckIPdvctjkWHXHrq6,$2a$10$CxEXDLjFtK1.nE9GuIt1duxLbvYtz2EA7x1LqddNF44kKVcc8aGZC
+ PANDASAI_ERROR_MESSAGE=Your BambooLLM inference limit has been reached. Please use pandas-ai with any other LLM available in the library. If you wish to continue using BambooLLM, visit this link https://tally.so/r/wzZNWg for more information.
+
+ # Necessary file paths
+ IMAGE_FILE_PATH=generated_charts
+ IMAGE_NOT_FOUND=exports/charts/image_not_found.png
+
+ # Allowed hosts
+ ALLOWED_HOSTS=https://vercel-test-10-rho.vercel.app,https://freechat-rho.vercel.app,https://chatcsvandpdf.vercel.app
+
+ # Auth token
+ AUTH_TOKEN=raVkp0VgY3z0wICaUi95d73VSUGxu8DA3mXUoiheC8B8gBQ1Rk6Zj4aO6kcba3gWy8KU10deRMte8GvG36wGauocooLLp6y9fJt7XUgYgzvxz6y8cfhwuifWSQzeB8qMbQXZjkH1oP3rocFGArSzWZdj5phDpGdwQoxkuBpOSfA4WhMPhMr4HdohjBuiy2TlIa7ICpd5fq35LCRt2ZERaXYGUbD7MqrpDOICgXyABTjTWGHe6r0hMK7k4JiIM36rZ028a777FausbLPke9V0lPqAz5ialT0j7RbMj2fxheiZCoErx15Qx5dGfpcS9O5Xi6bpTADrYcRej0wrJv3rZrcrCBrY4m6ep0eXRkElQM389H2KFu1MlI7Twf0TxcerPh6GMAZTg2YefZU1QE8Y0ODsruCU3Jiq6UfaYXMHP5YMwpcwwHzioybjFVfuMtDePjya7y7qwdwjXTqDDJAsSe061sMDKvHpPYgAOpaYerTVy4qGMuWTwDceUzqs39X0
+
+ # GROQ API keys
+ GROQ_API_KEYS=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,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,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,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,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
+ GROQ_LLM_MODEL=llama3-70b-8192
+
+ ALLOW_DANGEROUS_CODE=true
Dockerfile CHANGED
@@ -13,6 +13,9 @@ RUN mkdir -p /app/cache && chmod -R 777 /app/cache
  # Create the log file and set permissions
  RUN touch /app/pandasai.log && chmod 666 /app/pandasai.log
 
+ # Set the Matplotlib cache directory to /app/cache
+ ENV MPLCONFIGDIR=/app/cache
+
  # Copy the requirements file first
  COPY requirements.txt .
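The new ENV line points Matplotlib's config/cache directory at the already-writable /app/cache, so the library does not try to create a cache under a non-writable home directory inside the container. A quick sanity check, assuming matplotlib is installed in the image:

import os
os.environ.setdefault("MPLCONFIGDIR", "/app/cache")  # the Dockerfile ENV line sets this in the container

import matplotlib
print(matplotlib.get_configdir())  # expected: /app/cache
print(matplotlib.get_cachedir())   # normally the same directory when MPLCONFIGDIR is set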
 
__pycache__/controller.cpython-311.pyc ADDED
Binary file (27 kB).
 
__pycache__/csv_service.cpython-311.pyc ADDED
Binary file (6.15 kB).
 
__pycache__/intitial_q_handler.cpython-311.pyc ADDED
Binary file (1.49 kB).
 
__pycache__/util_service.cpython-311.pyc ADDED
Binary file (4.89 kB).
 
api_key_rotation.log ADDED
@@ -0,0 +1,47 @@
+ 2025-02-23 09:01:40,193 - INFO - Max CPUs: 12
+ 2025-02-23 09:01:54,167 - INFO - Max CPUs: 12
+ 2025-02-23 09:02:08,264 - INFO - Max CPUs: 12
+ 2025-02-23 09:03:32,223 - INFO - Max CPUs: 12
+ 2025-02-23 09:07:09,352 - ERROR - Error processing request: 'module' object is not callable
+ 2025-02-23 09:10:31,052 - ERROR - Error processing request: 'module' object is not callable
+ 2025-02-23 09:12:09,279 - ERROR - Error processing request: 'module' object is not callable
+ 2025-02-23 09:12:12,022 - ERROR - Error processing request: 'module' object is not callable
+ 2025-02-23 09:18:07,201 - ERROR - Error processing request: 'module' object is not callable
+ 2025-02-23 09:21:06,544 - ERROR - Error processing request: 'module' object is not callable
+ 2025-02-23 09:21:08,510 - ERROR - Error processing request: 'module' object is not callable
+ 2025-02-23 09:26:37,959 - INFO - Max CPUs: 12
+ 2025-02-23 09:26:48,431 - INFO - Successfully configured with key: AIzaSyC6...YN7s
+ 2025-02-23 09:28:48,467 - INFO - Max CPUs: 12
+ 2025-02-23 09:29:48,739 - INFO - Successfully configured with key: AIzaSyC6...YN7s
+ 2025-02-23 09:29:51,076 - INFO - gemini_answer -->
+ 2025-02-23 09:30:08,183 - INFO - Max CPUs: 12
+ 2025-02-23 09:31:24,934 - INFO - Max CPUs: 12
+ 2025-02-23 09:33:09,881 - INFO - Max CPUs: 12
+ 2025-02-23 09:35:32,429 - INFO - Max CPUs: 12
+ 2025-02-23 09:35:32,513 - INFO - Successfully configured with key: AIzaSyC6...YN7s
+ 2025-02-23 09:35:35,038 - INFO - gemini_answer -->
+ 2025-02-23 09:37:57,798 - INFO - Max CPUs: 12
+ 2025-02-23 09:38:12,937 - INFO - Max CPUs: 12
+ 2025-02-23 09:38:13,009 - INFO - Successfully configured with key: AIzaSyC6...YN7s
+ 2025-02-23 09:38:15,381 - INFO - Gemini processed result: {'answer': '12\n'}
+ 2025-02-23 09:38:15,383 - INFO - gemini_answer -->
+ 2025-02-23 09:39:40,048 - INFO - Successfully configured with key: AIzaSyC6...YN7s
+ 2025-02-23 09:39:43,293 - INFO - Gemini processed result: {'answer': '8\n'}
+ 2025-02-23 09:39:43,294 - INFO - gemini_answer -->
+ 2025-02-23 09:39:53,823 - INFO - Max CPUs: 12
+ 2025-02-23 09:40:16,383 - INFO - Successfully configured with key: AIzaSyC6...YN7s
+ 2025-02-23 09:40:19,010 - INFO - gemini processed result: {'answer': ' id name category price stock date_added description brand\n266 267 Race Beauty 5.09 335 2023-10-12 Central contain pattern education boy provide ... Roberts Inc\n399 400 No Books 5.13 326 2024-02-05 Per son war speak wait assume throughout leade... Rodriguez LLC\n292 293 Arm Home & Kitchen 8.73 44 2023-10-17 Long must maybe hour rather company with recen... Gray Inc\n'}
+ 2025-02-23 09:40:19,011 - INFO - gemini_answer -->
+ 2025-02-23 10:00:47,546 - INFO - Successfully configured with key: AIzaSyC6...YN7s
+ 2025-02-23 10:00:49,976 - INFO - gemini processed result: {'answer': 'id int64\nname object\ncategory object\nprice float64\nstock int64\ndate_added object\ndescription object\nbrand object\ndtype: object\n'}
+ 2025-02-23 10:00:49,978 - INFO - gemini_answer -->
+ 2025-02-23 10:01:18,233 - INFO - Successfully configured with key: AIzaSyC6...YN7s
+ 2025-02-23 10:01:20,775 - INFO - gemini processed result: {'answer': 'Top 3 Costliest Products:\n name price\n815 International 499.99\n132 Five 499.87\n307 Reduce 499.49\n'}
+ 2025-02-23 10:01:20,776 - INFO - gemini_answer -->
+ 2025-02-23 10:01:30,606 - INFO - Successfully configured with key: AIzaSyC6...YN7s
+ 2025-02-23 10:01:33,213 - INFO - gemini processed result: {'answer': ' id name category price stock date_added description brand\n815 816 International Sports 499.99 499 2023-11-06 Training enter fly situation former threat alo... Farmer-Sharp\n132 133 Five Home & Kitchen 499.87 169 2023-11-02 Activity president realize artist brother fill... Richardson-Walker\n307 308 Reduce Clothing 499.49 283 2023-11-27 Home which view city rock seat near business l... Robinson, Rodriguez and Chen\n'}
+ 2025-02-23 10:01:33,213 - INFO - gemini_answer -->
+ 2025-02-23 10:06:24,399 - INFO - Max CPUs: 12
+ 2025-02-23 10:06:34,598 - INFO - Successfully configured with key: AIzaSyC6...YN7s
+ 2025-02-23 10:06:37,383 - INFO - gemini processed result: {'answer': ' id name category price stock date_added \\\n815 816 International Sports 499.99 499 2023-11-06 \n132 133 Five Home & Kitchen 499.87 169 2023-11-02 \n307 308 Reduce Clothing 499.49 283 2023-11-27 \n\n description \\\n815 Training enter fly situation former threat alone the trial another. \n132 Activity president realize artist brother fill if maybe time region financial brother trouble. \n307 Home which view city rock seat near business loss federal growth appear. \n\n brand \n815 Farmer-Sharp \n132 Richardson-Walker \n307 Robinson, Rodriguez and Chen \n'}
+ 2025-02-23 10:06:37,385 - INFO - gemini_answer -->
controller.py CHANGED
@@ -26,6 +26,7 @@ import matplotlib.pyplot as plt
  import matplotlib
  import seaborn as sns
  from intitial_q_handler import if_initial_chart_question, if_initial_chat_question
+ from rethink_gemini_agents.rethink_chat import gemini_llm_chat
  from util_service import _prompt_generator, process_answer
  from fastapi.middleware.cors import CORSMiddleware
  import matplotlib
@@ -292,17 +293,21 @@ async def csv_chat(request: Dict, authorization: str = Header(None)):
      query = request.get("query")
      csv_url = request.get("csv_url")
      decoded_url = unquote(csv_url)
+
+     # gemini_answer = await asyncio.to_thread(gemini_llm_chat, decoded_url, query)
+     # logger.info("gemini_answer --> ", gemini_answer)
+     # return {"answer": gemini_answer}
 
      if if_initial_chat_question(query):
          answer = await asyncio.to_thread(
              langchain_csv_chat, decoded_url, query, False
          )
-         logger.info("langchain_answer:", answer)
+         logger.info("langchain_answer --> ", answer)
          return {"answer": jsonable_encoder(answer)}
 
      # Process with groq_chat first
      groq_answer = await asyncio.to_thread(groq_chat, decoded_url, query)
-     logger.info("groq_answer:", groq_answer)
+     logger.info("groq_answer --> ", groq_answer)
 
      if process_answer(groq_answer) == "Empty response received.":
          return {"answer": "Sorry, I couldn't find relevant data..."}
@@ -609,7 +614,7 @@ current_langchain_chart_lock = threading.Lock()
 
 
  # Use a process pool to run CPU-bound chart generation
- process_executor = ProcessPoolExecutor(max_workers=10)
+ process_executor = ProcessPoolExecutor(max_workers=(os.cpu_count()-2))
 
  # --- GROQ-BASED CHART GENERATION ---
  # def groq_chart(csv_url: str, question: str):
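Two small hardenings are worth noting for the lines above; the sketch below is not the committed code. First, max_workers=(os.cpu_count() - 2) can reach zero or a negative value on small machines, and os.cpu_count() may return None, so a floor of 1 is safer. Second, logger.info("groq_answer --> ", groq_answer) passes the answer as a %-format argument with no placeholder in the message, which is why entries such as "gemini_answer -->" appear in api_key_rotation.log with nothing after the arrow; a %s placeholder (or an f-string) actually renders the value.

import os
import logging
from concurrent.futures import ProcessPoolExecutor

logger = logging.getLogger(__name__)

# Clamp the pool size so 1- or 2-core hosts still get at least one worker.
max_workers = max(1, (os.cpu_count() or 1) - 2)
process_executor = ProcessPoolExecutor(max_workers=max_workers)

def log_answer(label: str, answer) -> None:
    # Lazy %-formatting: the value is substituted when the record is emitted.
    logger.info("%s --> %s", label, answer)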
pandasai.log ADDED
File without changes
rethink_gemini_agents/__pycache__/rethink_chat.cpython-311.pyc ADDED
Binary file (15 kB).
 
rethink_gemini_agents/rethink_chart.py ADDED
@@ -0,0 +1,238 @@
+ import pandas as pd
+ import re
+ import os
+ import uuid
+ import logging
+ from io import StringIO
+ import sys
+ import traceback
+ from typing import Optional, Dict, Any, List
+ from pydantic import BaseModel, Field
+ from google.generativeai import GenerativeModel, configure
+ from dotenv import load_dotenv
+ from csv_service import clean_data
+
+ # Load environment variables from .env file
+ load_dotenv()
+
+ API_KEYS = os.getenv("GOOGLE_GENERATIVE_AI_API_KEYS", "").split(",")
+ MODEL_NAME = os.getenv("GOOGLE_GENERATIVE_AI_MODEL")
+
+ # Set up non-interactive matplotlib backend
+ os.environ['MPLBACKEND'] = 'agg'
+ import matplotlib.pyplot as plt
+ plt.show = lambda: None # Monkey patch to disable display
+
+ # Configure logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(levelname)s - %(message)s',
+     handlers=[logging.FileHandler('api_key_rotation.log'), logging.StreamHandler()]
+ )
+ logger = logging.getLogger(__name__)
+
+ class GeminiKeyManager:
+     """Manage multiple Gemini API keys with failover"""
+
+     def __init__(self, api_keys: List[str]):
+         self.original_keys = api_keys.copy()
+         self.available_keys = api_keys.copy()
+         self.active_key = None
+         self.failed_keys = {}
+
+     def configure(self) -> bool:
+         """Try to configure API with available keys"""
+         while self.available_keys:
+             key = self.available_keys.pop(0)
+             try:
+                 configure(api_key=key)
+                 self.active_key = key
+                 logger.info(f"Successfully configured with key: {self._mask_key(key)}")
+                 return True
+             except Exception as e:
+                 self.failed_keys[key] = str(e)
+                 logger.error(f"Key failed: {self._mask_key(key)}. Error: {str(e)}")
+
+         logger.critical("All API keys failed to configure")
+         return False
+
+     def _mask_key(self, key: str) -> str:
+         return f"{key[:8]}...{key[-4:]}" if key else ""
+
+ class PythonREPL:
+     """Secure Python REPL with non-interactive plotting"""
+
+     def __init__(self, df: pd.DataFrame):
+         self.df = df
+         self.local_env = {
+             "pd": pd,
+             "df": self.df.copy(),
+             "plt": plt,
+             "os": os,
+             "uuid": uuid,
+             "plt": plt
+         }
+         os.makedirs('generated_charts', exist_ok=True)
+
+     def execute(self, code: str) -> Dict[str, Any]:
+         old_stdout = sys.stdout
+         sys.stdout = mystdout = StringIO()
+
+         try:
+             # Ensure figure closure and non-interactive mode
+             code = f"""
+ import matplotlib.pyplot as plt
+ plt.switch_backend('agg')
+ {code}
+ plt.close('all')
+ """
+             exec(code, self.local_env)
+             self.df = self.local_env.get('df', self.df)
+             error = False
+         except Exception as e:
+             error_msg = traceback.format_exc()
+             error = True
+         finally:
+             sys.stdout = old_stdout
+
+         return {
+             "output": mystdout.getvalue(),
+             "error": error,
+             "error_message": error_msg if error else None,
+             "df": self.local_env.get('df', self.df)
+         }
+
+ class RethinkAgent(BaseModel):
+     df: pd.DataFrame
+     max_retries: int = Field(default=5, ge=1)
+     gemini_model: Optional[GenerativeModel] = None
+     current_retry: int = Field(default=0, ge=0)
+     repl: Optional[PythonREPL] = None
+     key_manager: Optional[GeminiKeyManager] = None
+
+     class Config:
+         arbitrary_types_allowed = True
+
+     def _extract_code(self, response: str) -> str:
+         code_match = re.search(r'```python(.*?)```', response, re.DOTALL)
+         return code_match.group(1).strip() if code_match else response.strip()
+
+     def _generate_initial_prompt(self, query: str) -> str:
+         columns = "\n".join(self.df.columns)
+         return f"""
+ Generate Python code to analyze this DataFrame with columns:
+ {columns}
+
+ Query: {query}
+
+ Requirements:
+ 1. Save visualizations to 'generated_charts/' with UUID filename
+ 2. Use plt.savefig() with format='png'
+ 3. No plt.show() calls allowed
+ 4. After saving each chart, print exactly: CHART_SAVED: generated_charts/{{uuid}}.png
+ 5. Start with 'import pandas as pd'
+ 6. The DataFrame is available as 'df'
+ 7. Wrap code in ```python``` blocks
+ """
+
+     def _generate_retry_prompt(self, query: str, error: str, code: str) -> str:
+         return f"""
+ Previous code failed with error:
+ {error}
+
+ Revise this code:
+ {code}
+
+ New requirements:
+ 1. Fix the error
+ 2. Ensure plots are saved to generated_charts/
+ 3. After saving each chart, print exactly: CHART_SAVED: generated_charts/{{uuid}}.png
+ 4. No figure display
+ 5. Complete query: {query}
+
+ Explain the error first, then show corrected code in ```python``` blocks
+ """
+
+     def initialize_model(self, api_keys: List[str]) -> bool:
+         """Initialize Gemini model with key rotation"""
+         self.key_manager = GeminiKeyManager(api_keys)
+         if not self.key_manager.configure():
+             raise RuntimeError("All API keys failed to initialize")
+
+         try:
+             self.gemini_model = GenerativeModel(MODEL_NAME)
+             return True
+         except Exception as e:
+             logger.error(f"Model initialization failed: {str(e)}")
+             return False
+
+     def generate_code(self, query: str, error: Optional[str] = None, previous_code: Optional[str] = None) -> str:
+         if error:
+             prompt = self._generate_retry_prompt(query, error, previous_code)
+         else:
+             prompt = self._generate_initial_prompt(query)
+
+         try:
+             response = self.gemini_model.generate_content(prompt)
+             return self._extract_code(response.text)
+         except Exception as e:
+             logger.error(f"API call failed: {str(e)}")
+             if self.key_manager.available_keys:
+                 logger.info("Attempting key rotation...")
+                 if self.key_manager.configure():
+                     self.gemini_model = GenerativeModel(MODEL_NAME)
+                     return self.generate_code(query, error, previous_code)
+             raise
+
+     def execute_query(self, query: str) -> str:
+         self.repl = PythonREPL(self.df)
+         error = None
+         previous_code = None
+
+         while self.current_retry < self.max_retries:
+             try:
+                 code = self.generate_code(query, error, previous_code)
+                 result = self.repl.execute(code)
+
+                 if result["error"]:
+                     self.current_retry += 1
+                     error = result["error_message"]
+                     previous_code = code
+                     logger.warning(f"Retry {self.current_retry}/{self.max_retries}...")
+                 else:
+                     self.df = result["df"]
+                     return result["output"]
+             except Exception as e:
+                 logger.error(f"Critical failure: {str(e)}")
+                 return f"System error: {str(e)}"
+
+         return f"Failed after {self.max_retries} attempts. Last error: {error}"
+
+
+
+ def gemini_llm_chart(csv_url: str, query: str) -> str:
+     df = clean_data(csv_url)
+
+     agent = RethinkAgent(df=df)
+     if not agent.initialize_model(API_KEYS):
+         print("Failed to initialize model with provided keys")
+         exit(1)
+
+     result = agent.execute_query(query)
+     print("\nAnalysis Result:")
+     print(result)
+
+
+
+ # Usage Example
+ # if __name__ == "__main__":
+ #     df = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv')
+
+ #     agent = RethinkAgent(df=df)
+ #     if not agent.initialize_model(API_KEYS):
+ #         print("Failed to initialize model with provided keys")
+ #         exit(1)
+
+ #     result = agent.execute_query("Create a scatter plot of total_bill vs tip with kernel density estimate")
+ #     print("\nAnalysis Result:")
+ #     print(result)
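The initial prompt above asks the generated code to print a CHART_SAVED: generated_charts/{uuid}.png marker after every saved figure, but gemini_llm_chart only prints the raw REPL output. A small sketch of how a caller could recover the saved chart paths from that output; extract_chart_paths is an illustrative helper, not part of the commit.

import re
from typing import List

CHART_MARKER = re.compile(r"^CHART_SAVED:\s*(generated_charts/\S+\.png)\s*$", re.MULTILINE)

def extract_chart_paths(repl_output: str) -> List[str]:
    # Each line printed as "CHART_SAVED: generated_charts/<uuid>.png" yields one path.
    return CHART_MARKER.findall(repl_output)

# Example (assuming an initialized RethinkAgent named `agent`):
# output = agent.execute_query("Plot the price distribution per category")
# chart_paths = extract_chart_paths(output)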
rethink_gemini_agents/rethink_chat.py ADDED
@@ -0,0 +1,254 @@
+ import pandas as pd
+ import re
+ import os
+ import uuid
+ import logging
+ from io import StringIO
+ import sys
+ import traceback
+ from typing import Optional, Dict, Any, List
+ from pydantic import BaseModel, Field
+ from google.generativeai import GenerativeModel, configure
+ from dotenv import load_dotenv
+ import seaborn as sns
+ from csv_service import clean_data
+ from util_service import handle_out_of_range_float
+
+ pd.set_option('display.max_columns', None) # Show all columns
+ pd.set_option('display.max_rows', None) # Show all rows
+ pd.set_option('display.max_colwidth', None) # Do not truncate cell content
+
+ # Load environment variables from .env file
+ load_dotenv()
+
+ API_KEYS = os.getenv("GOOGLE_GENERATIVE_AI_API_KEYS", "").split(",")
+ MODEL_NAME = os.getenv("GOOGLE_GENERATIVE_AI_MODEL")
+
+ # Set up non-interactive matplotlib backend
+ os.environ['MPLBACKEND'] = 'agg'
+ import matplotlib.pyplot as plt
+ plt.show = lambda: None # Monkey patch to disable display
+
+ # Configure logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(levelname)s - %(message)s',
+     handlers=[logging.FileHandler('api_key_rotation.log'), logging.StreamHandler()]
+ )
+ logger = logging.getLogger(__name__)
+
+ class GeminiKeyManager:
+     """Manage multiple Gemini API keys with failover"""
+
+     def __init__(self, api_keys: List[str]):
+         self.original_keys = api_keys.copy()
+         self.available_keys = api_keys.copy()
+         self.active_key = None
+         self.failed_keys = {}
+
+     def configure(self) -> bool:
+         """Try to configure API with available keys"""
+         while self.available_keys:
+             key = self.available_keys.pop(0)
+             try:
+                 configure(api_key=key)
+                 self.active_key = key
+                 logger.info(f"Successfully configured with key: {self._mask_key(key)}")
+                 return True
+             except Exception as e:
+                 self.failed_keys[key] = str(e)
+                 logger.error(f"Key failed: {self._mask_key(key)}. Error: {str(e)}")
+
+         logger.critical("All API keys failed to configure")
+         return False
+
+     def _mask_key(self, key: str) -> str:
+         return f"{key[:8]}...{key[-4:]}" if key else ""
+
+ class PythonREPL:
+     """Secure Python REPL with non-interactive plotting"""
+
+     def __init__(self, df: pd.DataFrame):
+         self.df = df
+         self.local_env = {
+             "pd": pd,
+             "df": self.df.copy(),
+             "plt": plt,
+             "os": os,
+             "uuid": uuid,
+             "plt": plt,
+             "sns": sns,
+         }
+         os.makedirs('generated_charts', exist_ok=True)
+
+     def execute(self, code: str) -> Dict[str, Any]:
+         old_stdout = sys.stdout
+         sys.stdout = mystdout = StringIO()
+
+         try:
+             # Ensure figure closure and non-interactive mode
+             code = f"""
+ import matplotlib.pyplot as plt
+ plt.switch_backend('agg')
+ {code}
+ plt.close('all')
+ """
+             exec(code, self.local_env)
+             self.df = self.local_env.get('df', self.df)
+             error = False
+         except Exception as e:
+             error_msg = traceback.format_exc()
+             error = True
+         finally:
+             sys.stdout = old_stdout
+
+         return {
+             "output": mystdout.getvalue(),
+             "error": error,
+             "error_message": error_msg if error else None,
+             "df": self.local_env.get('df', self.df)
+         }
+
+ class RethinkAgent(BaseModel):
+     df: pd.DataFrame
+     max_retries: int = Field(default=5, ge=1)
+     gemini_model: Optional[GenerativeModel] = None
+     current_retry: int = Field(default=0, ge=0)
+     repl: Optional[PythonREPL] = None
+     key_manager: Optional[GeminiKeyManager] = None
+
+     class Config:
+         arbitrary_types_allowed = True
+
+     def _extract_code(self, response: str) -> str:
+         code_match = re.search(r'```python(.*?)```', response, re.DOTALL)
+         return code_match.group(1).strip() if code_match else response.strip()
+
+     def _generate_initial_prompt(self, query: str) -> str:
+         columns = "\n".join(self.df.columns)
+         return f"""
+ You are a data analyst assistant. Generate Python code to analyze this DataFrame with columns:
+ {columns}
+
+ Query: {query}
+
+ Requirements:
+ 1. Use print() to show results
+ 2. Start with 'import pandas as pd'
+ 3. The DataFrame is available as 'df'
+ 4. Wrap code in ```python``` blocks
+ """
+
+     def _generate_retry_prompt(self, query: str, error: str, code: str) -> str:
+         return f"""
+ Previous code failed with error:
+ {error}
+
+ Failed code:
+ {code}
+
+ Revise the code to fix the error and complete this query:
+ {query}
+
+ Requirements:
+ 1. Explain the error first
+ 2. Show corrected code in ```python``` blocks
+ """
+
+     def initialize_model(self, api_keys: List[str]) -> bool:
+         """Initialize Gemini model with key rotation"""
+         self.key_manager = GeminiKeyManager(api_keys)
+         if not self.key_manager.configure():
+             raise RuntimeError("All API keys failed to initialize")
+
+         try:
+             self.gemini_model = GenerativeModel(MODEL_NAME)
+             return True
+         except Exception as e:
+             logger.error(f"Model initialization failed: {str(e)}")
+             return False
+
+     def generate_code(self, query: str, error: Optional[str] = None, previous_code: Optional[str] = None) -> str:
+         if error:
+             prompt = self._generate_retry_prompt(query, error, previous_code)
+         else:
+             prompt = self._generate_initial_prompt(query)
+
+         try:
+             response = self.gemini_model.generate_content(prompt)
+             return self._extract_code(response.text)
+         except Exception as e:
+             logger.error(f"API call failed: {str(e)}")
+             if self.key_manager.available_keys:
+                 logger.info("Attempting key rotation...")
+                 if self.key_manager.configure():
+                     self.gemini_model = GenerativeModel(MODEL_NAME)
+                     return self.generate_code(query, error, previous_code)
+             raise
+
+     def execute_query(self, query: str) -> str:
+         self.repl = PythonREPL(self.df)
+         error = None
+         previous_code = None
+
+         while self.current_retry < self.max_retries:
+             try:
+                 code = self.generate_code(query, error, previous_code)
+                 result = self.repl.execute(code)
+
+                 if result["error"]:
+                     self.current_retry += 1
+                     error = result["error_message"]
+                     previous_code = code
+                     logger.warning(f"Retry {self.current_retry}/{self.max_retries}...")
+                 else:
+                     self.df = result["df"]
+                     return result["output"]
+             except Exception as e:
+                 logger.error(f"Critical failure: {str(e)}")
+                 return f"System error: {str(e)}"
+
+         return f"Failed after {self.max_retries} attempts. Last error: {error}"
+
+
+ def gemini_llm_chat(csv_url: str, query: str) -> str:
+     # Assuming clean_data and RethinkAgent are defined elsewhere
+     df = clean_data(csv_url)
+     agent = RethinkAgent(df=df)
+
+     # Assuming API_KEYS is defined elsewhere
+     if not agent.initialize_model(API_KEYS):
+         print("Failed to initialize model with provided keys")
+         exit(1)
+
+     result = agent.execute_query(query)
+
+     # Process different response types
+     if isinstance(result, pd.DataFrame):
+         processed = result.apply(handle_out_of_range_float).to_dict(orient="records")
+     elif isinstance(result, pd.Series):
+         processed = result.apply(handle_out_of_range_float).to_dict()
+     elif isinstance(result, list):
+         processed = [handle_out_of_range_float(item) for item in result]
+     elif isinstance(result, dict):
+         processed = {k: handle_out_of_range_float(v) for k, v in result.items()}
+     else:
+         processed = {"answer": str(handle_out_of_range_float(result))}
+
+     logger.info(f"gemini processed result: {processed}")
+     return processed
+
+ # uvicorn controller:app --host localhost --port 8000 --reload
+
+ # Usage Example
+ # if __name__ == "__main__":
+ #     df = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv')
+
+ #     agent = RethinkAgent(df=df)
+ #     if not agent.initialize_model(API_KEYS):
+ #         print("Failed to initialize model with provided keys")
+ #         exit(1)
+
+ #     result = agent.execute_query("How many rows and cols r there and what r their names?")
+ #     print("\nAnalysis Result:")
+ #     print(result)
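controller.py keeps the gemini_llm_chat call commented out for now. A hedged sketch of wiring it into an async FastAPI route with asyncio.to_thread, mirroring those commented lines; the route name and request shape are assumptions, and gemini_llm_chat itself is blocking (a Gemini call plus exec() of the generated code), so it should not run on the event loop directly.

import asyncio
from typing import Dict

from fastapi import FastAPI
from fastapi.encoders import jsonable_encoder

from rethink_gemini_agents.rethink_chat import gemini_llm_chat

app = FastAPI()

@app.post("/gemini_csv_chat")  # hypothetical route, not part of the commit
async def gemini_csv_chat(request: Dict):
    query = request.get("query")
    csv_url = request.get("csv_url")
    # Run the blocking agent on a worker thread so the event loop stays responsive.
    gemini_answer = await asyncio.to_thread(gemini_llm_chat, csv_url, query)
    return {"answer": jsonable_encoder(gemini_answer)}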
util_service.py CHANGED
@@ -1,4 +1,5 @@
  from langchain_core.prompts import ChatPromptTemplate
+ import numpy as np
 
  keywords = ["unfortunately", "unsupported", "error", "sorry", "response", "unable", "because"]
 
@@ -69,4 +70,14 @@ def _prompt_generator(question: str, chart_required: bool):
      if chart_required:
          return ChatPromptTemplate.from_template(chart_prompt)
      else:
-         return ChatPromptTemplate.from_template(chat_prompt)
+         return ChatPromptTemplate.from_template(chat_prompt)
+
+
+
+ def handle_out_of_range_float(value):
+     if isinstance(value, float):
+         if np.isnan(value):
+             return None
+         elif np.isinf(value):
+             return "Infinity"
+     return value
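handle_out_of_range_float exists because NaN and infinite floats are not valid JSON, so they are mapped to None and "Infinity" before a response is encoded. A small usage sketch with an illustrative DataFrame; note that covering every cell of a DataFrame needs element-wise application (DataFrame.applymap, or DataFrame.map on pandas >= 2.1), since DataFrame.apply hands whole columns to the function and this scalar helper returns non-floats unchanged.

import numpy as np
import pandas as pd

from util_service import handle_out_of_range_float

df = pd.DataFrame({"price": [499.99, np.nan, np.inf]})

# Element-wise conversion, then a JSON-friendly list of records.
safe_records = df.applymap(handle_out_of_range_float).to_dict(orient="records")
print(safe_records)  # roughly: [{'price': 499.99}, {'price': None}, {'price': 'Infinity'}]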