Soumik555 committed
Commit da3abe4 · 1 Parent(s): a20826a
.env ADDED
@@ -0,0 +1,22 @@
+ GOOGLE_GENERATIVE_AI_API_KEYS=AIzaSyC6CuXP7oMHbQymv5desJ7HJypSPisYN7s,AIzaSyAzV1YGajXhC2N8n8b3bgU1PHUXNWdZiUk,AIzaSyAYvv5urC0lhzNYYO1f4a4EYqTsZrmubrM,AIzaSyD7VsIKjtNBlQUWXQ_bIFbl240f2AUT7nc,AIzaSyCFnFsqplkNeQFjRh2EhkK90t48wkmyJQU
+ GOOGLE_GENERATIVE_AI_MODEL=gemini-2.0-flash-lite-preview-02-05
+
+ # Pandas API keys
+ PANDASAI_API_KEYS=$2a$10$VVwPEnzFxnEnJhk2u5ef1ewTuT3rNK59QpYQWAhUY29FHH4b7fwNC,$2a$10$5ikmN9RtNWHvP8aLnHfm.epO/XhVF1Pvk1Chy2Fqa.4x232a374xK,$2a$10$aAvr1DH3Pt3KLPDYa.JED..d83Pl6M4xnQd6uY8fadNkqSEv9KaYK,$2a$10$tJkqyS9Us36ernP1N4/8dOE088rCm7MC3gIj2RQMlaalY34EkkDHy,$2a$10$V0tThT/XnmlHbJucM00yN.hxz9r3ZwVqe0sQRQwDZAGHmhMq81D7O,$2a$10$d9vj8iPtD/L/i2B5AhiKTexOSpZ52XZTRUDkZa4p0vnI6RCj7f0K2,$2a$10$PZdCvVJB8301iDIZrZ2z8uB9d68kaBeOjaOIbbXGgqlZ2frbTm0eG,$2a$10$SHK.YTrTQcol/yM/RD8tZOcIF2fUTXtaETpDo8G0At90NxQ1HGk.C,$2a$10$QYz2Fp2fFZNq80HjAC/Okuy/PZFMgGgpPuQAyFDVtvB0G9bCn8Cee,$2a$10$SGY3HoCX0jbBXHSbpwGH1OEC/yPwT5792MjSZeWYVLew52pE4gR0y,$2a$10$QHPpvXwCXhHtKyx4jWMTh.8Mz1azTEQbDdDMpmikOzdgKtFfOq3FG,$2a$10$KoTsqdLPNIBiLRHWUg/6guqxNrB4ByljnMDTN0HJXmGl.PagdxpGm,$2a$10$ERsxnbIwk0LOMqmFX1SfjuMSXzh5gsBqm1BnYXFNEBAS3J1AfK24m,$2a$10$zwX4F0/pxXgmuAfDteFlHeXswX8cvVAvkv8mBAJ4WLvAEaUM3v266,$2a$10$LPA4FUIjg6CbZYEhi3NLRuY2Yar5SbT9gYoQ/oZuPaFUxNUyaJ/ii,$2a$10$kLDISr9ivaqcYiAZ1TmBOeclXK0C5a/LPPB3Rsxme19NwVPhznQya,$2a$10$qpoxy4k4sQya0tY7/lSEkuEuwVQGEl757A.jVPGNEh6p5tN6Yofyq,$2a$10$TDndpw.NWwx2k5X.9eI30uAaga8pbYO/erUEblVGcj6ydzSgzdVde,$2a$10$TtZtCWXgVSUhaNMMsuOjLuC6tCY1GTzUR/PvIUdowXYQdmefgpvbW,$2a$10$Orj1ZiURJkREK30gdwEYLeV7mY657jJhif8SckIPdvctjkWHXHrq6,$2a$10$CxEXDLjFtK1.nE9GuIt1duxLbvYtz2EA7x1LqddNF44kKVcc8aGZC
+ PANDASAI_ERROR_MESSAGE=Your BambooLLM inference limit has been reached. Please use pandas-ai with any other LLM available in the library. If you wish to continue using BambooLLM, visit this link https://tally.so/r/wzZNWg for more information.
+
+ # Necessary file paths
+ IMAGE_FILE_PATH=generated_charts
+ IMAGE_NOT_FOUND=exports/charts/image_not_found.png
+
+ # Allowed hosts
+ ALLOWED_HOSTS=https://vercel-test-10-rho.vercel.app,https://freechat-rho.vercel.app,https://chatcsvandpdf.vercel.app
+
+ # Auth token
+ AUTH_TOKEN=raVkp0VgY3z0wICaUi95d73VSUGxu8DA3mXUoiheC8B8gBQ1Rk6Zj4aO6kcba3gWy8KU10deRMte8GvG36wGauocooLLp6y9fJt7XUgYgzvxz6y8cfhwuifWSQzeB8qMbQXZjkH1oP3rocFGArSzWZdj5phDpGdwQoxkuBpOSfA4WhMPhMr4HdohjBuiy2TlIa7ICpd5fq35LCRt2ZERaXYGUbD7MqrpDOICgXyABTjTWGHe6r0hMK7k4JiIM36rZ028a777FausbLPke9V0lPqAz5ialT0j7RbMj2fxheiZCoErx15Qx5dGfpcS9O5Xi6bpTADrYcRej0wrJv3rZrcrCBrY4m6ep0eXRkElQM389H2KFu1MlI7Twf0TxcerPh6GMAZTg2YefZU1QE8Y0ODsruCU3Jiq6UfaYXMHP5YMwpcwwHzioybjFVfuMtDePjya7y7qwdwjXTqDDJAsSe061sMDKvHpPYgAOpaYerTVy4qGMuWTwDceUzqs39X0
+
+ # GROQ API keys
+ GROQ_API_KEYS=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,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,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,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,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
+ GROQ_LLM_MODEL=llama3-70b-8192
+
+ ALLOW_DANGEROUS_CODE=true
Dockerfile CHANGED
@@ -13,6 +13,9 @@ RUN mkdir -p /app/cache && chmod -R 777 /app/cache
  # Create the log file and set permissions
  RUN touch /app/pandasai.log && chmod 666 /app/pandasai.log
 
+ # Set the Matplotlib cache directory to /app/cache
+ ENV MPLCONFIGDIR=/app/cache
+
  # Copy the requirements file first
  COPY requirements.txt .
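The new ENV line points Matplotlib's config/cache directory at the already-writable /app/cache, so the library does not try to create a cache under a non-writable home directory inside the container. A quick sanity check, assuming matplotlib is installed in the image:

import os
os.environ.setdefault("MPLCONFIGDIR", "/app/cache")  # the Dockerfile ENV line sets this in the container

import matplotlib
print(matplotlib.get_configdir())  # expected: /app/cache
print(matplotlib.get_cachedir())   # normally the same directory when MPLCONFIGDIR is set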
 
__pycache__/controller.cpython-311.pyc ADDED
Binary file (27 kB).
 
__pycache__/csv_service.cpython-311.pyc ADDED
Binary file (6.15 kB).
 
__pycache__/intitial_q_handler.cpython-311.pyc ADDED
Binary file (1.49 kB).
 
__pycache__/util_service.cpython-311.pyc ADDED
Binary file (4.89 kB).
 
api_key_rotation.log ADDED
@@ -0,0 +1,47 @@
+ 2025-02-23 09:01:40,193 - INFO - Max CPUs: 12
+ 2025-02-23 09:01:54,167 - INFO - Max CPUs: 12
+ 2025-02-23 09:02:08,264 - INFO - Max CPUs: 12
+ 2025-02-23 09:03:32,223 - INFO - Max CPUs: 12
+ 2025-02-23 09:07:09,352 - ERROR - Error processing request: 'module' object is not callable
+ 2025-02-23 09:10:31,052 - ERROR - Error processing request: 'module' object is not callable
+ 2025-02-23 09:12:09,279 - ERROR - Error processing request: 'module' object is not callable
+ 2025-02-23 09:12:12,022 - ERROR - Error processing request: 'module' object is not callable
+ 2025-02-23 09:18:07,201 - ERROR - Error processing request: 'module' object is not callable
+ 2025-02-23 09:21:06,544 - ERROR - Error processing request: 'module' object is not callable
+ 2025-02-23 09:21:08,510 - ERROR - Error processing request: 'module' object is not callable
+ 2025-02-23 09:26:37,959 - INFO - Max CPUs: 12
+ 2025-02-23 09:26:48,431 - INFO - Successfully configured with key: AIzaSyC6...YN7s
+ 2025-02-23 09:28:48,467 - INFO - Max CPUs: 12
+ 2025-02-23 09:29:48,739 - INFO - Successfully configured with key: AIzaSyC6...YN7s
+ 2025-02-23 09:29:51,076 - INFO - gemini_answer -->
+ 2025-02-23 09:30:08,183 - INFO - Max CPUs: 12
+ 2025-02-23 09:31:24,934 - INFO - Max CPUs: 12
+ 2025-02-23 09:33:09,881 - INFO - Max CPUs: 12
+ 2025-02-23 09:35:32,429 - INFO - Max CPUs: 12
+ 2025-02-23 09:35:32,513 - INFO - Successfully configured with key: AIzaSyC6...YN7s
+ 2025-02-23 09:35:35,038 - INFO - gemini_answer -->
+ 2025-02-23 09:37:57,798 - INFO - Max CPUs: 12
+ 2025-02-23 09:38:12,937 - INFO - Max CPUs: 12
+ 2025-02-23 09:38:13,009 - INFO - Successfully configured with key: AIzaSyC6...YN7s
+ 2025-02-23 09:38:15,381 - INFO - Gemini processed result: {'answer': '12\n'}
+ 2025-02-23 09:38:15,383 - INFO - gemini_answer -->
+ 2025-02-23 09:39:40,048 - INFO - Successfully configured with key: AIzaSyC6...YN7s
+ 2025-02-23 09:39:43,293 - INFO - Gemini processed result: {'answer': '8\n'}
+ 2025-02-23 09:39:43,294 - INFO - gemini_answer -->
+ 2025-02-23 09:39:53,823 - INFO - Max CPUs: 12
+ 2025-02-23 09:40:16,383 - INFO - Successfully configured with key: AIzaSyC6...YN7s
+ 2025-02-23 09:40:19,010 - INFO - gemini processed result: {'answer': ' id name category price stock date_added description brand\n266 267 Race Beauty 5.09 335 2023-10-12 Central contain pattern education boy provide ... Roberts Inc\n399 400 No Books 5.13 326 2024-02-05 Per son war speak wait assume throughout leade... Rodriguez LLC\n292 293 Arm Home & Kitchen 8.73 44 2023-10-17 Long must maybe hour rather company with recen... Gray Inc\n'}
+ 2025-02-23 09:40:19,011 - INFO - gemini_answer -->
+ 2025-02-23 10:00:47,546 - INFO - Successfully configured with key: AIzaSyC6...YN7s
+ 2025-02-23 10:00:49,976 - INFO - gemini processed result: {'answer': 'id int64\nname object\ncategory object\nprice float64\nstock int64\ndate_added object\ndescription object\nbrand object\ndtype: object\n'}
+ 2025-02-23 10:00:49,978 - INFO - gemini_answer -->
+ 2025-02-23 10:01:18,233 - INFO - Successfully configured with key: AIzaSyC6...YN7s
+ 2025-02-23 10:01:20,775 - INFO - gemini processed result: {'answer': 'Top 3 Costliest Products:\n name price\n815 International 499.99\n132 Five 499.87\n307 Reduce 499.49\n'}
+ 2025-02-23 10:01:20,776 - INFO - gemini_answer -->
+ 2025-02-23 10:01:30,606 - INFO - Successfully configured with key: AIzaSyC6...YN7s
+ 2025-02-23 10:01:33,213 - INFO - gemini processed result: {'answer': ' id name category price stock date_added description brand\n815 816 International Sports 499.99 499 2023-11-06 Training enter fly situation former threat alo... Farmer-Sharp\n132 133 Five Home & Kitchen 499.87 169 2023-11-02 Activity president realize artist brother fill... Richardson-Walker\n307 308 Reduce Clothing 499.49 283 2023-11-27 Home which view city rock seat near business l... Robinson, Rodriguez and Chen\n'}
+ 2025-02-23 10:01:33,213 - INFO - gemini_answer -->
+ 2025-02-23 10:06:24,399 - INFO - Max CPUs: 12
+ 2025-02-23 10:06:34,598 - INFO - Successfully configured with key: AIzaSyC6...YN7s
+ 2025-02-23 10:06:37,383 - INFO - gemini processed result: {'answer': ' id name category price stock date_added \\\n815 816 International Sports 499.99 499 2023-11-06 \n132 133 Five Home & Kitchen 499.87 169 2023-11-02 \n307 308 Reduce Clothing 499.49 283 2023-11-27 \n\n description \\\n815 Training enter fly situation former threat alone the trial another. \n132 Activity president realize artist brother fill if maybe time region financial brother trouble. \n307 Home which view city rock seat near business loss federal growth appear. \n\n brand \n815 Farmer-Sharp \n132 Richardson-Walker \n307 Robinson, Rodriguez and Chen \n'}
+ 2025-02-23 10:06:37,385 - INFO - gemini_answer -->
controller.py CHANGED
@@ -26,6 +26,7 @@ import matplotlib.pyplot as plt
  import matplotlib
  import seaborn as sns
  from intitial_q_handler import if_initial_chart_question, if_initial_chat_question
+ from rethink_gemini_agents.rethink_chat import gemini_llm_chat
  from util_service import _prompt_generator, process_answer
  from fastapi.middleware.cors import CORSMiddleware
  import matplotlib
@@ -292,17 +293,21 @@ async def csv_chat(request: Dict, authorization: str = Header(None)):
      query = request.get("query")
      csv_url = request.get("csv_url")
      decoded_url = unquote(csv_url)
+
+     # gemini_answer = await asyncio.to_thread(gemini_llm_chat, decoded_url, query)
+     # logger.info("gemini_answer --> ", gemini_answer)
+     # return {"answer": gemini_answer}
 
      if if_initial_chat_question(query):
          answer = await asyncio.to_thread(
              langchain_csv_chat, decoded_url, query, False
          )
-         logger.info("langchain_answer:", answer)
+         logger.info("langchain_answer --> ", answer)
          return {"answer": jsonable_encoder(answer)}
 
      # Process with groq_chat first
      groq_answer = await asyncio.to_thread(groq_chat, decoded_url, query)
-     logger.info("groq_answer:", groq_answer)
+     logger.info("groq_answer --> ", groq_answer)
 
      if process_answer(groq_answer) == "Empty response received.":
          return {"answer": "Sorry, I couldn't find relevant data..."}
@@ -609,7 +614,7 @@ current_langchain_chart_lock = threading.Lock()
 
 
  # Use a process pool to run CPU-bound chart generation
- process_executor = ProcessPoolExecutor(max_workers=10)
+ process_executor = ProcessPoolExecutor(max_workers=(os.cpu_count()-2))
 
  # --- GROQ-BASED CHART GENERATION ---
  # def groq_chart(csv_url: str, question: str):
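Two small hardenings are worth noting for the lines above; the sketch below is not the committed code. First, max_workers=(os.cpu_count() - 2) can reach zero or a negative value on small machines, and os.cpu_count() may return None, so a floor of 1 is safer. Second, logger.info("groq_answer --> ", groq_answer) passes the answer as a %-format argument with no placeholder in the message, which is why entries such as "gemini_answer -->" appear in api_key_rotation.log with nothing after the arrow; a %s placeholder (or an f-string) actually renders the value.

import os
import logging
from concurrent.futures import ProcessPoolExecutor

logger = logging.getLogger(__name__)

# Clamp the pool size so 1- or 2-core hosts still get at least one worker.
max_workers = max(1, (os.cpu_count() or 1) - 2)
process_executor = ProcessPoolExecutor(max_workers=max_workers)

def log_answer(label: str, answer) -> None:
    # Lazy %-formatting: the value is substituted when the record is emitted.
    logger.info("%s --> %s", label, answer)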
pandasai.log ADDED
File without changes
rethink_gemini_agents/__pycache__/rethink_chat.cpython-311.pyc ADDED
Binary file (15 kB).
 
rethink_gemini_agents/rethink_chart.py ADDED
@@ -0,0 +1,238 @@
+ import pandas as pd
+ import re
+ import os
+ import uuid
+ import logging
+ from io import StringIO
+ import sys
+ import traceback
+ from typing import Optional, Dict, Any, List
+ from pydantic import BaseModel, Field
+ from google.generativeai import GenerativeModel, configure
+ from dotenv import load_dotenv
+ from csv_service import clean_data
+
+ # Load environment variables from .env file
+ load_dotenv()
+
+ API_KEYS = os.getenv("GOOGLE_GENERATIVE_AI_API_KEYS", "").split(",")
+ MODEL_NAME = os.getenv("GOOGLE_GENERATIVE_AI_MODEL")
+
+ # Set up non-interactive matplotlib backend
+ os.environ['MPLBACKEND'] = 'agg'
+ import matplotlib.pyplot as plt
+ plt.show = lambda: None # Monkey patch to disable display
+
+ # Configure logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(levelname)s - %(message)s',
+     handlers=[logging.FileHandler('api_key_rotation.log'), logging.StreamHandler()]
+ )
+ logger = logging.getLogger(__name__)
+
+ class GeminiKeyManager:
+     """Manage multiple Gemini API keys with failover"""
+
+     def __init__(self, api_keys: List[str]):
+         self.original_keys = api_keys.copy()
+         self.available_keys = api_keys.copy()
+         self.active_key = None
+         self.failed_keys = {}
+
+     def configure(self) -> bool:
+         """Try to configure API with available keys"""
+         while self.available_keys:
+             key = self.available_keys.pop(0)
+             try:
+                 configure(api_key=key)
+                 self.active_key = key
+                 logger.info(f"Successfully configured with key: {self._mask_key(key)}")
+                 return True
+             except Exception as e:
+                 self.failed_keys[key] = str(e)
+                 logger.error(f"Key failed: {self._mask_key(key)}. Error: {str(e)}")
+
+         logger.critical("All API keys failed to configure")
+         return False
+
+     def _mask_key(self, key: str) -> str:
+         return f"{key[:8]}...{key[-4:]}" if key else ""
+
+ class PythonREPL:
+     """Secure Python REPL with non-interactive plotting"""
+
+     def __init__(self, df: pd.DataFrame):
+         self.df = df
+         self.local_env = {
+             "pd": pd,
+             "df": self.df.copy(),
+             "plt": plt,
+             "os": os,
+             "uuid": uuid,
+             "plt": plt
+         }
+         os.makedirs('generated_charts', exist_ok=True)
+
+     def execute(self, code: str) -> Dict[str, Any]:
+         old_stdout = sys.stdout
+         sys.stdout = mystdout = StringIO()
+
+         try:
+             # Ensure figure closure and non-interactive mode
+             code = f"""
+ import matplotlib.pyplot as plt
+ plt.switch_backend('agg')
+ {code}
+ plt.close('all')
+ """
+             exec(code, self.local_env)
+             self.df = self.local_env.get('df', self.df)
+             error = False
+         except Exception as e:
+             error_msg = traceback.format_exc()
+             error = True
+         finally:
+             sys.stdout = old_stdout
+
+         return {
+             "output": mystdout.getvalue(),
+             "error": error,
+             "error_message": error_msg if error else None,
+             "df": self.local_env.get('df', self.df)
+         }
+
+ class RethinkAgent(BaseModel):
+     df: pd.DataFrame
+     max_retries: int = Field(default=5, ge=1)
+     gemini_model: Optional[GenerativeModel] = None
+     current_retry: int = Field(default=0, ge=0)
+     repl: Optional[PythonREPL] = None
+     key_manager: Optional[GeminiKeyManager] = None
+
+     class Config:
+         arbitrary_types_allowed = True
+
+     def _extract_code(self, response: str) -> str:
+         code_match = re.search(r'```python(.*?)```', response, re.DOTALL)
+         return code_match.group(1).strip() if code_match else response.strip()
+
+     def _generate_initial_prompt(self, query: str) -> str:
+         columns = "\n".join(self.df.columns)
+         return f"""
+ Generate Python code to analyze this DataFrame with columns:
+ {columns}
+
+ Query: {query}
+
+ Requirements:
+ 1. Save visualizations to 'generated_charts/' with UUID filename
+ 2. Use plt.savefig() with format='png'
+ 3. No plt.show() calls allowed
+ 4. After saving each chart, print exactly: CHART_SAVED: generated_charts/{{uuid}}.png
+ 5. Start with 'import pandas as pd'
+ 6. The DataFrame is available as 'df'
+ 7. Wrap code in ```python``` blocks
+ """
+
+     def _generate_retry_prompt(self, query: str, error: str, code: str) -> str:
+         return f"""
+ Previous code failed with error:
+ {error}
+
+ Revise this code:
+ {code}
+
+ New requirements:
+ 1. Fix the error
+ 2. Ensure plots are saved to generated_charts/
+ 3. After saving each chart, print exactly: CHART_SAVED: generated_charts/{{uuid}}.png
+ 4. No figure display
+ 5. Complete query: {query}
+
+ Explain the error first, then show corrected code in ```python``` blocks
+ """
+
+     def initialize_model(self, api_keys: List[str]) -> bool:
+         """Initialize Gemini model with key rotation"""
+         self.key_manager = GeminiKeyManager(api_keys)
+         if not self.key_manager.configure():
+             raise RuntimeError("All API keys failed to initialize")
+
+         try:
+             self.gemini_model = GenerativeModel(MODEL_NAME)
+             return True
+         except Exception as e:
+             logger.error(f"Model initialization failed: {str(e)}")
+             return False
+
+     def generate_code(self, query: str, error: Optional[str] = None, previous_code: Optional[str] = None) -> str:
+         if error:
+             prompt = self._generate_retry_prompt(query, error, previous_code)
+         else:
+             prompt = self._generate_initial_prompt(query)
+
+         try:
+             response = self.gemini_model.generate_content(prompt)
+             return self._extract_code(response.text)
+         except Exception as e:
+             logger.error(f"API call failed: {str(e)}")
+             if self.key_manager.available_keys:
+                 logger.info("Attempting key rotation...")
+                 if self.key_manager.configure():
+                     self.gemini_model = GenerativeModel(MODEL_NAME)
+                     return self.generate_code(query, error, previous_code)
+             raise
+
+     def execute_query(self, query: str) -> str:
+         self.repl = PythonREPL(self.df)
+         error = None
+         previous_code = None
+
+         while self.current_retry < self.max_retries:
+             try:
+                 code = self.generate_code(query, error, previous_code)
+                 result = self.repl.execute(code)
+
+                 if result["error"]:
+                     self.current_retry += 1
+                     error = result["error_message"]
+                     previous_code = code
+                     logger.warning(f"Retry {self.current_retry}/{self.max_retries}...")
+                 else:
+                     self.df = result["df"]
+                     return result["output"]
+             except Exception as e:
+                 logger.error(f"Critical failure: {str(e)}")
+                 return f"System error: {str(e)}"
+
+         return f"Failed after {self.max_retries} attempts. Last error: {error}"
+
+
+
+ def gemini_llm_chart(csv_url: str, query: str) -> str:
+     df = clean_data(csv_url)
+
+     agent = RethinkAgent(df=df)
+     if not agent.initialize_model(API_KEYS):
+         print("Failed to initialize model with provided keys")
+         exit(1)
+
+     result = agent.execute_query(query)
+     print("\nAnalysis Result:")
+     print(result)
+
+
+
+ # Usage Example
+ # if __name__ == "__main__":
+ #     df = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv')
+
+ #     agent = RethinkAgent(df=df)
+ #     if not agent.initialize_model(API_KEYS):
+ #         print("Failed to initialize model with provided keys")
+ #         exit(1)
+
+ #     result = agent.execute_query("Create a scatter plot of total_bill vs tip with kernel density estimate")
+ #     print("\nAnalysis Result:")
+ #     print(result)
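The initial prompt above asks the generated code to print a CHART_SAVED: generated_charts/{uuid}.png marker after every saved figure, but gemini_llm_chart only prints the raw REPL output. A small sketch of how a caller could recover the saved chart paths from that output; extract_chart_paths is an illustrative helper, not part of the commit.

import re
from typing import List

CHART_MARKER = re.compile(r"^CHART_SAVED:\s*(generated_charts/\S+\.png)\s*$", re.MULTILINE)

def extract_chart_paths(repl_output: str) -> List[str]:
    # Each line printed as "CHART_SAVED: generated_charts/<uuid>.png" yields one path.
    return CHART_MARKER.findall(repl_output)

# Example (assuming an initialized RethinkAgent named `agent`):
# output = agent.execute_query("Plot the price distribution per category")
# chart_paths = extract_chart_paths(output)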
rethink_gemini_agents/rethink_chat.py ADDED
@@ -0,0 +1,254 @@
+ import pandas as pd
+ import re
+ import os
+ import uuid
+ import logging
+ from io import StringIO
+ import sys
+ import traceback
+ from typing import Optional, Dict, Any, List
+ from pydantic import BaseModel, Field
+ from google.generativeai import GenerativeModel, configure
+ from dotenv import load_dotenv
+ import seaborn as sns
+ from csv_service import clean_data
+ from util_service import handle_out_of_range_float
+
+ pd.set_option('display.max_columns', None) # Show all columns
+ pd.set_option('display.max_rows', None) # Show all rows
+ pd.set_option('display.max_colwidth', None) # Do not truncate cell content
+
+ # Load environment variables from .env file
+ load_dotenv()
+
+ API_KEYS = os.getenv("GOOGLE_GENERATIVE_AI_API_KEYS", "").split(",")
+ MODEL_NAME = os.getenv("GOOGLE_GENERATIVE_AI_MODEL")
+
+ # Set up non-interactive matplotlib backend
+ os.environ['MPLBACKEND'] = 'agg'
+ import matplotlib.pyplot as plt
+ plt.show = lambda: None # Monkey patch to disable display
+
+ # Configure logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(levelname)s - %(message)s',
+     handlers=[logging.FileHandler('api_key_rotation.log'), logging.StreamHandler()]
+ )
+ logger = logging.getLogger(__name__)
+
+ class GeminiKeyManager:
+     """Manage multiple Gemini API keys with failover"""
+
+     def __init__(self, api_keys: List[str]):
+         self.original_keys = api_keys.copy()
+         self.available_keys = api_keys.copy()
+         self.active_key = None
+         self.failed_keys = {}
+
+     def configure(self) -> bool:
+         """Try to configure API with available keys"""
+         while self.available_keys:
+             key = self.available_keys.pop(0)
+             try:
+                 configure(api_key=key)
+                 self.active_key = key
+                 logger.info(f"Successfully configured with key: {self._mask_key(key)}")
+                 return True
+             except Exception as e:
+                 self.failed_keys[key] = str(e)
+                 logger.error(f"Key failed: {self._mask_key(key)}. Error: {str(e)}")
+
+         logger.critical("All API keys failed to configure")
+         return False
+
+     def _mask_key(self, key: str) -> str:
+         return f"{key[:8]}...{key[-4:]}" if key else ""
+
+ class PythonREPL:
+     """Secure Python REPL with non-interactive plotting"""
+
+     def __init__(self, df: pd.DataFrame):
+         self.df = df
+         self.local_env = {
+             "pd": pd,
+             "df": self.df.copy(),
+             "plt": plt,
+             "os": os,
+             "uuid": uuid,
+             "plt": plt,
+             "sns": sns,
+         }
+         os.makedirs('generated_charts', exist_ok=True)
+
+     def execute(self, code: str) -> Dict[str, Any]:
+         old_stdout = sys.stdout
+         sys.stdout = mystdout = StringIO()
+
+         try:
+             # Ensure figure closure and non-interactive mode
+             code = f"""
+ import matplotlib.pyplot as plt
+ plt.switch_backend('agg')
+ {code}
+ plt.close('all')
+ """
+             exec(code, self.local_env)
+             self.df = self.local_env.get('df', self.df)
+             error = False
+         except Exception as e:
+             error_msg = traceback.format_exc()
+             error = True
+         finally:
+             sys.stdout = old_stdout
+
+         return {
+             "output": mystdout.getvalue(),
+             "error": error,
+             "error_message": error_msg if error else None,
+             "df": self.local_env.get('df', self.df)
+         }
+
+ class RethinkAgent(BaseModel):
+     df: pd.DataFrame
+     max_retries: int = Field(default=5, ge=1)
+     gemini_model: Optional[GenerativeModel] = None
+     current_retry: int = Field(default=0, ge=0)
+     repl: Optional[PythonREPL] = None
+     key_manager: Optional[GeminiKeyManager] = None
+
+     class Config:
+         arbitrary_types_allowed = True
+
+     def _extract_code(self, response: str) -> str:
+         code_match = re.search(r'```python(.*?)```', response, re.DOTALL)
+         return code_match.group(1).strip() if code_match else response.strip()
+
+     def _generate_initial_prompt(self, query: str) -> str:
+         columns = "\n".join(self.df.columns)
+         return f"""
+ You are a data analyst assistant. Generate Python code to analyze this DataFrame with columns:
+ {columns}
+
+ Query: {query}
+
+ Requirements:
+ 1. Use print() to show results
+ 2. Start with 'import pandas as pd'
+ 3. The DataFrame is available as 'df'
+ 4. Wrap code in ```python``` blocks
+ """
+
+     def _generate_retry_prompt(self, query: str, error: str, code: str) -> str:
+         return f"""
+ Previous code failed with error:
+ {error}
+
+ Failed code:
+ {code}
+
+ Revise the code to fix the error and complete this query:
+ {query}
+
+ Requirements:
+ 1. Explain the error first
+ 2. Show corrected code in ```python``` blocks
+ """
+
+     def initialize_model(self, api_keys: List[str]) -> bool:
+         """Initialize Gemini model with key rotation"""
+         self.key_manager = GeminiKeyManager(api_keys)
+         if not self.key_manager.configure():
+             raise RuntimeError("All API keys failed to initialize")
+
+         try:
+             self.gemini_model = GenerativeModel(MODEL_NAME)
+             return True
+         except Exception as e:
+             logger.error(f"Model initialization failed: {str(e)}")
+             return False
+
+     def generate_code(self, query: str, error: Optional[str] = None, previous_code: Optional[str] = None) -> str:
+         if error:
+             prompt = self._generate_retry_prompt(query, error, previous_code)
+         else:
+             prompt = self._generate_initial_prompt(query)
+
+         try:
+             response = self.gemini_model.generate_content(prompt)
+             return self._extract_code(response.text)
+         except Exception as e:
+             logger.error(f"API call failed: {str(e)}")
+             if self.key_manager.available_keys:
+                 logger.info("Attempting key rotation...")
+                 if self.key_manager.configure():
+                     self.gemini_model = GenerativeModel(MODEL_NAME)
+                     return self.generate_code(query, error, previous_code)
+             raise
+
+     def execute_query(self, query: str) -> str:
+         self.repl = PythonREPL(self.df)
+         error = None
+         previous_code = None
+
+         while self.current_retry < self.max_retries:
+             try:
+                 code = self.generate_code(query, error, previous_code)
+                 result = self.repl.execute(code)
+
+                 if result["error"]:
+                     self.current_retry += 1
+                     error = result["error_message"]
+                     previous_code = code
+                     logger.warning(f"Retry {self.current_retry}/{self.max_retries}...")
+                 else:
+                     self.df = result["df"]
+                     return result["output"]
+             except Exception as e:
+                 logger.error(f"Critical failure: {str(e)}")
+                 return f"System error: {str(e)}"
+
+         return f"Failed after {self.max_retries} attempts. Last error: {error}"
+
+
+ def gemini_llm_chat(csv_url: str, query: str) -> str:
+     # Assuming clean_data and RethinkAgent are defined elsewhere
+     df = clean_data(csv_url)
+     agent = RethinkAgent(df=df)
+
+     # Assuming API_KEYS is defined elsewhere
+     if not agent.initialize_model(API_KEYS):
+         print("Failed to initialize model with provided keys")
+         exit(1)
+
+     result = agent.execute_query(query)
+
+     # Process different response types
+     if isinstance(result, pd.DataFrame):
+         processed = result.apply(handle_out_of_range_float).to_dict(orient="records")
+     elif isinstance(result, pd.Series):
+         processed = result.apply(handle_out_of_range_float).to_dict()
+     elif isinstance(result, list):
+         processed = [handle_out_of_range_float(item) for item in result]
+     elif isinstance(result, dict):
+         processed = {k: handle_out_of_range_float(v) for k, v in result.items()}
+     else:
+         processed = {"answer": str(handle_out_of_range_float(result))}
+
+     logger.info(f"gemini processed result: {processed}")
+     return processed
+
+ # uvicorn controller:app --host localhost --port 8000 --reload
+
+ # Usage Example
+ # if __name__ == "__main__":
+ #     df = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv')
+
+ #     agent = RethinkAgent(df=df)
+ #     if not agent.initialize_model(API_KEYS):
+ #         print("Failed to initialize model with provided keys")
+ #         exit(1)
+
+ #     result = agent.execute_query("How many rows and cols r there and what r their names?")
+ #     print("\nAnalysis Result:")
+ #     print(result)
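controller.py keeps the gemini_llm_chat call commented out for now. A hedged sketch of wiring it into an async FastAPI route with asyncio.to_thread, mirroring those commented lines; the route name and request shape are assumptions, and gemini_llm_chat itself is blocking (a Gemini call plus exec() of the generated code), so it should not run on the event loop directly.

import asyncio
from typing import Dict

from fastapi import FastAPI
from fastapi.encoders import jsonable_encoder

from rethink_gemini_agents.rethink_chat import gemini_llm_chat

app = FastAPI()

@app.post("/gemini_csv_chat")  # hypothetical route, not part of the commit
async def gemini_csv_chat(request: Dict):
    query = request.get("query")
    csv_url = request.get("csv_url")
    # Run the blocking agent on a worker thread so the event loop stays responsive.
    gemini_answer = await asyncio.to_thread(gemini_llm_chat, csv_url, query)
    return {"answer": jsonable_encoder(gemini_answer)}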
util_service.py CHANGED
@@ -1,4 +1,5 @@
  from langchain_core.prompts import ChatPromptTemplate
+ import numpy as np
 
  keywords = ["unfortunately", "unsupported", "error", "sorry", "response", "unable", "because"]
 
@@ -69,4 +70,14 @@ def _prompt_generator(question: str, chart_required: bool):
      if chart_required:
          return ChatPromptTemplate.from_template(chart_prompt)
      else:
-         return ChatPromptTemplate.from_template(chat_prompt)
+         return ChatPromptTemplate.from_template(chat_prompt)
+
+
+
+ def handle_out_of_range_float(value):
+     if isinstance(value, float):
+         if np.isnan(value):
+             return None
+         elif np.isinf(value):
+             return "Infinity"
+     return value
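handle_out_of_range_float exists because NaN and infinite floats are not valid JSON, so they are mapped to None and "Infinity" before a response is encoded. A small usage sketch with an illustrative DataFrame; note that covering every cell of a DataFrame needs element-wise application (DataFrame.applymap, or DataFrame.map on pandas >= 2.1), since DataFrame.apply hands whole columns to the function and this scalar helper returns non-floats unchanged.

import numpy as np
import pandas as pd

from util_service import handle_out_of_range_float

df = pd.DataFrame({"price": [499.99, np.nan, np.inf]})

# Element-wise conversion, then a JSON-friendly list of records.
safe_records = df.applymap(handle_out_of_range_float).to_dict(orient="records")
print(safe_records)  # roughly: [{'price': 499.99}, {'price': None}, {'price': 'Infinity'}]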