Spaces:

Soumik555
/

FastApi

Running

App Files Files Community

Soumik555 committed on Feb 23

Commit

a1bb638

1 Parent(s): 43443d0

stay hard

Browse files

Files changed (3) hide show

.env +1 -0
controller.py +11 -3
rethink_gemini_agents/gemini_langchain_service.py +212 -0

.env CHANGED Viewed

@@ -1,5 +1,6 @@
 GOOGLE_GENERATIVE_AI_API_KEYS=AIzaSyC6CuXP7oMHbQymv5desJ7HJypSPisYN7s,AIzaSyAzV1YGajXhC2N8n8b3bgU1PHUXNWdZiUk,AIzaSyAYvv5urC0lhzNYYO1f4a4EYqTsZrmubrM,AIzaSyD7VsIKjtNBlQUWXQ_bIFbl240f2AUT7nc,AIzaSyCFnFsqplkNeQFjRh2EhkK90t48wkmyJQU
 GOOGLE_GENERATIVE_AI_MODEL=gemini-2.0-flash-lite-preview-02-05
 # Pandas API keys
 PANDASAI_API_KEYS=$2a$10$VVwPEnzFxnEnJhk2u5ef1ewTuT3rNK59QpYQWAhUY29FHH4b7fwNC,$2a$10$5ikmN9RtNWHvP8aLnHfm.epO/XhVF1Pvk1Chy2Fqa.4x232a374xK,$2a$10$aAvr1DH3Pt3KLPDYa.JED..d83Pl6M4xnQd6uY8fadNkqSEv9KaYK,$2a$10$tJkqyS9Us36ernP1N4/8dOE088rCm7MC3gIj2RQMlaalY34EkkDHy,$2a$10$V0tThT/XnmlHbJucM00yN.hxz9r3ZwVqe0sQRQwDZAGHmhMq81D7O,$2a$10$d9vj8iPtD/L/i2B5AhiKTexOSpZ52XZTRUDkZa4p0vnI6RCj7f0K2,$2a$10$PZdCvVJB8301iDIZrZ2z8uB9d68kaBeOjaOIbbXGgqlZ2frbTm0eG,$2a$10$SHK.YTrTQcol/yM/RD8tZOcIF2fUTXtaETpDo8G0At90NxQ1HGk.C,$2a$10$QYz2Fp2fFZNq80HjAC/Okuy/PZFMgGgpPuQAyFDVtvB0G9bCn8Cee,$2a$10$SGY3HoCX0jbBXHSbpwGH1OEC/yPwT5792MjSZeWYVLew52pE4gR0y,$2a$10$QHPpvXwCXhHtKyx4jWMTh.8Mz1azTEQbDdDMpmikOzdgKtFfOq3FG,$2a$10$KoTsqdLPNIBiLRHWUg/6guqxNrB4ByljnMDTN0HJXmGl.PagdxpGm,$2a$10$ERsxnbIwk0LOMqmFX1SfjuMSXzh5gsBqm1BnYXFNEBAS3J1AfK24m,$2a$10$zwX4F0/pxXgmuAfDteFlHeXswX8cvVAvkv8mBAJ4WLvAEaUM3v266,$2a$10$LPA4FUIjg6CbZYEhi3NLRuY2Yar5SbT9gYoQ/oZuPaFUxNUyaJ/ii,$2a$10$kLDISr9ivaqcYiAZ1TmBOeclXK0C5a/LPPB3Rsxme19NwVPhznQya,$2a$10$qpoxy4k4sQya0tY7/lSEkuEuwVQGEl757A.jVPGNEh6p5tN6Yofyq,$2a$10$TDndpw.NWwx2k5X.9eI30uAaga8pbYO/erUEblVGcj6ydzSgzdVde,$2a$10$TtZtCWXgVSUhaNMMsuOjLuC6tCY1GTzUR/PvIUdowXYQdmefgpvbW,$2a$10$Orj1ZiURJkREK30gdwEYLeV7mY657jJhif8SckIPdvctjkWHXHrq6,$2a$10$CxEXDLjFtK1.nE9GuIt1duxLbvYtz2EA7x1LqddNF44kKVcc8aGZC

 GOOGLE_GENERATIVE_AI_API_KEYS=AIzaSyC6CuXP7oMHbQymv5desJ7HJypSPisYN7s,AIzaSyAzV1YGajXhC2N8n8b3bgU1PHUXNWdZiUk,AIzaSyAYvv5urC0lhzNYYO1f4a4EYqTsZrmubrM,AIzaSyD7VsIKjtNBlQUWXQ_bIFbl240f2AUT7nc,AIzaSyCFnFsqplkNeQFjRh2EhkK90t48wkmyJQU
 GOOGLE_GENERATIVE_AI_MODEL=gemini-2.0-flash-lite-preview-02-05
+GOOGLE_GENERATIVE_AI_MODEL_LANGCHAIN_AGENT=gemini-2.0-flash
 # Pandas API keys
 PANDASAI_API_KEYS=$2a$10$VVwPEnzFxnEnJhk2u5ef1ewTuT3rNK59QpYQWAhUY29FHH4b7fwNC,$2a$10$5ikmN9RtNWHvP8aLnHfm.epO/XhVF1Pvk1Chy2Fqa.4x232a374xK,$2a$10$aAvr1DH3Pt3KLPDYa.JED..d83Pl6M4xnQd6uY8fadNkqSEv9KaYK,$2a$10$tJkqyS9Us36ernP1N4/8dOE088rCm7MC3gIj2RQMlaalY34EkkDHy,$2a$10$V0tThT/XnmlHbJucM00yN.hxz9r3ZwVqe0sQRQwDZAGHmhMq81D7O,$2a$10$d9vj8iPtD/L/i2B5AhiKTexOSpZ52XZTRUDkZa4p0vnI6RCj7f0K2,$2a$10$PZdCvVJB8301iDIZrZ2z8uB9d68kaBeOjaOIbbXGgqlZ2frbTm0eG,$2a$10$SHK.YTrTQcol/yM/RD8tZOcIF2fUTXtaETpDo8G0At90NxQ1HGk.C,$2a$10$QYz2Fp2fFZNq80HjAC/Okuy/PZFMgGgpPuQAyFDVtvB0G9bCn8Cee,$2a$10$SGY3HoCX0jbBXHSbpwGH1OEC/yPwT5792MjSZeWYVLew52pE4gR0y,$2a$10$QHPpvXwCXhHtKyx4jWMTh.8Mz1azTEQbDdDMpmikOzdgKtFfOq3FG,$2a$10$KoTsqdLPNIBiLRHWUg/6guqxNrB4ByljnMDTN0HJXmGl.PagdxpGm,$2a$10$ERsxnbIwk0LOMqmFX1SfjuMSXzh5gsBqm1BnYXFNEBAS3J1AfK24m,$2a$10$zwX4F0/pxXgmuAfDteFlHeXswX8cvVAvkv8mBAJ4WLvAEaUM3v266,$2a$10$LPA4FUIjg6CbZYEhi3NLRuY2Yar5SbT9gYoQ/oZuPaFUxNUyaJ/ii,$2a$10$kLDISr9ivaqcYiAZ1TmBOeclXK0C5a/LPPB3Rsxme19NwVPhznQya,$2a$10$qpoxy4k4sQya0tY7/lSEkuEuwVQGEl757A.jVPGNEh6p5tN6Yofyq,$2a$10$TDndpw.NWwx2k5X.9eI30uAaga8pbYO/erUEblVGcj6ydzSgzdVde,$2a$10$TtZtCWXgVSUhaNMMsuOjLuC6tCY1GTzUR/PvIUdowXYQdmefgpvbW,$2a$10$Orj1ZiURJkREK30gdwEYLeV7mY657jJhif8SckIPdvctjkWHXHrq6,$2a$10$CxEXDLjFtK1.nE9GuIt1duxLbvYtz2EA7x1LqddNF44kKVcc8aGZC

controller.py CHANGED Viewed

@@ -26,6 +26,7 @@ import matplotlib.pyplot as plt
 import matplotlib
 import seaborn as sns
 from intitial_q_handler import if_initial_chart_question, if_initial_chat_question
 from rethink_gemini_agents.rethink_chat import gemini_llm_chat
 from util_service import _prompt_generator, process_answer
 from fastapi.middleware.cors import CORSMiddleware
@@ -295,9 +296,16 @@ async def csv_chat(request: Dict, authorization: str = Header(None)):
         csv_url = request.get("csv_url")
         decoded_url = unquote(csv_url)
-        # gemini_answer = await asyncio.to_thread(gemini_llm_chat, decoded_url, query)
-        # logger.info("gemini_answer --> ", gemini_answer)
-        # return {"answer": gemini_answer}
         if if_initial_chat_question(query):
             answer = await asyncio.to_thread(

 import matplotlib
 import seaborn as sns
 from intitial_q_handler import if_initial_chart_question, if_initial_chat_question
+from rethink_gemini_agents.gemini_langchain_service import langchain_gemini_csv_chat
 from rethink_gemini_agents.rethink_chat import gemini_llm_chat
 from util_service import _prompt_generator, process_answer
 from fastapi.middleware.cors import CORSMiddleware
         csv_url = request.get("csv_url")
         decoded_url = unquote(csv_url)
+        if if_initial_chat_question(query):
+            answer = await asyncio.to_thread(
+                langchain_gemini_csv_chat, decoded_url, query, False
+            )
+            logger.info("gemini langchain_answer --> ", answer)
+            return {"answer": jsonable_encoder(answer)}
+        gemini_answer = await asyncio.to_thread(gemini_llm_chat, decoded_url, query)
+        logger.info("gemini_answer --> ", gemini_answer)
+        return {"answer": gemini_answer}
         if if_initial_chat_question(query):
             answer = await asyncio.to_thread(

rethink_gemini_agents/gemini_langchain_service.py ADDED Viewed

	@@ -0,0 +1,212 @@

+import os
+import re
+import uuid
+from langchain_google_genai import ChatGoogleGenerativeAI
+import pandas as pd
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_experimental.tools import PythonAstREPLTool
+from langchain_experimental.agents import create_pandas_dataframe_agent
+from dotenv import load_dotenv
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib
+import seaborn as sns
+# Set the backend for matplotlib to 'Agg' to avoid GUI issues
+matplotlib.use('Agg')
+load_dotenv()
+model_name = os.getenv("GOOGLE_GENERATIVE_AI_MODEL_LANGCHAIN_AGENT")
+google_api_keys = os.getenv("GOOGLE_GENERATIVE_AI_API_KEYS").split(",")
+current_key_index = 0  # Global index for API keys
+def _prompt_generator(question: str, chart_required: bool):
+    chat_prompt = f"""You are a senior data analyst working with CSV data. Adhere strictly to the following guidelines:
+                         1. **Data Verification:** Always inspect the data with `.sample(5).to_dict()` before performing any analysis.
+                         2. **Data Integrity:** Ensure proper handling of null values to maintain accuracy and reliability.
+                         3. **Communication:** Provide concise, professional, and well-structured responses.
+                         4. Avoid including any internal processing details or references to the methods used to generate your response (ex: based on the tool call, using the function -> These types of phrases.)
+                         **Query:** {question}
+                    """
+    chart_prompt = f"""You are a senior data analyst working with CSV data. Follow these rules STRICTLY:
+                       1. Generate ONE unique identifier FIRST using: unique_id = uuid.uuid4().hex
+                       2. Visualization requirements:
+                       - Adjust font sizes, rotate labels (45° if needed), truncate for readability
+                       - Figure size: (12, 6)
+                       - Descriptive titles (fontsize=14)
+                       - Colorblind-friendly palettes
+                       3. File handling rules:
+                       - Create MAXIMUM 2 charts if absolutely necessary
+                       - For multiple charts:
+                           * Arrange in grid format (2x1 vertical layout preferred)
+                           * Use SAME unique_id with suffixes:
+                           - f"{{unique_id}}_1.png"
+                           - f"{{unique_id}}_2.png"
+                       - Save EXCLUSIVELY to "generated_charts" folder
+                       - File naming: f"chart_{{unique_id}}.png" (for single chart)
+                       4. FINAL OUTPUT MUST BE:
+                       - For single chart: f"generated_charts/chart_{{unique_id}}.png"
+                       - For multiple charts: f"generated_charts/chart_{{unique_id}}.png" (combined grid image)
+                       - ONLY return this full path string, nothing else
+                       **Query:** {question}
+                       IMPORTANT:
+                       - Generate the unique_id FIRST before any operations
+                       - Use THE SAME unique_id throughout entire process
+                       - NEVER generate new UUIDs after initial creation
+                       - Return EXACT filepath string of the final saved chart
+                       """
+    if chart_required:
+        return ChatPromptTemplate.from_template(chart_prompt)
+    else:
+        return ChatPromptTemplate.from_template(chat_prompt)
+def langchain_gemini_csv_chat(csv_url: str, question: str, chart_required: bool):
+    global current_key_index
+    data = pd.read_csv(csv_url)
+    # Try each API key until a successful response is generated or keys run out
+    attempts = 0
+    total_keys = len(google_api_keys)
+    while attempts < total_keys:
+        try:
+            # Select the current API key
+            api_key = google_api_keys[current_key_index]
+            print(f"Using API key index {current_key_index}")
+            # Initialize the LLM with the current API key
+            llm = ChatGoogleGenerativeAI(model=model_name, api_key=api_key)
+            # Prepare the Python REPL tool with the dataframe and necessary libraries
+            tool = PythonAstREPLTool(locals={
+                "df": data,
+                "pd": pd,
+                "np": np,
+                "plt": plt,  # Ensure plt is available
+                "sns": sns,
+                "matplotlib": matplotlib,
+                "uuid": uuid,
+            })
+            # Create the pandas agent with the provided tools and settings
+            agent = create_pandas_dataframe_agent(
+                llm,
+                data,
+                agent_type="openai-tools",
+                verbose=True,
+                allow_dangerous_code=True,
+                extra_tools=[tool],
+                return_intermediate_steps=True
+            )
+            chat_prompt = _prompt_generator(question, chart_required)
+            # Attempt to invoke the agent with the question
+            result = agent.invoke({"input": chat_prompt})
+            # If successful, return the output
+            return result.get("output")
+        except Exception as e:
+            # Log the error along with the current API key index
+            print(f"Error using API key index {current_key_index}: {e}")
+            # Move to the next API key
+            current_key_index += 1
+            attempts += 1
+            # If all keys have been exhausted, exit the loop
+            if current_key_index >= total_keys:
+                print("All API keys have been exhausted.")
+                return None
+def langchain_gemini_csv_chart(csv_url: str, question: str, chart_required: bool):
+    global current_key_index
+    data = pd.read_csv(csv_url)
+    # Try each API key until a successful response is generated or keys run out
+    attempts = 0
+    total_keys = len(google_api_keys)
+    while attempts < total_keys:
+        try:
+            # Select the current API key
+            api_key = google_api_keys[current_key_index]
+            print(f"Using API key index {current_key_index}")
+            # Initialize the LLM with the current API key
+            llm = ChatGoogleGenerativeAI(model=model_name, api_key=api_key)
+            # Prepare the Python REPL tool with the dataframe and necessary libraries
+            tool = PythonAstREPLTool(locals={
+                "df": data,
+                "pd": pd,
+                "np": np,
+                "plt": plt,  # Ensure plt is available
+                "sns": sns,
+                "matplotlib": matplotlib
+            })
+            # Create the pandas agent with the provided tools and settings
+            agent = create_pandas_dataframe_agent(
+                llm,
+                data,
+                agent_type="openai-tools",
+                verbose=True,
+                allow_dangerous_code=True,
+                extra_tools=[tool],
+                return_intermediate_steps=True
+            )
+            chart_prompt = _prompt_generator(question, chart_required)
+            # Attempt to invoke the agent with the question
+            result = agent.invoke({"input": chart_prompt})
+            # If successful, return the output
+            return result.get("output")
+        except Exception as e:
+            # Log the error along with the current API key index
+            print(f"Error using API key index {current_key_index}: {e}")
+            # Move to the next API key
+            current_key_index += 1
+            attempts += 1
+            # If all keys have been exhausted, exit the loop
+            if current_key_index >= total_keys:
+                print("All API keys have been exhausted.")
+                return None
+# Example usage:
+# if __name__ == "__main__":
+#     csv_url = "./documents/titanic.csv"
+#     question = "Create 2 beautiful visualizations of the data using different chart styles (line, bar etc..), return file names"
+#     output = langchain_gemini_csv_chat(csv_url, question, True)
+#     print("Agent output:", output)
+#     # Define a regex pattern that matches 'temp' followed by one or more digits.
+#     pattern = r"temp\d+"
+#     # Use re.findall to extract all occurrences that match the pattern.
+#     names = re.findall(pattern, output)
+#     print(names)