Soumik555 committed on
Commit 95fd2fd · 1 Parent(s): 4dee00f

Fixed langchain gemini agent underscore issue

Files changed (2):
  1. gemini_langchain_agent.py +127 -0
  2. orchestrator_functions.py +64 -56
gemini_langchain_agent.py ADDED
@@ -0,0 +1,127 @@
+ import os
+ import re
+ import uuid
+ from langchain_google_genai import ChatGoogleGenerativeAI
+ import pandas as pd
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_experimental.tools import PythonAstREPLTool
+ from langchain_experimental.agents import create_pandas_dataframe_agent
+ from dotenv import load_dotenv
+ import numpy as np
+ import matplotlib.pyplot as plt
+ import matplotlib
+ import seaborn as sns
+ import datetime as dt
+
+ # Set the backend for matplotlib to 'Agg' to avoid GUI issues
+ matplotlib.use('Agg')
+
+ load_dotenv()
+ model_name = 'gemini-2.0-flash'  # Model name for Google Generative AI
+ google_api_keys = os.getenv("GEMINI_API_KEYS").split(",")
+ current_key_index = 0  # Global index for API keys
+
+
+ def create_agent(llm, data, tools):
+     """Create agent with tool names"""
+     return create_pandas_dataframe_agent(
+         llm,
+         data,
+         agent_type="tool-calling",
+         verbose=True,
+         allow_dangerous_code=True,
+         extra_tools=tools,
+         return_intermediate_steps=True
+     )
+
+
+ def _prompt_generator(question: str, chart_required: bool):
+     chat_prompt = f"""You are a senior data analyst working with CSV data. Adhere strictly to the following guidelines:
+
+ 1. **Data Verification:** Always inspect the data with `.sample(5).to_dict()` before performing any analysis.
+ 2. **Data Integrity:** Ensure proper handling of null values to maintain accuracy and reliability.
+ 3. **Communication:** Provide concise, professional, and well-structured responses.
+ 4. **Presentation:** Do not reference internal processing details or the methods used to generate the response (e.g. phrases such as "based on the tool call" or "using the function").
+
+ **Query:** {question}
+ """
+
+     chart_prompt = f"""You are a senior data analyst working with CSV data. Follow these rules STRICTLY:
+
+ 1. Generate ONE unique identifier FIRST using: unique_id = uuid.uuid4().hex
+ 2. Visualization requirements:
+    - Adjust font sizes, rotate labels (45° if needed), truncate for readability
+    - Figure size: (12, 6)
+    - Descriptive titles (fontsize=14)
+    - Colorblind-friendly palettes
+ 3. File handling rules:
+    - Create a MAXIMUM of 2 charts, and only if absolutely necessary
+    - For multiple charts:
+      * Arrange in grid format (2x1 vertical layout preferred)
+      * Use the SAME unique_id with suffixes:
+        - f"{{unique_id}}_1.png"
+        - f"{{unique_id}}_2.png"
+    - Save EXCLUSIVELY to the "generated_charts" folder
+    - File naming: f"chart_{{unique_id}}.png" (for a single chart)
+ 4. FINAL OUTPUT MUST BE:
+    - For a single chart: f"generated_charts/chart_{{unique_id}}.png"
+    - For multiple charts: f"generated_charts/chart_{{unique_id}}.png" (combined grid image)
+    - **ONLY return this full path string, nothing else**
+
+ **Query:** {question}
+
+ IMPORTANT:
+ - Generate the unique_id FIRST, before any other operations
+ - Use the SAME unique_id throughout the entire process
+ - NEVER generate new UUIDs after the initial creation
+ - Return the EXACT filepath string of the final saved chart
+ """
+
+     if chart_required:
+         return ChatPromptTemplate.from_template(chart_prompt)
+     else:
+         return ChatPromptTemplate.from_template(chat_prompt)
+
+
+ def langchain_gemini_csv_handler(csv_url: str, question: str, chart_required: bool):
+     global current_key_index
+     data = pd.read_csv(csv_url)
+
+     attempts = 0
+     total_keys = len(google_api_keys)
+     while attempts < total_keys:
+         try:
+             api_key = google_api_keys[current_key_index]
+             print(f"Using API key index {current_key_index}")
+
+             llm = ChatGoogleGenerativeAI(model=model_name, api_key=api_key)
+
+             # Create tool with validated name
+             tool = PythonAstREPLTool(
+                 locals={
+                     "df": data,
+                     "pd": pd,
+                     "np": np,
+                     "plt": plt,
+                     "sns": sns,
+                     "matplotlib": matplotlib,
+                     "uuid": uuid,
+                     "dt": dt
+                 },
+             )
+
+             agent = create_agent(llm, data, [tool])
+
+             prompt = _prompt_generator(question, chart_required)
+             result = agent.invoke({"input": prompt})
+             return result.get("output")
+
+         except Exception as e:
+             print(f"Error using API key index {current_key_index}: {e}")
+             current_key_index = (current_key_index + 1) % total_keys
+             attempts += 1
+
+     print("All API keys have been exhausted.")
+     return None
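
For reference, a minimal sketch of calling the new handler directly; the CSV URL and question below are illustrative placeholders, not part of this commit:

    from gemini_langchain_agent import langchain_gemini_csv_handler

    # Placeholder inputs for illustration only
    answer = langchain_gemini_csv_handler(
        csv_url="https://example.com/data.csv",
        question="Which column has the most missing values?",
        chart_required=False,
    )
    print(answer)  # None if every key in GEMINI_API_KEYS has failed
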
orchestrator_functions.py CHANGED
@@ -19,6 +19,7 @@ import numpy as np
  import matplotlib.pyplot as plt
  import matplotlib
  import seaborn as sns
+ from gemini_langchain_agent import langchain_gemini_csv_handler
  from supabase_service import upload_image_to_supabase
  from util_service import _prompt_generator, process_answer
  import matplotlib
@@ -385,78 +386,85 @@ async def csv_chart(csv_url: str, query: str):


- async def csv_chat(csv_url: str, query: str):
-     try:
-         updated_query = f"{query} and Do not show any charts or graphs."
-
-         # Process with langchain_chat first
-         try:
-             lang_answer = await asyncio.to_thread(
-                 langchain_csv_chat, csv_url, query, False
-             )
-             if lang_answer is not None:
-                 return {"answer": jsonable_encoder(lang_answer)}
-             raise Exception("Langchain failed to process")
-         except Exception as langchain_error:
-             print(f"Langchain error, falling back to Groq: {str(langchain_error)}")
-
-             # Process with groq_chat if langchain fails
-             try:
-                 groq_answer = await asyncio.to_thread(groq_chat, csv_url, updated_query)
-                 print("groq_answer:", groq_answer)
-
-                 if process_answer(groq_answer) == "Empty response received.":
-                     return {"answer": "Sorry, I couldn't find relevant data..."}
-
-                 if process_answer(groq_answer) or groq_answer is None:
-                     return {"answer": "error"}
-
-                 return {"answer": jsonable_encoder(groq_answer)}
-             except Exception as groq_error:
-                 print(f"Groq processing error: {str(groq_error)}")
-                 return {"answer": "error"}
-
-     except Exception as e:
-         print(f"Error processing request: {str(e)}")
-         return {"answer": "error"}
-
-
- # async def csv_chat(csv_url: str, query: str):
- #     try:
- #         updated_query = f"{query} and Do not show any charts or graphs."
-
- #         # Process with Groq first
- #         try:
- #             groq_answer = await asyncio.to_thread(groq_chat, csv_url, updated_query)
- #             print("groq_answer:", groq_answer)
-
- #             if process_answer(groq_answer) == "Empty response received." or groq_answer == None:
- #                 return {"answer": "Sorry, I couldn't find relevant data..."}
-
- #             if process_answer(groq_answer) or groq_answer == None:
- #                 raise Exception("Groq response not usable, falling back to LangChain")
-
- #             return {"answer": jsonable_encoder(groq_answer)}
-
- #         except Exception as groq_error:
- #             print(f"Groq error, falling back to LangChain: {str(groq_error)}")
-
- #         # Process with LangChain if Groq fails
- #         try:
- #             lang_answer = await asyncio.to_thread(
- #                 langchain_csv_chat, csv_url, query, False
- #             )
- #             if not process_answer(lang_answer):
- #                 return {"answer": jsonable_encoder(lang_answer)}
- #             return {"answer": "Sorry, I couldn't find relevant data..."}
- #         except Exception as langchain_error:
- #             print(f"LangChain processing error: {str(langchain_error)}")
- #             return {"answer": "error"}
-
- #     except Exception as e:
- #         print(f"Error processing request: {str(e)}")
- #         return {"answer": "error"}
+ # async def csv_chat(csv_url: str, query: str):
+ #     try:
+ #         updated_query = f"{query} and Do not show any charts or graphs."
+
+ #         # Process with langchain_chat first
+ #         try:
+ #             lang_answer = await asyncio.to_thread(
+ #                 langchain_csv_chat, csv_url, query, False
+ #             )
+ #             if lang_answer is not None:
+ #                 return {"answer": jsonable_encoder(lang_answer)}
+ #             raise Exception("Langchain failed to process")
+ #         except Exception as langchain_error:
+ #             print(f"Langchain error, falling back to Groq: {str(langchain_error)}")
+
+ #             # Process with groq_chat if langchain fails
+ #             try:
+ #                 groq_answer = await asyncio.to_thread(groq_chat, csv_url, updated_query)
+ #                 print("groq_answer:", groq_answer)
+
+ #                 if process_answer(groq_answer) == "Empty response received.":
+ #                     return {"answer": "Sorry, I couldn't find relevant data..."}
+
+ #                 if process_answer(groq_answer) or groq_answer is None:
+ #                     return {"answer": "error"}
+
+ #                 return {"answer": jsonable_encoder(groq_answer)}
+ #             except Exception as groq_error:
+ #                 print(f"Groq processing error: {str(groq_error)}")
+ #                 return {"answer": "error"}
+
+ #     except Exception as e:
+ #         print(f"Error processing request: {str(e)}")
+ #         return {"answer": "error"}
+
+
+ async def csv_chat(csv_url: str, query: str):
+     try:
+         updated_query = f"{query}"
+
+         # Process with the Gemini LangChain handler first
+         try:
+             # groq_answer = await asyncio.to_thread(groq_chat, csv_url, updated_query)
+             # print("groq_answer:", groq_answer)
+
+             # if process_answer(groq_answer) == "Empty response received." or groq_answer == None:
+             #     return {"answer": "Sorry, I couldn't find relevant data..."}
+
+             # if process_answer(groq_answer) or groq_answer == None:
+             #     raise Exception("Groq response not usable, falling back to LangChain")
+
+             # return {"answer": jsonable_encoder(groq_answer)}
+
+             lc_gemini = await asyncio.to_thread(
+                 langchain_gemini_csv_handler, csv_url, updated_query, False
+             )
+
+             if lc_gemini is not None:
+                 return {"answer": jsonable_encoder(lc_gemini)}
+
+         except Exception as gemini_error:
+             print(f"Gemini error, falling back to Groq-LangChain: {str(gemini_error)}")
+
+         # Process with the Groq-backed LangChain chat if Gemini fails
+         try:
+             lang_answer = await asyncio.to_thread(
+                 langchain_csv_chat, csv_url, query, False
+             )
+             if not process_answer(lang_answer):
+                 return {"answer": jsonable_encoder(lang_answer)}
+             return {"answer": "Sorry, I couldn't find relevant data..."}
+         except Exception as langchain_error:
+             print(f"LangChain processing error: {str(langchain_error)}")
+             return {"answer": "error"}
+
+     except Exception as e:
+         print(f"Error processing request: {str(e)}")
+         return {"answer": "error"}
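
For context, a minimal sketch of exercising the updated csv_chat fallback chain from a script; the URL and question are placeholders, and the coroutine is simply driven with asyncio.run:

    import asyncio
    from orchestrator_functions import csv_chat

    async def main():
        # The Gemini-backed LangChain handler is tried first; if it errors or returns None,
        # the existing langchain_csv_chat path runs as the fallback.
        result = await csv_chat(
            "https://example.com/data.csv",          # placeholder CSV URL
            "Summarize the top 5 rows by revenue.",  # placeholder question
        )
        print(result)  # {"answer": ...} or {"answer": "error"}

    asyncio.run(main())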