Soumik555 commited on
Commit
ec9f5b2
·
1 Parent(s): 388be94

Modify orchestrator; add code_exec tool and openai_chat (chat support to be added later)

Browse files
code_exec_service.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import ast
import datetime
import io
import time
import traceback
import uuid
from contextlib import redirect_stdout, redirect_stderr

import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
11
+
12
+
13
+ plt.style.use('seaborn-v0_8-whitegrid')
14
+
15
class PythonDataAnalysisExecutor:
    """Simplified Python code execution environment for data analysis.

    Runs LLM-supplied snippets inside a restricted global namespace
    (pandas/numpy/matplotlib/seaborn plus a whitelist of builtins) and
    captures stdout/stderr, the value of a trailing expression, and the
    variables the snippet appears to define.
    """

    def __init__(self, timeout_seconds=30):
        # NOTE(review): the timeout is stored but never enforced anywhere
        # in this class — long-running code is NOT interrupted. TODO: enforce.
        self.timeout = timeout_seconds
        # Restricted namespace handed to exec()/eval(). '__import__' is
        # exposed, so `import` statements inside snippets still work,
        # which also means the sandbox is only best-effort.
        self.safe_globals = {
            '__builtins__': {
                'print': print, 'range': range, 'len': len,
                'str': str, 'int': int, 'float': float, 'bool': bool,
                'list': list, 'dict': dict, 'tuple': tuple, 'set': set,
                'min': min, 'max': max, 'sum': sum, 'abs': abs,
                'round': round, 'zip': zip, 'enumerate': enumerate,
                '__import__': __import__
            },
            'pd': pd, 'np': np,
            'matplotlib.pyplot': plt,
            'seaborn': sns,
            'uuid': uuid.uuid4,
            'datetime': datetime, 'time': time,
            'DataFrame': pd.DataFrame, 'Series': pd.Series
        }
        # Value of the last top-level expression from the most recent run.
        self.last_result = None

    def _validate_code(self, code):
        """Basic security checks (substring scan — best-effort, easily bypassed)."""
        forbidden = ['sys.', 'open(', 'exec(', 'eval(']
        if any(f in code for f in forbidden):
            raise ValueError("Potentially unsafe code detected")

    def execute(self, code: str) -> dict:
        """Execute Python code and capture its output.

        Returns:
            dict with keys: success, output, error, execution_time,
            variables, result (value of a trailing expression, if any).
        """
        result = {
            'success': False,
            'output': '',
            'error': '',
            'execution_time': 0,
            'variables': {},
            'result': None  # value of the last top-level expression
        }

        start_time = time.time()
        output = io.StringIO()

        try:
            self._validate_code(code)

            # Bug fix: the previous implementation split the source on
            # newlines and exec'd "all but the last line", which
            # (a) broke any multi-line trailing statement (a final
            #     for/if block was torn apart → SyntaxError),
            # (b) misclassified expressions containing '=' — comparisons
            #     (`len(df) == 3`) or keyword calls (`df.head(n=5)`) —
            #     as statements, so their value was never captured.
            # Parsing with ast handles both correctly.
            tree = ast.parse(code)

            with redirect_stdout(output), redirect_stderr(output):
                if tree.body:
                    *body, last = tree.body
                    if isinstance(last, ast.Expr):
                        # Run everything before the trailing expression,
                        # then eval the expression itself to capture it.
                        if body:
                            exec(compile(ast.Module(body=body, type_ignores=[]),
                                         '<analysis>', 'exec'),
                                 self.safe_globals)
                        self.last_result = eval(
                            compile(ast.Expression(body=last.value),
                                    '<analysis>', 'eval'),
                            self.safe_globals)
                        result['result'] = self.last_result
                        output.write(str(self.last_result) + '\n')
                    else:
                        # No trailing expression: run the whole snippet.
                        exec(compile(tree, '<analysis>', 'exec'),
                             self.safe_globals)

            result['output'] = output.getvalue()
            # Heuristic: report only names that literally appear in the
            # source text, filtering out the pre-seeded helpers.
            result['variables'] = {
                k: v for k, v in self.safe_globals.items()
                if not k.startswith('__') and k in code
            }
            result['success'] = True

        except Exception as e:
            result['error'] = f"{str(e)}\n{traceback.format_exc()}"

        result['execution_time'] = time.time() - start_time
        return result
99
+
100
+
101
def run_analysis(code: str, timeout=20) -> dict:
    """Run *code* in a fresh executor and return its result dict."""
    return PythonDataAnalysisExecutor(timeout_seconds=timeout).execute(code)
105
+
106
+
107
+ # Example usage
108
+ # if __name__ == "__main__":
109
+ # analysis_code = """
110
+ # import datetime
111
+ # print(datetime.datetime.now())
112
+ # """
113
+
114
+ # result = run_analysis(analysis_code)
115
+
116
+ # # Improved output formatting
117
+ # if result['success']:
118
+ # print("Execution successful")
119
+ # print("Execution time:", result['execution_time'], "seconds")
120
+ # print("Output:", result['output'].strip())
121
+ # print("Result:", result['result'])
122
+ # print("Variables:", list(result['variables'].keys()))
123
+ # else:
124
+ # print("Execution failed")
125
+ # print("Error:", result['error'])
126
+
127
+
128
+
openai_pandasai_service.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ import os
4
+ import threading
5
+ import uuid
6
+ from langchain_openai import ChatOpenAI
7
+ import pandas as pd
8
+ from pandasai import SmartDataframe
9
+ from csv_service import clean_data
10
+ from dotenv import load_dotenv
11
+ from util_service import handle_out_of_range_float, process_answer
12
+
13
+ load_dotenv()
14
# Bug fix: `os.getenv("OPENAI_API_KEYS").split(",")` raised
# AttributeError at import time when the variable was unset; fall back
# to an empty pool, which the callers already treat as "all keys
# exhausted".
_raw_openai_keys = os.getenv("OPENAI_API_KEYS")
openai_api_keys = _raw_openai_keys.split(",") if _raw_openai_keys else []
openai_api_base = os.getenv("OPENAI_API_BASE")

# Thread-safe key management for openai_chat: index of the key currently
# in use, guarded by a lock so concurrent requests rotate consistently.
current_openai_key_index = 0
current_openai_key_lock = threading.Lock()

# Chart-quality guidance appended to chart questions.
# (Fixed the duplicated assignment `instructions = instructions = ...`.)
instructions = """

- Please ensure that each value is clearly visible, You may need to adjust the font size, rotate the labels, or use truncation to improve readability (if needed).
- For multiple charts, arrange them in a grid format (2x2, 3x3, etc.)
- Use professional and color-blind friendly palettes.
- Do not use sns.set_palette()
- Read above instructions and follow them.

"""


# Model used by both openai_chat and openai_chart (with thread-safe key rotation)
openai_model_name = 'gpt-4o'
35
+
36
def openai_chat(csv_url: str, question: str):
    """Answer *question* about the CSV at *csv_url* via PandasAI + OpenAI.

    Retries with the next API key (shared, lock-guarded index) whenever a
    call fails. Returns a JSON-serializable answer on success, a dict with
    an "error" key when the answer is invalid or all keys are exhausted at
    loop entry, or None when keys are exhausted inside the error handler.
    NOTE(review): the None-vs-dict exhaustion returns are inconsistent —
    callers must handle both.
    """
    global current_openai_key_index, current_openai_key_lock

    while True:
        # Snapshot the current key under the lock; give up if the shared
        # index has already walked past the end of the pool.
        with current_openai_key_lock:
            if current_openai_key_index >= len(openai_api_keys):
                return {"error": "All API keys exhausted."}
            current_api_key = openai_api_keys[current_openai_key_index]

        try:

            data = clean_data(csv_url)
            llm = ChatOpenAI(model=openai_model_name, api_key=current_api_key,base_url=openai_api_base)
            # Generate unique filename using UUID
            chart_filename = f"chart_{uuid.uuid4()}.png"
            chart_path = os.path.join("generated_charts", chart_filename)

            # Configure SmartDataframe with chart settings
            df = SmartDataframe(
                data,
                config={
                    'llm': llm,
                    'save_charts': True,  # Enable chart saving
                    'open_charts': False,
                    'save_charts_path': os.path.dirname(chart_path),  # Directory to save
                    'custom_chart_filename': chart_filename  # Unique filename
                }
            )

            answer = df.chat(question)
            # Process different response types into JSON-friendly shapes,
            # mapping out-of-range floats (NaN/inf) to safe values.
            if isinstance(answer, pd.DataFrame):
                processed = answer.apply(handle_out_of_range_float).to_dict(orient="records")
            elif isinstance(answer, pd.Series):
                processed = answer.apply(handle_out_of_range_float).to_dict()
            elif isinstance(answer, list):
                processed = [handle_out_of_range_float(item) for item in answer]
            elif isinstance(answer, dict):
                processed = {k: handle_out_of_range_float(v) for k, v in answer.items()}
            else:
                processed = {"answer": str(handle_out_of_range_float(answer))}

            # process_answer() truthy means the answer was judged unusable.
            if process_answer(processed):
                return {"error": "Answer is not valid."}
            return processed

        except Exception as e:
            error_message = str(e)
            # NOTE(review): str(e) is almost never empty, so this branch
            # runs for virtually every exception — any failure (not just
            # quota errors) advances the shared key index and retries.
            if error_message:
                with current_openai_key_lock:
                    current_openai_key_index += 1
                    if current_openai_key_index >= len(openai_api_keys):
                        print("All API keys exhausted.")
                        return None
            else:
                # NOTE(review): effectively dead code — reached only when
                # str(e) is the empty string.
                print(f"Error with API key index {current_openai_key_index}: {error_message}")
                return None
93
+
94
+
95
+
96
+
97
+
98
+
99
def openai_chart(csv_url: str, question: str):
    """Generate a chart for *question* from the CSV at *csv_url* via PandasAI.

    Returns the PandasAI answer (typically a saved-chart path) on success,
    the string "Chart not generated" when the answer is judged unusable,
    a dict with an "error" key for non-rate-limit failures, or None once
    every key has been tried.
    """
    global current_openai_key_index, current_openai_key_lock

    # Bug fix: the original used `while True` with
    # `if "429" in error or error is not None:` — always true, since
    # str(e) is never None — and rotated the index modulo the pool size,
    # so the loop's exhaustion guard could never trigger: a persistent
    # non-429 error spun forever. Bound the retries to one pass over the
    # key pool and rotate only on rate-limit (429) errors.
    for _ in range(len(openai_api_keys)):
        with current_openai_key_lock:
            if current_openai_key_index >= len(openai_api_keys):
                return {"error": "All API keys exhausted."}
            current_api_key = openai_api_keys[current_openai_key_index]

        try:
            data = clean_data(csv_url)
            llm = ChatOpenAI(model=openai_model_name, api_key=current_api_key,base_url=openai_api_base)
            # Generate unique filename using UUID
            chart_filename = f"chart_{uuid.uuid4()}.png"
            chart_path = os.path.join("generated_charts", chart_filename)

            # Configure SmartDataframe with chart settings
            df = SmartDataframe(
                data,
                config={
                    'llm': llm,
                    'save_charts': True,  # Enable chart saving
                    'open_charts': False,
                    'save_charts_path': os.path.dirname(chart_path),  # Directory to save
                    'custom_chart_filename': chart_filename  # Unique filename
                }
            )

            # Append the module-level chart-quality instructions.
            answer = df.chat(question + instructions)

            if process_answer(answer):
                return "Chart not generated"
            return answer

        except Exception as e:
            error = str(e)
            print(f"Error with API key index {current_openai_key_index}: {error}")
            if "429" in error:
                # Rate-limited: advance to the next key and retry.
                with current_openai_key_lock:
                    current_openai_key_index = (current_openai_key_index + 1) % len(openai_api_keys)
            else:
                print(f"Chart generation error: {error}")
                return {"error": error}

    print("All API keys exhausted for chart generation")
    return None
orchestrator_agent.py CHANGED
@@ -1,4 +1,5 @@
1
 
 
2
  import os
3
  from typing import Dict, List, Any
4
  from pydantic_ai import Agent
@@ -7,10 +8,12 @@ from pydantic_ai.providers.google_gla import GoogleGLAProvider
7
  from pydantic_ai import RunContext
8
  from pydantic import BaseModel
9
  from google.api_core.exceptions import ResourceExhausted # Import the exception for quota exhaustion
 
10
  from csv_service import get_csv_basic_info
11
  from orchestrator_functions import csv_chart, csv_chat
12
  from dotenv import load_dotenv
13
 
 
14
  load_dotenv()
15
 
16
 
@@ -125,8 +128,9 @@ def create_agent(csv_url: str, api_key: str, conversation_history: List) -> Agen
125
  - Highlight limitations/caveats
126
 
127
  5. TOOL USAGE:
128
- - Can process statistical operations
129
- - Supports visualization libraries
 
130
 
131
  ## Current Context:
132
  - Working with CSV_URL: {csv_url}
@@ -141,58 +145,44 @@ def create_agent(csv_url: str, api_key: str, conversation_history: List) -> Agen
141
  4. Provide interpretation
142
  5. Offer next-step suggestions
143
  """
144
-
145
- # system_prompt = (
146
- # "You are a data analyst. "
147
- # "You have all the tools you need to answer any question. "
148
- # "If the user asks for multiple answers or charts, break the question into several well-defined questions. "
149
- # "Pass the CSV URL or file path along with the questions to the tools to generate the answer. "
150
- # "The tools are actually LLMs with Python code execution capabilities. "
151
- # "Modify the query if needed to make it simpler for the LLM to understand. "
152
- # "Answer in a friendly and helpful manner. "
153
- # "**Format images** in Markdown: `![alt_text](direct-image-url)`. "
154
- # f"Your CSV URL is {csv_url}. "
155
- # f"Your CSV metadata is {csv_metadata}."
156
- # )
157
-
158
-
159
- # system_prompt = (
160
- # "You are a data analyst assistant with limited tool capabilities. "
161
- # "Available tools can only handle simple data queries: "
162
- # "- Count rows/columns\n- Calculate basic stats (avg, sum, min/max)\n"
163
- # "- Create simple visualizations (pie charts, bar graphs)\n"
164
- # "- Show column names/types\n\n"
165
-
166
- # "Query Handling Rules:\n"
167
- # "1. If query is complex, ambiguous, or exceeds tool capabilities:\n"
168
- # " - Break into simpler sub-questions\n"
169
- # " - Ask for clarification\n"
170
- # " - Rephrase to nearest simple query\n"
171
- # "2. For 'full report' requests:\n"
172
- # " - Outline possible analysis steps\n"
173
- # " - Ask user to select one component at a time\n\n"
174
-
175
- # "Examples:\n"
176
- # "- Bad query: 'Show me everything'\n"
177
- # " Response: 'I can show row count (10), columns (5: Name, Age...), "
178
- # "or a pie chart of categories. Which would you like?'\n"
179
- # "- Bad query: 'Analyze trends'\n"
180
- # " Response: 'For trend analysis, I can show monthly averages or "
181
- # "year-over-year comparisons. Please specify time period and metric.'\n\n"
182
-
183
- # "Current CSV Context:\n"
184
- # f"- URL: {csv_url}\n"
185
- # f"- Metadata: {csv_metadata}\n\n"
186
-
187
- # "Always format images as: ![Chart Description](direct-image-url)"
188
- # )
189
-
190
- return Agent(
191
  model=initialize_model(api_key),
192
  deps_type=str,
193
  tools=[generate_csv_answer, generate_chart],
194
  system_prompt=system_prompt
195
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
 
197
  def csv_orchestrator_chat(csv_url: str, user_question: str, conversation_history: List) -> str:
198
  print("CSV URL:", csv_url)
@@ -216,3 +206,4 @@ def csv_orchestrator_chat(csv_url: str, user_question: str, conversation_history
216
  # If all keys are exhausted or fail
217
  print("All API keys have been exhausted or failed.")
218
  return None
 
 
1
 
2
+ from datetime import datetime
3
  import os
4
  from typing import Dict, List, Any
5
  from pydantic_ai import Agent
 
8
  from pydantic_ai import RunContext
9
  from pydantic import BaseModel
10
  from google.api_core.exceptions import ResourceExhausted # Import the exception for quota exhaustion
11
+ from code_exec_service import run_analysis
12
  from csv_service import get_csv_basic_info
13
  from orchestrator_functions import csv_chart, csv_chat
14
  from dotenv import load_dotenv
15
 
16
+
17
  load_dotenv()
18
 
19
 
 
128
  - Highlight limitations/caveats
129
 
130
  5. TOOL USAGE:
131
+ - Python Code Executor Tool (To execute Python code, To get date-time, For lightweight data analysis etc.)
132
+ - Data Analysis Tool
133
+ - Chart Generation Tool
134
 
135
  ## Current Context:
136
  - Working with CSV_URL: {csv_url}
 
145
  4. Provide interpretation
146
  5. Offer next-step suggestions
147
  """
148
+ gemini_csv_orchestrator_agent = Agent(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  model=initialize_model(api_key),
150
  deps_type=str,
151
  tools=[generate_csv_answer, generate_chart],
152
  system_prompt=system_prompt
153
  )
154
+
155
+ @gemini_csv_orchestrator_agent.tool_plain
156
+ def python_code_executor(analysis_code: str) -> dict:
157
+ """_summary_
158
+
159
+ Args:
160
+ analysis_code (str): _description_
161
+ Ex:
162
+ df = pd.read_csv({csv_url})
163
+ len(df)
164
+
165
+ Returns:
166
+ dict: _description_
167
+ """
168
+
169
+ print(f'LLM Passed a code: {analysis_code}')
170
+ result = run_analysis(analysis_code)
171
+ if result['success']:
172
+ print("Execution successful")
173
+ print("Execution time:", result['execution_time'], "seconds")
174
+ print("Output:", result['output'].strip())
175
+ print("Result:", result['result'])
176
+ print("Variables:", list(result['variables'].keys()))
177
+ # convert the result to a string
178
+ result_str = str(result['output'])
179
+ return result_str
180
+ else:
181
+ print("Execution failed")
182
+ print("Error:", result['error'])
183
+ error_str = str(result['error'])
184
+ return error_str
185
+ return gemini_csv_orchestrator_agent
186
 
187
  def csv_orchestrator_chat(csv_url: str, user_question: str, conversation_history: List) -> str:
188
  print("CSV URL:", csv_url)
 
206
  # If all keys are exhausted or fail
207
  print("All API keys have been exhausted or failed.")
208
  return None
209
+
orchestrator_functions.py CHANGED
@@ -1,5 +1,6 @@
1
  # Import necessary modules
2
  import asyncio
 
3
  import os
4
  import threading
5
  import uuid
@@ -20,6 +21,7 @@ import matplotlib.pyplot as plt
20
  import matplotlib
21
  import seaborn as sns
22
  from gemini_langchain_agent import langchain_gemini_csv_handler
 
23
  from supabase_service import upload_file_to_supabase
24
  from util_service import _prompt_generator, process_answer
25
  import matplotlib
@@ -69,6 +71,10 @@ def handle_out_of_range_float(value):
69
  return "Infinity"
70
  return value
71
 
 
 
 
 
72
 
73
  # Modified groq_chat function with thread-safe key rotation
74
  def groq_chat(csv_url: str, question: str):
@@ -87,7 +93,7 @@ def groq_chat(csv_url: str, question: str):
87
  try:
88
  os.remove(cache_db_path)
89
  except Exception as e:
90
- print(f"Error deleting cache DB file: {e}")
91
 
92
  data = clean_data(csv_url)
93
  llm = ChatGroq(model=model_name, api_key=current_api_key)
@@ -129,10 +135,10 @@ def groq_chat(csv_url: str, question: str):
129
  with current_groq_key_lock:
130
  current_groq_key_index += 1
131
  if current_groq_key_index >= len(groq_api_keys):
132
- print("All API keys exhausted.")
133
  return None
134
  else:
135
- print(f"Error with API key index {current_groq_key_index}: {error_message}")
136
  return None
137
 
138
 
@@ -183,10 +189,10 @@ def langchain_csv_chat(csv_url: str, question: str, chart_required: bool):
183
  return result.get("output")
184
 
185
  except Exception as e:
186
- print(f"Error with key index {current_key}: {str(e)}")
187
 
188
  # If all keys are exhausted, return None
189
- print("All API keys have been exhausted.")
190
  return None
191
 
192
 
@@ -241,7 +247,7 @@ def groq_chart(csv_url: str, question: str):
241
  try:
242
  os.remove(cache_db_path)
243
  except Exception as e:
244
- print(f"Cache cleanup error: {e}")
245
 
246
  data = clean_data(csv_url)
247
  with current_groq_chart_lock:
@@ -277,10 +283,10 @@ def groq_chart(csv_url: str, question: str):
277
  with current_groq_chart_lock:
278
  current_groq_chart_key_index = (current_groq_chart_key_index + 1) % len(groq_api_keys)
279
  else:
280
- print(f"Chart generation error: {error}")
281
  return {"error": error}
282
 
283
- print("All API keys exhausted for chart generation")
284
  return None
285
 
286
 
@@ -327,12 +333,12 @@ def langchain_csv_chart(csv_url: str, question: str, chart_required: bool):
327
  return chart_files
328
 
329
  if attempt < len(groq_api_keys) - 1:
330
- print(f"Langchain chart error (key {current_key}): {output}")
331
 
332
  except Exception as e:
333
- print(f"Langchain chart error (key {current_key}): {str(e)}")
334
 
335
- print("All API keys exhausted for chart generation")
336
  return None
337
 
338
 
@@ -363,50 +369,50 @@ def langchain_csv_chart(csv_url: str, question: str, chart_required: bool):
363
  # # First try Groq-based chart generation
364
  # try:
365
  # groq_result = await asyncio.to_thread(groq_chart, csv_url, query)
366
- # print(f"Generated Chart (Groq): {groq_result}")
367
 
368
  # if groq_result != 'Chart not generated':
369
  # unique_file_name = f'{str(uuid.uuid4())}.png'
370
  # image_public_url = await upload_file_to_supabase(groq_result, unique_file_name)
371
- # print(f"Image uploaded to Supabase: {image_public_url}")
372
  # return {"image_url": image_public_url}
373
 
374
  # except Exception as groq_error:
375
- # print(f"Groq chart generation failed, falling back to Langchain: {str(groq_error)}")
376
 
377
  # # Fallback to Langchain if Groq fails
378
  # try:
379
  # langchain_paths = await asyncio.to_thread(langchain_csv_chart, csv_url, query, True)
380
- # print("Fallback langchain chart result:", langchain_paths)
381
 
382
  # if isinstance(langchain_paths, list) and len(langchain_paths) > 0:
383
  # unique_file_name = f'{str(uuid.uuid4())}.png'
384
- # print("Uploading the chart to supabase...")
385
  # image_public_url = await upload_file_to_supabase(langchain_paths[0], unique_file_name)
386
- # print("Image uploaded to Supabase and Image URL is... ", image_public_url)
387
  # return {"image_url": image_public_url}
388
 
389
  # except Exception as langchain_error:
390
- # print(f"Langchain chart generation also failed: {str(langchain_error)}")
391
  # try:
392
  # # Last resort: Try with the gemini langchain agent
393
- # print("Trying with the gemini langchain agent...")
394
  # lc_gemini_chart_result = await asyncio.to_thread(langchain_gemini_csv_handler, csv_url, query, True)
395
  # if lc_gemini_chart_result is not None:
396
  # clean_path = lc_gemini_chart_result.strip()
397
  # unique_file_name = f'{str(uuid.uuid4())}.png'
398
- # print("Uploading the chart to supabase...")
399
  # image_public_url = await upload_file_to_supabase(clean_path, unique_file_name)
400
- # print("Image uploaded to Supabase and Image URL is... ", image_public_url)
401
  # return {"image_url": image_public_url}
402
  # except Exception as gemini_error:
403
- # print(f"Gemini Langchain chart generation also failed: {str(gemini_error)}")
404
 
405
  # # If both methods fail
406
  # return {"error": "Could not generate the chart, please try again."}
407
 
408
  # except Exception as e:
409
- # print(f"Critical chart error: {str(e)}")
410
  # return {"error": "Internal system error"}
411
 
412
 
@@ -436,7 +442,7 @@ def langchain_csv_chart(csv_url: str, question: str, chart_required: bool):
436
  # # Process with Groq first
437
  # try:
438
  # groq_answer = await asyncio.to_thread(groq_chat, csv_url, updated_query)
439
- # print("groq_answer:", groq_answer)
440
 
441
  # if process_answer(groq_answer) == "Empty response received." or groq_answer == None:
442
  # return {"answer": "Sorry, I couldn't find relevant data..."}
@@ -447,7 +453,7 @@ def langchain_csv_chart(csv_url: str, question: str, chart_required: bool):
447
  # return {"answer": jsonable_encoder(groq_answer)}
448
 
449
  # except Exception as groq_error:
450
- # print(f"Groq error, falling back to LangChain: {str(groq_error)}")
451
 
452
  # # Process with LangChain if Groq fails
453
  # try:
@@ -458,7 +464,7 @@ def langchain_csv_chart(csv_url: str, question: str, chart_required: bool):
458
  # return {"answer": jsonable_encoder(lang_answer)}
459
  # return {"answer": "Sorry, I couldn't find relevant data..."}
460
  # except Exception as langchain_error:
461
- # print(f"LangChain processing error: {str(langchain_error)}")
462
 
463
  # # last resort: Try with the gemini langchain agent
464
  # try:
@@ -469,11 +475,11 @@ def langchain_csv_chart(csv_url: str, question: str, chart_required: bool):
469
  # return {"answer": jsonable_encoder(gemini_answer)}
470
  # return {"answer": "Sorry, I couldn't find relevant data..."}
471
  # except Exception as gemini_error:
472
- # print(f"Gemini Langchain processing error: {str(gemini_error)}")
473
  # return {"answer": "error"}
474
 
475
  # except Exception as e:
476
- # print(f"Error processing request: {str(e)}")
477
  # return {"answer": "error"}
478
 
479
 
@@ -511,7 +517,7 @@ async def csv_chat(csv_url: str, query: str):
511
  gemini_answer = await asyncio.to_thread(
512
  langchain_gemini_csv_handler, csv_url, updated_query, False
513
  )
514
- print("LangChain-Gemini answer:", gemini_answer)
515
 
516
  if not process_answer(gemini_answer) or gemini_answer is None:
517
  return {"answer": jsonable_encoder(gemini_answer)}
@@ -519,14 +525,14 @@ async def csv_chat(csv_url: str, query: str):
519
  raise Exception("LangChain-Gemini response not usable, falling back to LangChain-Groq")
520
 
521
  except Exception as gemini_error:
522
- print(f"LangChain-Gemini error: {str(gemini_error)}")
523
 
524
  # --- 2. Second Attempt: LangChain Groq ---
525
  try:
526
  lang_groq_answer = await asyncio.to_thread(
527
  langchain_csv_chat, csv_url, updated_query, False
528
  )
529
- print("LangChain-Groq answer:", lang_groq_answer)
530
 
531
  if not process_answer(lang_groq_answer):
532
  return {"answer": jsonable_encoder(lang_groq_answer)}
@@ -534,12 +540,12 @@ async def csv_chat(csv_url: str, query: str):
534
  raise Exception("LangChain-Groq response not usable, falling back to raw Groq")
535
 
536
  except Exception as lang_groq_error:
537
- print(f"LangChain-Groq error: {str(lang_groq_error)}")
538
 
539
  # --- 3. Final Attempt: Raw Groq Chat ---
540
  try:
541
  raw_groq_answer = await asyncio.to_thread(groq_chat, csv_url, updated_query)
542
- print("Raw Groq answer:", raw_groq_answer)
543
 
544
  if process_answer(raw_groq_answer) == "Empty response received." or raw_groq_answer is None:
545
  return {"answer": "Sorry, I couldn't find relevant data..."}
@@ -550,11 +556,11 @@ async def csv_chat(csv_url: str, query: str):
550
  return {"answer": jsonable_encoder(raw_groq_answer)}
551
 
552
  except Exception as raw_groq_error:
553
- print(f"Raw Groq error: {str(raw_groq_error)}")
554
  return {"answer": "error"}
555
 
556
  except Exception as e:
557
- print(f"Unexpected error: {str(e)}")
558
  return {"answer": "error"}
559
 
560
 
@@ -567,7 +573,7 @@ async def csv_chat(csv_url: str, query: str):
567
  async def csv_chart(csv_url: str, query: str):
568
  """
569
  Generate a chart based on the provided CSV URL and query.
570
- Prioritizes raw Groq, then LangChain Gemini, and finally LangChain Groq as fallback.
571
 
572
  Parameters:
573
  - csv_url (str): The URL of the CSV file.
@@ -589,61 +595,73 @@ async def csv_chart(csv_url: str, query: str):
589
  """Helper function to handle image uploads"""
590
  unique_name = f'{uuid.uuid4()}.png'
591
  public_url = await upload_file_to_supabase(image_path, unique_name)
592
- print(f"Uploaded chart: {public_url}")
593
  os.remove(image_path) # Remove the local image file after upload
594
  return {"image_url": public_url}
595
 
596
  try:
597
- # --- 1. First Attempt: Raw Groq ---
598
  try:
599
- groq_result = await asyncio.to_thread(groq_chart, csv_url, query)
600
- print(f"Raw Groq chart result:", groq_result)
 
 
 
 
 
 
 
 
 
 
 
 
 
601
 
602
- if groq_result and groq_result != 'Chart not generated':
603
- return await upload_and_return(groq_result)
604
 
605
- raise Exception("Raw Groq failed to generate chart")
606
 
607
- except Exception as groq_error:
608
- print(f"Raw Groq failed ({str(groq_error)}), trying LangChain Gemini...")
609
 
610
- # --- 2. Second Attempt: LangChain Gemini ---
611
- try:
612
- gemini_result = await asyncio.to_thread(
613
- langchain_gemini_csv_handler, csv_url, query, True
614
- )
615
- print("LangChain Gemini chart result:", gemini_result)
616
 
617
- # --- i) If Gemini result is a string, return it ---
618
- if gemini_result and isinstance(gemini_result, str):
619
- clean_path = gemini_result.strip()
620
- return await upload_and_return(clean_path)
621
 
622
- # --- ii) If Gemini result is a list, return the first element ---
623
- if gemini_result and isinstance(gemini_result, list) and len(gemini_result) > 0:
624
- return await upload_and_return(gemini_result[0])
625
 
626
- raise Exception("LangChain Gemini returned empty result")
627
 
628
- except Exception as gemini_error:
629
- print(f"LangChain Gemini failed ({str(gemini_error)}), trying LangChain Groq...")
630
-
631
- # --- 3. Final Attempt: LangChain Groq ---
632
- try:
633
- lc_groq_paths = await asyncio.to_thread(
634
- langchain_csv_chart, csv_url, query, True
635
- )
636
- print("LangChain Groq chart result:", lc_groq_paths)
637
 
638
- if isinstance(lc_groq_paths, list) and lc_groq_paths:
639
- return await upload_and_return(lc_groq_paths[0])
640
 
641
- return {"error": "All chart generation methods failed"}
642
 
643
- except Exception as lc_groq_error:
644
- print(f"LangChain Groq failed: {str(lc_groq_error)}")
645
- return {"error": "Could not generate chart"}
646
 
647
  except Exception as e:
648
- print(f"Critical error: {str(e)}")
649
  return {"error": "Internal system error"}
 
1
  # Import necessary modules
2
  import asyncio
3
+ import logging
4
  import os
5
  import threading
6
  import uuid
 
21
  import matplotlib
22
  import seaborn as sns
23
  from gemini_langchain_agent import langchain_gemini_csv_handler
24
+ from openai_pandasai_service import openai_chart
25
  from supabase_service import upload_file_to_supabase
26
  from util_service import _prompt_generator, process_answer
27
  import matplotlib
 
71
  return "Infinity"
72
  return value
73
 
74
+ # Set up logging
75
+ logging.basicConfig(level=logging.INFO)
76
+ logger = logging.getLogger(__name__)
77
+
78
 
79
  # Modified groq_chat function with thread-safe key rotation
80
  def groq_chat(csv_url: str, question: str):
 
93
  try:
94
  os.remove(cache_db_path)
95
  except Exception as e:
96
+ logger.info(f"Error deleting cache DB file: {e}")
97
 
98
  data = clean_data(csv_url)
99
  llm = ChatGroq(model=model_name, api_key=current_api_key)
 
135
  with current_groq_key_lock:
136
  current_groq_key_index += 1
137
  if current_groq_key_index >= len(groq_api_keys):
138
+ logger.info("All API keys exhausted.")
139
  return None
140
  else:
141
+ logger.info(f"Error with API key index {current_groq_key_index}: {error_message}")
142
  return None
143
 
144
 
 
189
  return result.get("output")
190
 
191
  except Exception as e:
192
+ logger.info(f"Error with key index {current_key}: {str(e)}")
193
 
194
  # If all keys are exhausted, return None
195
+ logger.info("All API keys have been exhausted.")
196
  return None
197
 
198
 
 
247
  try:
248
  os.remove(cache_db_path)
249
  except Exception as e:
250
+ logger.info(f"Cache cleanup error: {e}")
251
 
252
  data = clean_data(csv_url)
253
  with current_groq_chart_lock:
 
283
  with current_groq_chart_lock:
284
  current_groq_chart_key_index = (current_groq_chart_key_index + 1) % len(groq_api_keys)
285
  else:
286
+ logger.info(f"Chart generation error: {error}")
287
  return {"error": error}
288
 
289
+ logger.info("All API keys exhausted for chart generation")
290
  return None
291
 
292
 
 
333
  return chart_files
334
 
335
  if attempt < len(groq_api_keys) - 1:
336
+ logger.info(f"Langchain chart error (key {current_key}): {output}")
337
 
338
  except Exception as e:
339
+ logger.info(f"Langchain chart error (key {current_key}): {str(e)}")
340
 
341
+ logger.info("All API keys exhausted for chart generation")
342
  return None
343
 
344
 
 
369
  # # First try Groq-based chart generation
370
  # try:
371
  # groq_result = await asyncio.to_thread(groq_chart, csv_url, query)
372
+ # logger.info(f"Generated Chart (Groq): {groq_result}")
373
 
374
  # if groq_result != 'Chart not generated':
375
  # unique_file_name = f'{str(uuid.uuid4())}.png'
376
  # image_public_url = await upload_file_to_supabase(groq_result, unique_file_name)
377
+ # logger.info(f"Image uploaded to Supabase: {image_public_url}")
378
  # return {"image_url": image_public_url}
379
 
380
  # except Exception as groq_error:
381
+ # logger.info(f"Groq chart generation failed, falling back to Langchain: {str(groq_error)}")
382
 
383
  # # Fallback to Langchain if Groq fails
384
  # try:
385
  # langchain_paths = await asyncio.to_thread(langchain_csv_chart, csv_url, query, True)
386
+ # logger.info("Fallback langchain chart result:", langchain_paths)
387
 
388
  # if isinstance(langchain_paths, list) and len(langchain_paths) > 0:
389
  # unique_file_name = f'{str(uuid.uuid4())}.png'
390
+ # logger.info("Uploading the chart to supabase...")
391
  # image_public_url = await upload_file_to_supabase(langchain_paths[0], unique_file_name)
392
+ # logger.info("Image uploaded to Supabase and Image URL is... ", image_public_url)
393
  # return {"image_url": image_public_url}
394
 
395
  # except Exception as langchain_error:
396
+ # logger.info(f"Langchain chart generation also failed: {str(langchain_error)}")
397
  # try:
398
  # # Last resort: Try with the gemini langchain agent
399
+ # logger.info("Trying with the gemini langchain agent...")
400
  # lc_gemini_chart_result = await asyncio.to_thread(langchain_gemini_csv_handler, csv_url, query, True)
401
  # if lc_gemini_chart_result is not None:
402
  # clean_path = lc_gemini_chart_result.strip()
403
  # unique_file_name = f'{str(uuid.uuid4())}.png'
404
+ # logger.info("Uploading the chart to supabase...")
405
  # image_public_url = await upload_file_to_supabase(clean_path, unique_file_name)
406
+ # logger.info("Image uploaded to Supabase and Image URL is... ", image_public_url)
407
  # return {"image_url": image_public_url}
408
  # except Exception as gemini_error:
409
+ # logger.info(f"Gemini Langchain chart generation also failed: {str(gemini_error)}")
410
 
411
  # # If both methods fail
412
  # return {"error": "Could not generate the chart, please try again."}
413
 
414
  # except Exception as e:
415
+ # logger.info(f"Critical chart error: {str(e)}")
416
  # return {"error": "Internal system error"}
417
 
418
 
 
442
  # # Process with Groq first
443
  # try:
444
  # groq_answer = await asyncio.to_thread(groq_chat, csv_url, updated_query)
445
+ # logger.info("groq_answer:", groq_answer)
446
 
447
  # if process_answer(groq_answer) == "Empty response received." or groq_answer == None:
448
  # return {"answer": "Sorry, I couldn't find relevant data..."}
 
453
  # return {"answer": jsonable_encoder(groq_answer)}
454
 
455
  # except Exception as groq_error:
456
+ # logger.info(f"Groq error, falling back to LangChain: {str(groq_error)}")
457
 
458
  # # Process with LangChain if Groq fails
459
  # try:
 
464
  # return {"answer": jsonable_encoder(lang_answer)}
465
  # return {"answer": "Sorry, I couldn't find relevant data..."}
466
  # except Exception as langchain_error:
467
+ # logger.info(f"LangChain processing error: {str(langchain_error)}")
468
 
469
  # # last resort: Try with the gemini langchain agent
470
  # try:
 
475
  # return {"answer": jsonable_encoder(gemini_answer)}
476
  # return {"answer": "Sorry, I couldn't find relevant data..."}
477
  # except Exception as gemini_error:
478
+ # logger.info(f"Gemini Langchain processing error: {str(gemini_error)}")
479
  # return {"answer": "error"}
480
 
481
  # except Exception as e:
482
+ # logger.info(f"Error processing request: {str(e)}")
483
  # return {"answer": "error"}
484
 
485
 
 
517
  gemini_answer = await asyncio.to_thread(
518
  langchain_gemini_csv_handler, csv_url, updated_query, False
519
  )
520
+ logger.info("LangChain-Gemini answer:", gemini_answer)
521
 
522
  if not process_answer(gemini_answer) or gemini_answer is None:
523
  return {"answer": jsonable_encoder(gemini_answer)}
 
525
  raise Exception("LangChain-Gemini response not usable, falling back to LangChain-Groq")
526
 
527
  except Exception as gemini_error:
528
+ logger.info(f"LangChain-Gemini error: {str(gemini_error)}")
529
 
530
  # --- 2. Second Attempt: LangChain Groq ---
531
  try:
532
  lang_groq_answer = await asyncio.to_thread(
533
  langchain_csv_chat, csv_url, updated_query, False
534
  )
535
+ logger.info("LangChain-Groq answer:", lang_groq_answer)
536
 
537
  if not process_answer(lang_groq_answer):
538
  return {"answer": jsonable_encoder(lang_groq_answer)}
 
540
  raise Exception("LangChain-Groq response not usable, falling back to raw Groq")
541
 
542
  except Exception as lang_groq_error:
543
+ logger.info(f"LangChain-Groq error: {str(lang_groq_error)}")
544
 
545
  # --- 3. Final Attempt: Raw Groq Chat ---
546
  try:
547
  raw_groq_answer = await asyncio.to_thread(groq_chat, csv_url, updated_query)
548
+ logger.info("Raw Groq answer:", raw_groq_answer)
549
 
550
  if process_answer(raw_groq_answer) == "Empty response received." or raw_groq_answer is None:
551
  return {"answer": "Sorry, I couldn't find relevant data..."}
 
556
  return {"answer": jsonable_encoder(raw_groq_answer)}
557
 
558
  except Exception as raw_groq_error:
559
+ logger.info(f"Raw Groq error: {str(raw_groq_error)}")
560
  return {"answer": "error"}
561
 
562
  except Exception as e:
563
+ logger.info(f"Unexpected error: {str(e)}")
564
  return {"answer": "error"}
565
 
566
 
 
573
  async def csv_chart(csv_url: str, query: str):
574
  """
575
  Generate a chart based on the provided CSV URL and query.
576
+ Prioritizes OpenAI, then raw Groq, then LangChain Gemini, and finally LangChain Groq as fallback.
577
 
578
  Parameters:
579
  - csv_url (str): The URL of the CSV file.
 
595
  """Helper function to handle image uploads"""
596
  unique_name = f'{uuid.uuid4()}.png'
597
  public_url = await upload_file_to_supabase(image_path, unique_name)
598
+ logger.info(f"Uploaded chart: {public_url}")
599
  os.remove(image_path) # Remove the local image file after upload
600
  return {"image_url": public_url}
601
 
602
  try:
 
603
  try:
604
+ # --- 1. First Attempt: OpenAI ---
605
+ openai_result = await asyncio.to_thread(openai_chart, csv_url, query)
606
+ logger.info(f"OpenAI chart result:", openai_result)
607
+
608
+ if openai_result and openai_result != 'Chart not generated':
609
+ return await upload_and_return(openai_result)
610
+
611
+ raise Exception("OpenAI failed to generate chart")
612
+
613
+ except Exception as openai_error:
614
+ logger.info(f"OpenAI failed ({str(openai_error)}), trying raw Groq...")
615
+ # --- 2. Second Attempt: Raw Groq ---
616
+ try:
617
+ groq_result = await asyncio.to_thread(groq_chart, csv_url, query)
618
+ logger.info(f"Raw Groq chart result:", groq_result)
619
 
620
+ if groq_result and groq_result != 'Chart not generated':
621
+ return await upload_and_return(groq_result)
622
 
623
+ raise Exception("Raw Groq failed to generate chart")
624
 
625
+ except Exception as groq_error:
626
+ logger.info(f"Raw Groq failed ({str(groq_error)}), trying LangChain Gemini...")
627
 
628
+ # --- 3. Third Attempt: LangChain Gemini ---
629
+ try:
630
+ gemini_result = await asyncio.to_thread(
631
+ langchain_gemini_csv_handler, csv_url, query, True
632
+ )
633
+ logger.info("LangChain Gemini chart result:", gemini_result)
634
 
635
+ # --- i) If Gemini result is a string, return it ---
636
+ if gemini_result and isinstance(gemini_result, str):
637
+ clean_path = gemini_result.strip()
638
+ return await upload_and_return(clean_path)
639
 
640
+ # --- ii) If Gemini result is a list, return the first element ---
641
+ if gemini_result and isinstance(gemini_result, list) and len(gemini_result) > 0:
642
+ return await upload_and_return(gemini_result[0])
643
 
644
+ raise Exception("LangChain Gemini returned empty result")
645
 
646
+ except Exception as gemini_error:
647
+ logger.info(f"LangChain Gemini failed ({str(gemini_error)}), trying LangChain Groq...")
648
+
649
+ # --- 4. Final Attempt: LangChain Groq ---
650
+ try:
651
+ lc_groq_paths = await asyncio.to_thread(
652
+ langchain_csv_chart, csv_url, query, True
653
+ )
654
+ logger.info("LangChain Groq chart result:", lc_groq_paths)
655
 
656
+ if isinstance(lc_groq_paths, list) and lc_groq_paths:
657
+ return await upload_and_return(lc_groq_paths[0])
658
 
659
+ return {"error": "All chart generation methods failed"}
660
 
661
+ except Exception as lc_groq_error:
662
+ logger.info(f"LangChain Groq failed: {str(lc_groq_error)}")
663
+ return {"error": "Could not generate chart"}
664
 
665
  except Exception as e:
666
+ logger.info(f"Critical error: {str(e)}")
667
  return {"error": "Internal system error"}
util_service.py CHANGED
@@ -1,7 +1,7 @@
1
  from langchain_core.prompts import ChatPromptTemplate
2
  import numpy as np
3
 
4
- keywords = ["i encountered","unfortunately", "unsupported", "error", "sorry", "response", "unable", "because", "too many"]
5
 
6
  def contains_keywords(text, keywords):
7
  return any(keyword.lower() in text.lower() for keyword in keywords)
 
1
  from langchain_core.prompts import ChatPromptTemplate
2
  import numpy as np
3
 
4
+ keywords = ["i encountered", "429", "unfortunately", "unsupported", "error", "sorry", "response", "unable", "because", "too many"]
5
 
6
  def contains_keywords(text, keywords):
7
  return any(keyword.lower() in text.lower() for keyword in keywords)