Spaces:

Soumik555
/

FastApi

Running

App Files Files Community

Soumik555 committed on Mar 19

Commit

4fbcf68

1 Parent(s): 45e593b

put middle orchestrator

Browse files

Files changed (3) hide show

controller.py +17 -0
orchestrator_agent.py +94 -0
orchestrator_functions.py +381 -0

controller.py CHANGED Viewed

@@ -26,6 +26,7 @@ import matplotlib.pyplot as plt
 import matplotlib
 import seaborn as sns
 from intitial_q_handler import if_initial_chart_question, if_initial_chat_question
 from supabase_service import upload_image_to_supabase
 from util_service import _prompt_generator, process_answer
 from fastapi.middleware.cors import CORSMiddleware
@@ -306,6 +307,14 @@ async def csv_chat(request: Dict, authorization: str = Header(None)):
             )
             logger.info("langchain_answer:", answer)
             return {"answer": jsonable_encoder(answer)}
         # Process with groq_chat first
         groq_answer = await asyncio.to_thread(groq_chat, decoded_url, query)
@@ -802,6 +811,14 @@ async def csv_chart(request: dict, authorization: str = Header(None)):
                 logger.info("Image uploaded to Supabase and Image URL is... ", {image_public_url})
                 return {"image_url": image_public_url}
                # return FileResponse(langchain_result[0], media_type="image/png")
         # Next, try the groq-based method
         groq_result = await loop.run_in_executor(

 import matplotlib
 import seaborn as sns
 from intitial_q_handler import if_initial_chart_question, if_initial_chat_question
+from orchestrator_agent import csv_orchestrator_chat
 from supabase_service import upload_image_to_supabase
 from util_service import _prompt_generator, process_answer
 from fastapi.middleware.cors import CORSMiddleware
             )
             logger.info("langchain_answer:", answer)
             return {"answer": jsonable_encoder(answer)}
+        # Orchestrate the execution
+        orchestrator_answer = await asyncio.to_thread(
+            csv_orchestrator_chat, decoded_url, query
+        )
+        if orchestrator_answer is not None:
+            return {"answer": jsonable_encoder(orchestrator_answer)}
         # Process with groq_chat first
         groq_answer = await asyncio.to_thread(groq_chat, decoded_url, query)
                 logger.info("Image uploaded to Supabase and Image URL is... ", {image_public_url})
                 return {"image_url": image_public_url}
                # return FileResponse(langchain_result[0], media_type="image/png")
+        # Use orchestrator to handle the user's chart query first
+        orchestrator_answer = await asyncio.to_thread(
+            process_executor,csv_orchestrator_chat, csv_url, query
+        )
+        if orchestrator_answer is not None:
+            return {"orchestrator_response": jsonable_encoder(orchestrator_answer)}
         # Next, try the groq-based method
         groq_result = await loop.run_in_executor(

orchestrator_agent.py ADDED Viewed

	@@ -0,0 +1,94 @@

+import os
+from typing import Dict, List, Any
+from pydantic_ai import Agent
+from pydantic_ai.models.gemini import GeminiModel
+from pydantic_ai.providers.google_gla import GoogleGLAProvider
+from pydantic_ai import RunContext
+from pydantic import BaseModel
+from google.api_core.exceptions import ResourceExhausted  # Import the exception for quota exhaustion
+from csv_service import get_csv_basic_info
+from orchestrator_functions import csv_chart, csv_chat
+# Load all API keys from the environment variable
+GEMINI_API_KEYS = os.getenv("GEMINI_API_KEYS", "").split(",")  # Expecting a comma-separated list of keys
+# Function to initialize the model with a specific API key
+def initialize_model(api_key: str) -> GeminiModel:
+    return GeminiModel(
+        'gemini-2.0-flash',
+        provider=GoogleGLAProvider(api_key=api_key)
+    )
+# Define the tools
+async def generate_csv_answer(csv_url: str, user_questions: List[str]) -> Any:
+    print("LLM using the csv chat function....")
+    print("CSV URL:", csv_url)
+    print("User question:", user_questions)
+    # Create an array to accumulate the answers
+    answers = []
+    # Loop through the user questions and generate answers for each
+    for question in user_questions:
+        answer = await csv_chat(csv_url, question)
+        answers.append(dict(question=question, answer=answer))
+    return answers
+async def generate_chart(csv_url: str, user_questions: List[str]) -> Any:
+    print("LLM using the csv chart function....")
+    print("CSV URL:", csv_url)
+    print("User question:", user_questions)
+    # Create an array to accumulate the charts
+    charts = []
+    # Loop through the user questions and generate charts for each
+    for question in user_questions:
+        chart = await csv_chart(csv_url, question)
+        charts.append(dict(question=question, image_url=chart))
+    return charts
+# Function to create an agent with a specific CSV URL
+def create_agent(csv_url: str, api_key: str) -> Agent:
+    csv_metadata = get_csv_basic_info(csv_url)
+    system_prompt = (
+        "You are a data analyst."
+        "You have all the tools you need to answer any question."
+        "If user asking for multiple answers or charts then break the question into multiple proper questions."
+        "Pass csv_url/path with the questions to the tools to generate the answer."
+        "Explain the answer in a friendly way."
+        "**Format images** in Markdown: `![alt_text](direct-image-url)`"
+        f"Your csv url is {csv_url}"
+        f"Your csv metadata is {csv_metadata}"
+    )
+    return Agent(
+        model=initialize_model(api_key),
+        deps_type=str,
+        tools=[generate_csv_answer, generate_chart],
+        system_prompt=system_prompt
+    )
+def csv_orchestrator_chat(csv_url: str, user_question: str) -> str:
+    print("CSV URL:", csv_url)
+    print("User questions:", user_question)
+    # Iterate through all API keys
+    for api_key in GEMINI_API_KEYS:
+        try:
+            print(f"Attempting with API key: {api_key}")
+            agent = create_agent(csv_url, api_key)
+            result = agent.run_sync(user_question)
+            print("Orchestrator Result:", result.data)
+            return result.data
+        except ResourceExhausted or Exception as e:
+            print(f"Quota exhausted for API key: {api_key}. Switching to the next key.")
+            continue  # Move to the next key
+        except Exception as e:
+            print(f"Error with API key {api_key}: {e}")
+            continue  # Move to the next key
+    # If all keys are exhausted or fail
+    print("All API keys have been exhausted or failed.")
+    return None

orchestrator_functions.py ADDED Viewed

	@@ -0,0 +1,381 @@

+# Import necessary modules
+import asyncio
+import os
+import threading
+import uuid
+from fastapi.encoders import jsonable_encoder
+import numpy as np
+import pandas as pd
+from pandasai import SmartDataframe
+from langchain_groq.chat_models import ChatGroq
+from dotenv import load_dotenv
+from pydantic import BaseModel
+from csv_service import clean_data, extract_chart_filenames
+from langchain_groq import ChatGroq
+import pandas as pd
+from langchain_experimental.tools import PythonAstREPLTool
+from langchain_experimental.agents import create_pandas_dataframe_agent
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib
+import seaborn as sns
+from supabase_service import upload_image_to_supabase
+from util_service import _prompt_generator, process_answer
+import matplotlib
+matplotlib.use('Agg')
+load_dotenv()
+image_file_path = os.getenv("IMAGE_FILE_PATH")
+image_not_found = os.getenv("IMAGE_NOT_FOUND")
+allowed_hosts = os.getenv("ALLOWED_HOSTS", "").split(",")
+# Load environment variables
+groq_api_keys = os.getenv("GROQ_API_KEYS").split(",")
+model_name = os.getenv("GROQ_LLM_MODEL")
+# Request body carrying a single CSV location.
+class CsvUrlRequest(BaseModel):
+    # Location of the CSV file (presumably a URL; confirm against callers).
+    csv_url: str
+# Request body carrying a single image path.
+class ImageRequest(BaseModel):
+    # Path of the image being referred to.
+    image_path: str
+class CsvCommonHeadersRequest(BaseModel):
+  file_urls: list[str]
+# Request body describing a merge of several CSV files.
+class CsvsMergeRequest(BaseModel):
+    # Locations of the CSV files to merge.
+    file_urls: list[str]
+    # Kind of merge to perform (accepted values are not visible in this file).
+    merge_type: str
+    # Column names the merge is keyed on.
+    common_columns_name: list[str]
+# Thread-safe key management for groq_chat
+# Index into groq_api_keys of the key groq_chat uses; shared by all threads.
+current_groq_key_index = 0
+# Guards every read and write of current_groq_key_index.
+current_groq_key_lock = threading.Lock()
+# Thread-safe key management for langchain_csv_chat
+# Index into groq_api_keys of the key langchain_csv_chat hands out next.
+current_langchain_key_index = 0
+# Guards every read and write of current_langchain_key_index.
+current_langchain_key_lock = threading.Lock()
+# CHAT CODING STARTS FROM HERE
+# Modified groq_chat function with thread-safe key rotation
+def groq_chat(csv_url: str, question: str):
+    global current_groq_key_index, current_groq_key_lock
+    while True:
+        with current_groq_key_lock:
+            if current_groq_key_index >= len(groq_api_keys):
+                return {"error": "All API keys exhausted."}
+            current_api_key = groq_api_keys[current_groq_key_index]
+        try:
+            # Delete cache file if exists
+            cache_db_path = "/workspace/cache/cache_db_0.11.db"
+            if os.path.exists(cache_db_path):
+                try:
+                    os.remove(cache_db_path)
+                except Exception as e:
+                    print(f"Error deleting cache DB file: {e}")
+            data = clean_data(csv_url)
+            llm = ChatGroq(model=model_name, api_key=current_api_key)
+            # Generate unique filename using UUID
+            chart_filename = f"chart_{uuid.uuid4()}.png"
+            chart_path = os.path.join("generated_charts", chart_filename)
+            # Configure SmartDataframe with chart settings
+            df = SmartDataframe(
+                data,
+                config={
+                    'llm': llm,
+                    'save_charts': True,  # Enable chart saving
+                    'open_charts': False,
+                    'save_charts_path': os.path.dirname(chart_path),  # Directory to save
+                    'custom_chart_filename': chart_filename  # Unique filename
+                }
+            )
+            answer = df.chat(question)
+            # Process different response types
+            if isinstance(answer, pd.DataFrame):
+                processed = answer.apply(handle_out_of_range_float).to_dict(orient="records")
+            elif isinstance(answer, pd.Series):
+                processed = answer.apply(handle_out_of_range_float).to_dict()
+            elif isinstance(answer, list):
+                processed = [handle_out_of_range_float(item) for item in answer]
+            elif isinstance(answer, dict):
+                processed = {k: handle_out_of_range_float(v) for k, v in answer.items()}
+            else:
+                processed = {"answer": str(handle_out_of_range_float(answer))}
+            return processed
+        except Exception as e:
+            error_message = str(e)
+            if "429" in error_message:
+                with current_groq_key_lock:
+                    current_groq_key_index += 1
+                    if current_groq_key_index >= len(groq_api_keys):
+                        return {"error": "All API keys exhausted."}
+            else:
+                return {"error": error_message}
+# Modified langchain_csv_chat with thread-safe key rotation
+def langchain_csv_chat(csv_url: str, question: str, chart_required: bool):
+    global current_langchain_key_index, current_langchain_key_lock
+    data = clean_data(csv_url)
+    attempts = 0
+    while attempts < len(groq_api_keys):
+        with current_langchain_key_lock:
+            if current_langchain_key_index >= len(groq_api_keys):
+                current_langchain_key_index = 0
+            api_key = groq_api_keys[current_langchain_key_index]
+            current_key = current_langchain_key_index
+            current_langchain_key_index += 1
+            attempts += 1
+        try:
+            llm = ChatGroq(model=model_name, api_key=api_key)
+            tool = PythonAstREPLTool(locals={
+                "df": data,
+                "pd": pd,
+                "np": np,
+                "plt": plt,
+                "sns": sns,
+                "matplotlib": matplotlib
+            })
+            agent = create_pandas_dataframe_agent(
+                llm,
+                data,
+                agent_type="openai-tools",
+                verbose=True,
+                allow_dangerous_code=True,
+                extra_tools=[tool],
+                return_intermediate_steps=True
+            )
+            prompt = _prompt_generator(question, chart_required)
+            result = agent.invoke({"input": prompt})
+            return result.get("output")
+        except Exception as e:
+            print(f"Error with key index {current_key}: {str(e)}")
+    return {"error": "All API keys exhausted"}
+def handle_out_of_range_float(value):
+    if isinstance(value, float):
+        if np.isnan(value):
+            return None
+        elif np.isinf(value):
+            return "Infinity"
+    return value
+# CHART CODING STARTS FROM HERE
+# Extra plotting guidance; groq_chart appends this text to the user's question.
+instructions = """
+- Please ensure that each value is clearly visible, You may need to adjust the font size, rotate the labels, or use truncation to improve readability (if needed).
+- For multiple charts, arrange them in a grid format (2x2, 3x3, etc.)
+- Use colorblind-friendly palette
+- Read above instructions and follow them.
+"""
+# Thread-safe configuration for chart endpoints
+# Index into groq_api_keys used by model() and groq_chart.
+current_groq_chart_key_index = 0
+# Guards every read and write of current_groq_chart_key_index.
+current_groq_chart_lock = threading.Lock()
+# Index into groq_api_keys of the key langchain_csv_chart hands out next.
+current_langchain_chart_key_index = 0
+# Guards every read and write of current_langchain_chart_key_index.
+current_langchain_chart_lock = threading.Lock()
+def model():
+    global current_groq_chart_key_index, current_groq_chart_lock
+    with current_groq_chart_lock:
+        if current_groq_chart_key_index >= len(groq_api_keys):
+            raise Exception("All API keys exhausted for chart generation")
+        api_key = groq_api_keys[current_groq_chart_key_index]
+    return ChatGroq(model=model_name, api_key=api_key)
+def groq_chart(csv_url: str, question: str):
+    global current_groq_chart_key_index, current_groq_chart_lock
+    for attempt in range(len(groq_api_keys)):
+        try:
+            # Clean cache before processing
+            cache_db_path = "/workspace/cache/cache_db_0.11.db"
+            if os.path.exists(cache_db_path):
+                try:
+                    os.remove(cache_db_path)
+                except Exception as e:
+                    print(f"Cache cleanup error: {e}")
+            data = clean_data(csv_url)
+            with current_groq_chart_lock:
+                current_api_key = groq_api_keys[current_groq_chart_key_index]
+            llm = ChatGroq(model=model_name, api_key=current_api_key)
+            # Generate unique filename using UUID
+            chart_filename = f"chart_{uuid.uuid4()}.png"
+            chart_path = os.path.join("generated_charts", chart_filename)
+            # Configure SmartDataframe with chart settings
+            df = SmartDataframe(
+                data,
+                config={
+                    'llm': llm,
+                    'save_charts': True,  # Enable chart saving
+                    'open_charts': False,
+                    'save_charts_path': os.path.dirname(chart_path),  # Directory to save
+                    'custom_chart_filename': chart_filename  # Unique filename
+                }
+            )
+            answer = df.chat(question + instructions)
+            if process_answer(answer):
+                return "Chart not generated"
+            return answer
+        except Exception as e:
+            error = str(e)
+            if "429" in error:
+                with current_groq_chart_lock:
+                    current_groq_chart_key_index = (current_groq_chart_key_index + 1) % len(groq_api_keys)
+            else:
+                print(f"Chart generation error: {error}")
+                return {"error": error}
+    return {"error": "All API keys exhausted for chart generation"}
+def langchain_csv_chart(csv_url: str, question: str, chart_required: bool):
+    global current_langchain_chart_key_index, current_langchain_chart_lock
+    data = clean_data(csv_url)
+    for attempt in range(len(groq_api_keys)):
+        try:
+            with current_langchain_chart_lock:
+                api_key = groq_api_keys[current_langchain_chart_key_index]
+                current_key = current_langchain_chart_key_index
+                current_langchain_chart_key_index = (current_langchain_chart_key_index + 1) % len(groq_api_keys)
+            llm = ChatGroq(model=model_name, api_key=api_key)
+            tool = PythonAstREPLTool(locals={
+                "df": data,
+                "pd": pd,
+                "np": np,
+                "plt": plt,
+                "sns": sns,
+                "matplotlib": matplotlib,
+                "uuid": uuid
+            })
+            agent = create_pandas_dataframe_agent(
+                llm,
+                data,
+                agent_type="openai-tools",
+                verbose=True,
+                allow_dangerous_code=True,
+                extra_tools=[tool],
+                return_intermediate_steps=True
+            )
+            result = agent.invoke({"input": _prompt_generator(question, True)})
+            output = result.get("output", "")
+            # Verify chart file creation
+            chart_files = extract_chart_filenames(output)
+            if len(chart_files) > 0:
+                return chart_files
+            if attempt < len(groq_api_keys) - 1:
+                print(f"Langchain chart error (key {current_key}): {output}")
+        except Exception as e:
+            print(f"Langchain chart error (key {current_key}): {str(e)}")
+    return "Chart generation failed after all retries"
+###########################################################################################################################
+async def csv_chart(csv_url: str, query: str):
+    try:
+        # Groq-based chart generation
+        groq_result = await asyncio.to_thread(groq_chart, csv_url, query)
+        print(f"Generated Chart: {groq_result}")
+        if groq_result != 'Chart not generated':
+            unique_file_name =f'{str(uuid.uuid4())}.png'
+            image_public_url = await upload_image_to_supabase(f"{groq_result}", unique_file_name)
+            print(f"Image uploaded to Supabase: {image_public_url}")
+            return {"image_url": image_public_url}
+        else:
+           return {"error": "All chart generation methods failed"}
+    except Exception as e:
+        print(f"Critical chart error: {str(e)}")
+        return {"error": "Internal system error"}
+async def csv_chat(csv_url: str, query: str):
+    try:
+        # Process with groq_chat first
+        groq_answer = await asyncio.to_thread(groq_chat, csv_url, query)
+        print("groq_answer:", groq_answer)
+        if process_answer(groq_answer) == "Empty response received.":
+            return {"answer": "Sorry, I couldn't find relevant data..."}
+        if process_answer(groq_answer):
+            lang_answer = await asyncio.to_thread(
+                langchain_csv_chat, csv_url, query, False
+            )
+            if process_answer(lang_answer):
+                return {"answer": "error"}
+            return {"answer": jsonable_encoder(lang_answer)}
+        return {"answer": jsonable_encoder(groq_answer)}
+    except Exception as e:
+        print(f"Error processing request: {str(e)}")
+        return {"answer": "error"}
+def handle_out_of_range_float(value):
+    if isinstance(value, float):
+        if np.isnan(value):
+            return None
+        elif np.isinf(value):
+            return "Infinity"
+    return value