Spaces:

Soumik555
/

FastApi

Running

App Files Files Community

Soumik555 committed on Jun 15

Commit

d7d1d4e

1 Parent(s): 1211e2b

added together ai agent

Browse files

Files changed (4) hide show

controller.py +10 -5
python_code_executor_service.py +183 -0
together_ai_instance_provider.py +69 -0
together_ai_llama_agent.py +143 -0

controller.py CHANGED Viewed

@@ -29,6 +29,7 @@ from gemini_report_generator import generate_csv_report
 from intitial_q_handler import if_initial_chart_question, if_initial_chat_question
 from orchestrator_agent import csv_orchestrator_chat
 from supabase_service import upload_file_to_supabase
 from util_service import _prompt_generator, process_answer
 from fastapi.middleware.cors import CORSMiddleware
 import matplotlib
@@ -363,11 +364,15 @@ async def csv_chat(request: Dict, authorization: str = Header(None)):
         #       return {"answer": jsonable_encoder(orchestrator_answer)}
         # Process with groq_chat first
-        groq_answer = await asyncio.to_thread(groq_chat, decoded_url, query)
-        logger.info("groq_answer:", groq_answer)
-        if process_answer(groq_answer) == "Empty response received.":
-            return {"answer": "Sorry, I couldn't find relevant data..."}
         # if process_answer(groq_answer):
         #     lang_answer = await asyncio.to_thread(
@@ -377,7 +382,7 @@ async def csv_chat(request: Dict, authorization: str = Header(None)):
         #         return {"answer": "error"}
         #     return {"answer": jsonable_encoder(lang_answer)}
-        return {"answer": jsonable_encoder(groq_answer)}
     except Exception as e:
         logger.error(f"Error processing request: {str(e)}")

 from intitial_q_handler import if_initial_chart_question, if_initial_chat_question
 from orchestrator_agent import csv_orchestrator_chat
 from supabase_service import upload_file_to_supabase
+from together_ai_llama_agent import query_csv_agent
 from util_service import _prompt_generator, process_answer
 from fastapi.middleware.cors import CORSMiddleware
 import matplotlib
         #       return {"answer": jsonable_encoder(orchestrator_answer)}
         # Process with groq_chat first
+        # groq_answer = await asyncio.to_thread(groq_chat, decoded_url, query)
+        # logger.info("groq_answer:", groq_answer)
+        result = await asyncio.to_thread(query_csv_agent, decoded_url, query)
+        logger.info("together ai csv answer == >", result)
+        return {"answer": result}
+        # if process_answer(groq_answer) == "Empty response received.":
+        #     return {"answer": "Sorry, I couldn't find relevant data..."}
         # if process_answer(groq_answer):
         #     lang_answer = await asyncio.to_thread(
         #         return {"answer": "error"}
         #     return {"answer": jsonable_encoder(lang_answer)}
+        # return {"answer": jsonable_encoder(groq_answer)}
     except Exception as e:
         logger.error(f"Error processing request: {str(e)}")

python_code_executor_service.py ADDED Viewed

	@@ -0,0 +1,183 @@

+import uuid
+import matplotlib.pyplot as plt
+from pathlib import Path
+from typing import Dict, Any, List, Optional
+import pandas as pd
+import json
+import io
+import contextlib
+import traceback
+from pydantic import BaseModel
+class CodeResponse(BaseModel):
+    """Container for a snippet of source code and the language it is written in"""
+    # Language of the snippet; defaults to "python" when not supplied.
+    language: str = "python"
+    # The source text itself (required). PythonExecutor.process_response passes it to execute_code.
+    code: str
+class ChartSpecification(BaseModel):
+    """Description of one requested chart, optionally with the code that draws it"""
+    # Human-readable description; process_response uses it as the caption and image alt text.
+    image_description: str
+    # Plotting code for the chart; charts without code are skipped by process_response.
+    code: Optional[str] = None
+class AnalysisOperation(BaseModel):
+    """A single analysis step: the code to run plus a description of what it computes"""
+    # Snippet to execute; no result is stored on the model, output is produced at execution time.
+    code: CodeResponse
+    # Heading printed above the snippet's output in the formatted response.
+    description: str
+class CsvChatResult(BaseModel):
+    """Structured response for CSV-related AI interactions"""
+    # Declared as a plain str here, so the value is not restricted to
+    # "casual" / "data_analysis" / "visualization" / "mixed" by this model.
+    # process_response does not read this field.
+    response_type: str  # Literal["casual", "data_analysis", "visualization", "mixed"]
+    # Free-text reply; always emitted first in the formatted output.
+    casual_response: str
+    # Analysis steps, executed in list order.
+    analysis_operations: List[AnalysisOperation]
+    # Optional chart requests; None or an empty list means no "Visualizations" section.
+    charts: Optional[List[ChartSpecification]] = None
+class PythonExecutor:
+    """Handles execution of Python code and dummy image generation for CSV analysis"""
+    def __init__(self, df: pd.DataFrame, charts_folder: str = "charts"):
+        """
+        Initialize the PythonExecutor with a DataFrame
+        Args:
+            df (pd.DataFrame): The DataFrame to operate on
+            charts_folder (str): Folder to save charts in
+        """
+        self.df = df
+        self.charts_folder = Path(charts_folder)
+        self.charts_folder.mkdir(exist_ok=True)
+    def execute_code(self, code: str) -> Dict[str, Any]:
+        """
+        Execute Python code and return the output and any generated plots
+        Args:
+            code (str): Python code to execute
+        Returns:
+            dict: Dictionary containing execution results and any generated plots
+        """
+        output = ""
+        error = None
+        plots = []
+        # Capture stdout
+        stdout = io.StringIO()
+        # Monkey patch plt.show() to save figures
+        original_show = plt.show
+        def custom_show():
+            """Custom show function that saves plots instead of displaying them"""
+            for i, fig in enumerate(plt.get_fignums()):
+                figure = plt.figure(fig)
+                # Save plot to bytes buffer
+                buf = io.BytesIO()
+                figure.savefig(buf, format='png', bbox_inches='tight')
+                buf.seek(0)
+                plots.append(buf.read())
+            plt.close('all')
+        try:
+            # Create execution context with common libraries and the DataFrame
+            exec_globals = {
+                'pd': pd,
+                'plt': plt,
+                'json': json,
+                'df': self.df,  # Include the DataFrame in the execution context
+                '__builtins__': __builtins__,
+            }
+            # Replace plt.show with custom implementation
+            plt.show = custom_show
+            # Execute code and capture output
+            with contextlib.redirect_stdout(stdout):
+                exec(code, exec_globals)
+            output = stdout.getvalue()
+        except Exception as e:
+            error = {
+                "message": str(e),
+                "traceback": traceback.format_exc()
+            }
+        finally:
+            # Restore original plt.show
+            plt.show = original_show
+        return {
+            'output': output,
+            'error': error,
+            'plots': plots
+        }
+    def save_plot_dummy(self, plot_data: bytes, description: str) -> str:
+        """
+        Save plot to charts folder and return a dummy URL
+        Args:
+            plot_data (bytes): Image data in bytes
+            description (str): Description of the plot
+        Returns:
+            str: Dummy URL for the chart
+        """
+        # Generate unique filename
+        filename = f"chart_{uuid.uuid4().hex}.png"
+        filepath = self.charts_folder / filename
+        # Save the plot (even though we're using dummy URLs, we still save it)
+        with open(filepath, 'wb') as f:
+            f.write(plot_data)
+        # Return a dummy URL
+        return f"https://example.com/charts/{filename}"
+    def process_response(self, response: CsvChatResult) -> str:
+        """
+        Process the CsvChatResult response and generate formatted output
+        Args:
+            response (CsvChatResult): Response from CSV analysis
+        Returns:
+            str: Formatted output with results and dummy image URLs
+        """
+        output_parts = []
+        # Add casual response
+        output_parts.append(response.casual_response)
+        # Process analysis operations
+        for operation in response.analysis_operations:
+            # Execute the code
+            result = self.execute_code(operation.code.code)
+            # Add operation description
+            output_parts.append(f"\n{operation.description}:")
+            # Add output or error
+            if result['error']:
+                output_parts.append(f"Error: {result['error']['message']}")
+            else:
+                output_parts.append(result['output'].strip())
+        # Process charts if they exist
+        if response.charts:
+            output_parts.append("\nVisualizations:")
+            for chart in response.charts:
+                if chart.code:
+                    # Execute the chart code
+                    result = self.execute_code(chart.code)
+                    if result['plots']:
+                        # Save each generated plot and get dummy URL
+                        for plot_data in result['plots']:
+                            dummy_url = self.save_plot_dummy(plot_data, chart.image_description)
+                            output_parts.append(f"\n{chart.image_description}")
+                            output_parts.append(f"![{chart.image_description}]({dummy_url})")
+                    elif result['error']:
+                        output_parts.append(f"\nError generating {chart.image_description}: {result['error']['message']}")
+        return "\n".join(output_parts)

together_ai_instance_provider.py ADDED Viewed

	@@ -0,0 +1,69 @@

+# together_ai_instance_provider.py
+import os
+import time
+from typing import Dict, Optional
+from pydantic_ai.models.openai import OpenAIModel
+from pydantic_ai.providers.openai import OpenAIProvider
+class InstanceProvider:
+    """Manages multiple Together AI API instances with failover support"""
+    def __init__(self):
+        self.instances: Dict[str, dict] = {}
+        self.locked_keys: Dict[str, float] = {}  # key: lock_time
+        self.LOCK_DURATION = 1800  # 30 minutes in seconds
+        self._initialize_instances()
+    def _initialize_instances(self):
+        """Load all API keys from environment and create instances"""
+        api_keys = os.getenv("TOGETHER_AI_API_KEYS", "").split(",")
+        base_url = os.getenv("TOGETHER_AI_BASE_URL")
+        model_name = os.getenv("TOGETHER_AI_LLM_MODEL_NAME")
+        for key in api_keys:
+            key = key.strip()
+            if key:
+                self.instances[key] = {
+                    'model': OpenAIModel(
+                        model_name,
+                        provider=OpenAIProvider(
+                            base_url=base_url,
+                            api_key=key
+                        )
+                    ),
+                    'error_count': 0
+                }
+    def _clean_locked_keys(self):
+        """Remove keys that have been locked beyond the duration"""
+        current_time = time.time()
+        expired_keys = [
+            key for key, lock_time in self.locked_keys.items()
+            if current_time - lock_time > self.LOCK_DURATION
+        ]
+        for key in expired_keys:
+            del self.locked_keys[key]
+    def get_instance(self) -> Optional[OpenAIModel]:
+        """Get an available instance, rotating through keys"""
+        self._clean_locked_keys()
+        for key, instance_data in self.instances.items():
+            if key not in self.locked_keys:
+                return instance_data['model']
+        # If we get here, all keys are locked
+        raise RuntimeError("All API keys exhausted or temporarily locked")
+    def report_error(self, api_key: str):
+        """Report an error for a specific API key and lock it"""
+        if api_key in self.instances:
+            self.instances[api_key]['error_count'] += 1
+            self.locked_keys[api_key] = time.time()
+    def get_api_key_for_model(self, model: OpenAIModel) -> Optional[str]:
+        """Get the API key for a given model instance"""
+        for key, instance_data in self.instances.items():
+            if instance_data['model'] == model:
+                return key
+        return None

together_ai_llama_agent.py ADDED Viewed

	@@ -0,0 +1,143 @@

+import pandas as pd
+import json
+from typing import List, Literal, Optional
+from pydantic import BaseModel
+from dotenv import load_dotenv
+from pydantic_ai import Agent
+from csv_service import clean_data
+from python_code_executor_service import PythonExecutor
+from together_ai_instance_provider import InstanceProvider
+# Load variables from a .env file into the process environment at import time.
+load_dotenv()
+# Module-level provider used by create_csv_agent; constructed once, on import.
+instance_provider = InstanceProvider()
+class CodeResponse(BaseModel):
+    """Container for a snippet of source code and the language it is written in"""
+    # Language of the snippet; defaults to "python" when not supplied.
+    language: str = "python"
+    # The source text itself (required).
+    code: str
+class ChartSpecification(BaseModel):
+    """Description of one requested chart, optionally with the code that draws it"""
+    # Human-readable description of the image.
+    image_description: str
+    # Plotting code for the chart; may be omitted.
+    code: Optional[str] = None
+class AnalysisOperation(BaseModel):
+    """A single analysis step: the code to run plus a description of what it computes"""
+    # Snippet for this step; the model has no field for the result itself.
+    code: CodeResponse
+    # Short description of the step.
+    description: str
+class CsvChatResult(BaseModel):
+    """Structured response for CSV-related AI interactions; used as the agent's output type"""
+    # Category of the reply, restricted to the four listed values.
+    response_type: Literal["casual", "data_analysis", "visualization", "mixed"]
+    # Casual chat response (required free text)
+    casual_response: str
+    # Data analysis components: required list of steps (may be empty)
+    analysis_operations: List[AnalysisOperation]
+    # Visualization components: optional list of chart requests
+    charts: Optional[List[ChartSpecification]] = None
+def get_csv_info(df: pd.DataFrame) -> dict:
+    """Get metadata/info about the CSV"""
+    info = {
+        'num_rows': len(df),
+        'num_cols': len(df.columns),
+        'example_rows': df.head(2).to_dict('records'),
+        'dtypes': {col: str(df[col].dtype) for col in df.columns},
+        'columns': list(df.columns),
+        'numeric_columns': [col for col in df.columns if pd.api.types.is_numeric_dtype(df[col])],
+        'categorical_columns': [col for col in df.columns if pd.api.types.is_string_dtype(df[col])]
+    }
+    return info
+def get_csv_system_prompt(df: pd.DataFrame) -> str:
+    """Generate system prompt for CSV analysis"""
+    csv_info = get_csv_info(df)
+    prompt = f"""
+You're a CSV analysis assistant. The pandas DataFrame is loaded as 'df' - use this variable.
+CSV Info:
+- Rows: {csv_info['num_rows']}, Cols: {csv_info['num_cols']}
+- Columns: {csv_info['columns']}
+- Sample: {csv_info['example_rows']}
+- Dtypes: {csv_info['dtypes']}
+Strict Rules:
+1. Never recreate 'df' - use the existing variable
+2. For analysis:
+   - Include necessary imports (except pandas) and include complete code
+   - Use df directly (e.g., print(df[...].mean()))
+3. For visualizations:
+   - Specify chart type and include complete code
+   - Example: plt.bar(df['x'], df['y'])
+4. For Lists and Dictionaries, return them as JSON
+Example:
+import json
+print(json.dumps(df[df['col'] == 'val'].to_dict('records'), indent=2))
+"""
+    return prompt
+def create_csv_agent(df: pd.DataFrame, max_retries: int = 1) -> Agent:
+    """Create and return a CSV analysis agent with API key rotation"""
+    csv_system_prompt = get_csv_system_prompt(df)
+    for attempt in range(max_retries):
+        try:
+            model = instance_provider.get_instance()
+            if model is None:
+                raise RuntimeError("No available API instances")
+            csv_agent = Agent(
+                model=model,
+                output_type=CsvChatResult,
+                system_prompt=csv_system_prompt,
+            )
+            return csv_agent
+        except Exception as e:
+            api_key = instance_provider.get_api_key_for_model(model)
+            if api_key:
+                print(f"Error with API key (attempt {attempt + 1}): {str(e)}")
+                instance_provider.report_error(api_key)
+            continue
+    raise RuntimeError(f"Failed to create agent after {max_retries} attempts")
+async def query_csv_agent(csv_url: str, question: str) -> str:
+    """Query the CSV agent with a DataFrame and question and return formatted output"""
+    # Get the DataFrame from the CSV URL
+    df = clean_data(csv_url)
+    # Create agent and get response
+    agent = create_csv_agent(df)
+    result = await agent.run(question)
+    # Process the response through PythonExecutor
+    executor = PythonExecutor(df)
+    # Convert the raw output to CsvChatResult if needed
+    if not isinstance(result.output, CsvChatResult):
+        # Handle case where output needs conversion
+        try:
+            response_data = result.output if isinstance(result.output, dict) else json.loads(result.output)
+            chat_result = CsvChatResult(**response_data)
+        except Exception as e:
+            raise ValueError(f"Could not parse agent response: {str(e)}")
+    else:
+        chat_result = result.output
+    # Process and format the response
+    formatted_output = executor.process_response(chat_result)
+    return formatted_output