import contextlib
import io
import json
import time
import traceback
import uuid
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, List, Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import seaborn as sns
from pydantic import BaseModel


class CodeResponse(BaseModel):
    """Container for code-related responses."""

    language: str = "python"
    code: str


class ChartSpecification(BaseModel):
    """Details about a requested chart."""

    image_description: str
    # Code that draws the chart; charts without code are skipped by
    # PythonExecutor.process_response.
    code: Optional[str] = None


class AnalysisOperation(BaseModel):
    """Container for a single analysis operation: its code and a description."""

    code: CodeResponse
    description: str


class CsvChatResult(BaseModel):
    """Structured response for CSV-related AI interactions."""

    # Intended values: "casual", "data_analysis", "visualization", "mixed"
    response_type: str
    casual_response: str
    analysis_operations: List[AnalysisOperation]
    charts: Optional[List[ChartSpecification]] = None


class PythonExecutor:
    """Run Python snippets against a DataFrame and collect their output.

    Snippets are executed with ``exec`` in a namespace that exposes the
    DataFrame as ``df`` together with pandas, numpy, matplotlib, seaborn,
    scipy.stats, json and the date/time helpers.
    """

    def __init__(self, df: pd.DataFrame, charts_folder: str = "generated_charts"):
        """Initialize the executor with a DataFrame.

        Args:
            df: The DataFrame exposed to executed code as ``df``.
            charts_folder: Folder in which generated charts are saved. It is
                created (including missing parent folders) if necessary.
        """
        self.df = df
        self.charts_folder = Path(charts_folder)
        # parents=True so a nested folder such as "out/charts" does not raise
        # FileNotFoundError when "out" is missing.
        self.charts_folder.mkdir(parents=True, exist_ok=True)

    def execute_code(self, code: str) -> Dict[str, Any]:
        """Execute Python code in the analysis context and return the results.

        While the code runs, ``plt.show`` is replaced by a function that
        renders every open figure to PNG bytes instead of displaying it.
        Figures still open after a successful run (code that never called
        ``plt.show()``) are captured the same way. All figures are closed
        before returning so nothing carries over into the next call.

        Args:
            code: Python source to execute.

        Returns:
            A dict with the keys:
                ``output``: text written to stdout (including anything
                    printed before an exception was raised),
                ``error``: ``None`` on success, otherwise a dict with
                    ``message`` and ``traceback``,
                ``plots``: list of PNG images as ``bytes``.
        """
        error = None
        plots: List[bytes] = []
        stdout = io.StringIO()

        def custom_show(*args: Any, **kwargs: Any) -> None:
            """Save all open figures as PNG bytes instead of displaying them.

            Arguments are accepted and ignored so that calls such as
            ``plt.show(block=False)`` keep working.
            """
            for fig_num in plt.get_fignums():
                figure = plt.figure(fig_num)
                buf = io.BytesIO()
                figure.savefig(buf, format='png', bbox_inches='tight')
                plots.append(buf.getvalue())
            plt.close('all')

        exec_globals = {
            # Core data analysis. ``df`` is the executor's own DataFrame
            # object, not a copy, so in-place changes made by the code persist.
            'pd': pd,
            'np': np,
            'df': self.df,
            # Visualization
            'plt': plt,
            'sns': sns,
            # Statistics
            'stats': stats,
            # Date/time
            'datetime': datetime,
            'timedelta': timedelta,
            'time': time,
            # Utilities
            'json': json,
            '__builtins__': __builtins__,
        }

        # The patch is applied to the pyplot module itself, so it is visible
        # process-wide until it is restored in ``finally``.
        original_show = plt.show
        plt.show = custom_show
        try:
            # SECURITY: exec runs ``code`` with full builtins and no sandbox.
            # Only pass code from a trusted source.
            with contextlib.redirect_stdout(stdout):
                exec(code, exec_globals)
            # Capture figures the code created but never showed.
            custom_show()
        except Exception as e:
            error = {
                "message": str(e),
                "traceback": traceback.format_exc(),
            }
        finally:
            plt.show = original_show
            # Drop figures left behind by a failed run so they are not
            # picked up by a later execution.
            plt.close('all')

        return {
            'output': stdout.getvalue(),
            'error': error,
            'plots': plots,
        }

    def save_plot_dummy(self, plot_data: bytes, description: str) -> str:
        """Save a plot to the charts folder and return a dummy URL.

        Args:
            plot_data: Image data in bytes.
            description: Description of the plot. Currently unused; kept for
                interface compatibility.

        Returns:
            A placeholder ``https://example.com/charts/...`` URL ending in the
            generated file name.
        """
        filename = f"chart_{uuid.uuid4().hex}.png"
        # The file is written even though only a dummy URL is returned.
        (self.charts_folder / filename).write_bytes(plot_data)
        return f"https://example.com/charts/{filename}"

    @staticmethod
    def _as_code_block(text: str) -> str:
        """Wrap ``text`` in a markdown ``python`` fenced code block."""
        return "```python\n" + text + "\n```"

    def process_response(self, response: CsvChatResult) -> str:
        """Run the code in a CsvChatResult and build a formatted report.

        The report starts with the casual response. For each analysis
        operation it adds the description followed by either the captured
        output (fenced as a code block when it looks like structured data) or
        the error message in a code block. Each chart that has code is
        executed; every resulting plot is saved and referenced as a markdown
        image, or the error message is shown if the run failed without
        producing plots.

        Args:
            response: Response from CSV analysis.

        Returns:
            The report parts joined by newlines.
        """
        output_parts = [response.casual_response]

        for operation in response.analysis_operations:
            result = self.execute_code(operation.code.code)
            output_parts.append(f"\n{operation.description}:")

            if result['error']:
                output_parts.append(
                    self._as_code_block(f"Error: {result['error']['message']}")
                )
                continue

            output = result['output'].strip()
            if self._looks_like_structured_data(output):
                output_parts.append(self._as_code_block(output))
            else:
                output_parts.append(output)

        if response.charts:
            output_parts.append("\nVisualizations:")
            for chart in response.charts:
                if not chart.code:
                    continue

                result = self.execute_code(chart.code)
                if result['plots']:
                    for plot_data in result['plots']:
                        dummy_url = self.save_plot_dummy(
                            plot_data, chart.image_description
                        )
                        output_parts.append(f"\n{chart.image_description}")
                        output_parts.append(
                            f"![{chart.image_description}]({dummy_url})"
                        )
                elif result['error']:
                    output_parts.append(
                        self._as_code_block(
                            f"Error generating {chart.image_description}: "
                            f"{result['error']['message']}"
                        )
                    )

        return "\n".join(output_parts)

    def _looks_like_structured_data(self, output: str) -> bool:
        """Return True if ``output`` looks like JSON, an array or console output.

        The check is purely textual: braces or brackets at both ends, or a
        multi-line string that contains an ``=`` sign.
        """
        output = output.strip()
        return (
            (output.startswith('{') and output.endswith('}'))  # JSON object
            or (output.startswith('[') and output.endswith(']'))  # Array
            or ('\n' in output and '=' in output)  # Python console output
        )