Spaces:

Soumik555
/

FastApi

Running

App Files Files Community

Soumik555 committed on Jul 10

Commit

dab2720

1 Parent(s): 7d5e274

DB Chat for pro users only

Browse files

Files changed (1) hide show

python_code_executor_service.py +128 -103

python_code_executor_service.py CHANGED Viewed

@@ -16,69 +16,23 @@ import seaborn as sns
 import scipy.stats as stats
 from pydantic import BaseModel
 from tabulate import tabulate
-import re
 from supabase_service import upload_file_to_supabase
 # Load environment variables from .env file
 load_dotenv()
-class TextCleaner:
-    """Utility class for cleaning text while preserving structure"""
-    @staticmethod
-    def clean_casual_response(text: str) -> str:
-        """Clean casual response by replacing newlines with spaces when appropriate"""
-        # Preserve intentional line breaks (markdown-style)
-        if '\n\n' in text:
-            return text
-        return text.replace('\n', ' ').strip()
-    @staticmethod
-    def clean_code(code: str) -> str:
-        """Remove trailing newlines while preserving internal structure"""
-        return code.rstrip('\n')
-    @staticmethod
-    def clean_description(description: str) -> str:
-        """Replace newlines in description with spaces (preserves readability)"""
-        return description.replace('\n', ' ').strip()
-    @staticmethod
-    def format_result(result: Any) -> str:
-        """Format result with safe newline handling"""
-        if isinstance(result, (pd.DataFrame, pd.Series)):
-            return result.to_string()
-        if isinstance(result, (dict, list)):
-            return json.dumps(result, indent=2)
-        # Clean string representation while preserving essential newlines
-        str_result = str(result)
-        if '\n' in str_result and not any(x in str_result for x in ['```', 'def ', 'class ']):
-            return str_result.replace('\n', ' ')
-        return str_result
 class CodeResponse(BaseModel):
     """Container for code-related responses"""
     language: str = "python"
     code: str
-    def clean_code(self) -> str:
-        """Delegate to TextCleaner"""
-        return TextCleaner.clean_code(self.code)
 class ChartSpecification(BaseModel):
     """Details about requested charts"""
     image_description: str
     code: Optional[str] = None
-    def clean_description(self) -> str:
-        """Delegate to TextCleaner"""
-        return TextCleaner.clean_description(self.image_description)
 class AnalysisOperation(BaseModel):
     """Container for a single analysis operation with its code and result"""
@@ -93,30 +47,48 @@ class CsvChatResult(BaseModel):
     analysis_operations: List[AnalysisOperation]
     charts: Optional[List[ChartSpecification]] = None
-    def clean_casual_response(self) -> str:
-        """Delegate to TextCleaner"""
-        return TextCleaner.clean_casual_response(self.casual_response)
 class PythonExecutor:
     """Handles execution of Python code with comprehensive data analysis libraries"""
     def __init__(self, df: pd.DataFrame, charts_folder: str = "generated_charts"):
         self.df = df
         self.charts_folder = Path(charts_folder)
         self.charts_folder.mkdir(exist_ok=True)
         self.exec_locals = {}
     def execute_code(self, code: str) -> Dict[str, Any]:
         output = ""
         error = None
         plots = []
         stdout = io.StringIO()
         original_show = plt.show
         def custom_show():
             for i, fig in enumerate(plt.get_fignums()):
                 figure = plt.figure(fig)
                 buf = io.BytesIO()
                 figure.savefig(buf, format='png', bbox_inches='tight')
                 buf.seek(0)
@@ -124,22 +96,47 @@ class PythonExecutor:
             plt.close('all')
         try:
             exec_globals = {
-                'pd': pd, 'np': np, 'df': self.df,
-                'plt': plt, 'sns': sns, 'tabulate': tabulate,
-                'stats': stats, 'datetime': datetime,
-                'timedelta': timedelta, 'time': time,
-                'json': json, '__builtins__': __builtins__,
             }
             plt.show = custom_show
             with contextlib.redirect_stdout(stdout):
                 exec(code, exec_globals, self.exec_locals)
             output = stdout.getvalue()
         except Exception as e:
-            error = {"message": str(e), "traceback": traceback.format_exc()}
         finally:
             plt.show = original_show
         return {
@@ -150,74 +147,102 @@ class PythonExecutor:
         }
     async def save_plot_to_supabase(self, plot_data: bytes, description: str, chat_id: str) -> str:
         filename = f"chart_{uuid.uuid4().hex}.png"
         filepath = self.charts_folder / filename
         with open(filepath, 'wb') as f:
             f.write(plot_data)
         try:
             public_url = await upload_file_to_supabase(
                 file_path=str(filepath),
                 file_name=filename,
                 chat_id=chat_id
             )
             os.remove(filepath)
             return public_url
         except Exception as e:
             if os.path.exists(filepath):
                 os.remove(filepath)
             raise Exception(f"Failed to upload plot to Supabase: {e}")
     def _format_result(self, result: Any) -> str:
-        """Delegate to TextCleaner"""
-        return TextCleaner.format_result(result)
     async def process_response(self, response: CsvChatResult, chat_id: str) -> str:
-        """Process response with intelligent newline handling"""
-        output_parts = [response.clean_casual_response()]
-        # Process analysis operations
-        for operation in response.analysis_operations:
-            execution_result = self.execute_code(operation.code.clean_code())
-            result = self.exec_locals.get(operation.result_var)
-            if execution_result['error']:
-                output_parts.append(f"\n❌ Error in operation '{operation.result_var}':")
-                output_parts.append(f"```python\n{execution_result['error']['message']}\n```")
-            elif result is not None:
-                if result is None or (hasattr(result, '__len__') and len(result) == 0):
-                    output_parts.append(f"\n⚠️ Values are missing - Operation '{operation.result_var}' returned no data")
-                else:
-                    output_parts.append(f"\n🔹 Result for '{operation.result_var}':")
-                    output_parts.append(f"```python\n{self._format_result(result)}\n```")
-            else:
-                output_str = execution_result['output'].strip()
-                if output_str:
-                    output_parts.append(f"\nOutput for '{operation.result_var}':")
-                    output_parts.append(f"```\n{output_str}\n```")
-        # Process charts
-        if response.charts:
-            output_parts.append("\n📊 Visualizations:")
-            for chart in response.charts:
-                if chart.code:
-                    chart_result = self.execute_code(chart.code)
-                    if chart_result['plots']:
-                        for plot_data in chart_result['plots']:
-                            try:
-                                public_url = await self.save_plot_to_supabase(
-                                    plot_data=plot_data,
-                                    description=chart.clean_description(),
-                                    chat_id=chat_id
-                                )
-                                output_parts.append(f"\n🖼️ {chart.clean_description()}")
-                                output_parts.append(f"![{chart.clean_description()}]({public_url})")
-                            except Exception as e:
-                                output_parts.append(f"\n⚠️ Error uploading chart: {str(e)}")
-                    elif chart_result['error']:
-                        output_parts.append(f"```python\nError generating chart: {chart_result['error']['message']}\n```")
-                    else:
-                        output_parts.append(f"\n⚠️ No chart generated for '{chart.clean_description()}'")
-        return "\n".join(output_parts)

 import scipy.stats as stats
 from pydantic import BaseModel
 from tabulate import tabulate
 from supabase_service import upload_file_to_supabase
 # Load environment variables from .env file
 load_dotenv()
 class CodeResponse(BaseModel):
     """Container for code-related responses"""
     language: str = "python"
     code: str
 class ChartSpecification(BaseModel):
     """Details about requested charts"""
     image_description: str
     code: Optional[str] = None
 class AnalysisOperation(BaseModel):
     """Container for a single analysis operation with its code and result"""
     analysis_operations: List[AnalysisOperation]
     charts: Optional[List[ChartSpecification]] = None
 class PythonExecutor:
     """Handles execution of Python code with comprehensive data analysis libraries"""
     def __init__(self, df: pd.DataFrame, charts_folder: str = "generated_charts"):
+        """
+        Initialize the PythonExecutor with a DataFrame
+        Args:
+            df (pd.DataFrame): The DataFrame to operate on
+            charts_folder (str): Folder to save charts in
+        """
         self.df = df
         self.charts_folder = Path(charts_folder)
         self.charts_folder.mkdir(exist_ok=True)
         self.exec_locals = {}
     def execute_code(self, code: str) -> Dict[str, Any]:
+        """
+        Execute Python code with full data analysis context and return results
+        Args:
+            code (str): Python code to execute
+        Returns:
+            dict: Dictionary containing execution results and any generated plots
+        """
         output = ""
         error = None
         plots = []
+        # Capture stdout
         stdout = io.StringIO()
+        # Monkey patch plt.show() to save figures
         original_show = plt.show
         def custom_show():
+            """Custom show function that saves plots instead of displaying them"""
             for i, fig in enumerate(plt.get_fignums()):
                 figure = plt.figure(fig)
+                # Save plot to bytes buffer
                 buf = io.BytesIO()
                 figure.savefig(buf, format='png', bbox_inches='tight')
                 buf.seek(0)
             plt.close('all')
         try:
+            # Create comprehensive execution context with data analysis libraries
             exec_globals = {
+                # Core data analysis
+                'pd': pd,
+                'np': np,
+                'df': self.df,
+                # Visualization
+                'plt': plt,
+                'sns': sns,
+                'tabulate': tabulate,
+                # Statistics
+                'stats': stats,
+                # Date/time
+                'datetime': datetime,
+                'timedelta': timedelta,
+                'time': time,
+                # Utilities
+                'json': json,
+                '__builtins__': __builtins__,
             }
+            # Replace plt.show with custom implementation
             plt.show = custom_show
+            # Execute code and capture output
             with contextlib.redirect_stdout(stdout):
                 exec(code, exec_globals, self.exec_locals)
             output = stdout.getvalue()
         except Exception as e:
+            error = {
+                "message": str(e),
+                "traceback": traceback.format_exc()
+            }
         finally:
+            # Restore original plt.show
             plt.show = original_show
         return {
         }
     async def save_plot_to_supabase(self, plot_data: bytes, description: str, chat_id: str) -> str:
+        """
+        Save plot to Supabase storage and return the public URL
+        Args:
+            plot_data (bytes): Image data in bytes
+            description (str): Description of the plot
+            chat_id (str): ID of the chat session
+        Returns:
+            str: Public URL of the uploaded chart
+        """
+        # Generate unique filename
         filename = f"chart_{uuid.uuid4().hex}.png"
         filepath = self.charts_folder / filename
+        # Save the plot locally first
         with open(filepath, 'wb') as f:
             f.write(plot_data)
         try:
+            # Upload to Supabase
             public_url = await upload_file_to_supabase(
                 file_path=str(filepath),
                 file_name=filename,
                 chat_id=chat_id
             )
+            # Remove the local file after upload
             os.remove(filepath)
             return public_url
         except Exception as e:
+            # Clean up local file if upload fails
             if os.path.exists(filepath):
                 os.remove(filepath)
             raise Exception(f"Failed to upload plot to Supabase: {e}")
     def _format_result(self, result: Any) -> str:
         """Format an operation result as text for display.

         Args:
             result: Value produced by an executed analysis operation.

         Returns:
             Pretty-printed JSON (2-space indent) for DataFrame/Series values
             and for JSON-encodable dicts/lists; ``str(result)`` for anything
             else, including dicts/lists that cannot be JSON-encoded.
         """
         if isinstance(result, (pd.DataFrame, pd.Series)):
             # Let pandas serialize to a JSON string first (records layout,
             # ISO dates), then re-parse it so it can be pretty-printed.
             json_str = result.to_json(orient='records', date_format='iso')
             # This value was previously computed but never returned, so the
             # branch silently fell through to str(result).
             return json.dumps(json.loads(json_str), indent=2)
         if isinstance(result, (dict, list)):
             try:
                 return json.dumps(result, indent=2)
             except (TypeError, ValueError):
                 # Content json cannot encode (or circular references): fall
                 # back to the plain string form instead of raising from a
                 # display helper.
                 return str(result)
         return str(result)
     async def process_response(self, response: CsvChatResult, chat_id: str) -> str:
+     """Process the response with proper variable handling"""
+     output_parts = [response.casual_response]
+     # Process analysis operations first
+     for operation in response.analysis_operations:
+         execution_result = self.execute_code(operation.code.code)
+         # Get the result from locals
+         result = self.exec_locals.get(operation.result_var)
+         if execution_result['error']:
+             output_parts.append(f"\n❌ Error in operation '{operation.result_var}':")
+             output_parts.append("```python\n" + execution_result['error']['message'] + "\n```")
+         elif result is not None:
+             # Handle empty/None results
+             if result is None or (hasattr(result, '__len__') and len(result) == 0):
+                 output_parts.append(f"\n⚠️ Values are missing - Operation '{operation.result_var}' returned no data")
+             else:
+                 output_parts.append(f"\n🔹 Result for '{operation.result_var}':")
+                 output_parts.append("```python\n" + self._format_result(result) + "\n```")
+         else:
+             output_str = execution_result['output'].strip()
+             if output_str:
+                 output_parts.append(f"\nOutput for '{operation.result_var}':")
+                 output_parts.append("```\n" + output_str + "\n```")
+     # Process charts after all operations
+     if response.charts:
+         output_parts.append("\n📊 Visualizations:")
+         for chart in response.charts:
+             if chart.code:
+                 chart_result = self.execute_code(chart.code)
+                 if chart_result['plots']:
+                     for plot_data in chart_result['plots']:
+                         try:
+                             public_url = await self.save_plot_to_supabase(
+                                 plot_data=plot_data,
+                                 description=chart.image_description,
+                                 chat_id=chat_id
+                             )
+                             output_parts.append(f"\n🖼️ {chart.image_description}")
+                             output_parts.append(f"![{chart.image_description}]({public_url})")
+                         except Exception as e:
+                            output_parts.append(f"\n⚠️ Values are missing - Error uploading chart: {str(e)}")
+                 elif chart_result['error']:
+                     output_parts.append("```python\n" + f"Error generating {chart.image_description}: {chart_result['error']['message']}" + "\n```")
+                 else:
+                     output_parts.append(f"\n⚠️ Values are missing - No chart generated for '{chart.image_description}'")
+     return "\n".join(output_parts)