Spaces:

Soumik555
/

FastApi

Running

App Files Files Community

Soumik555 committed on Jun 19

Commit

d90ad28

1 Parent(s): 708f990

added table renderer with scrollbars

Browse files

Files changed (2) hide show

python_code_executor_service.py +312 -1
together_ai_llama_agent.py +77 -46

python_code_executor_service.py CHANGED Viewed

@@ -1,3 +1,260 @@
 import os
 from dotenv import load_dotenv
 import uuid
@@ -142,6 +399,29 @@ class PythonExecutor:
             'plots': plots
         }
     async def save_plot_to_supabase(self, plot_data: bytes, description: str, chat_id: str) -> str:
         """
         Save plot to Supabase storage and return the public URL
@@ -189,6 +469,24 @@ class PythonExecutor:
             '\n' in output and '=' in output  # Python console output
         )
     async def process_response(self, response: CsvChatResult, chat_id: str) -> str:
         """
         Process the CsvChatResult response and generate formatted output
@@ -219,7 +517,20 @@ class PythonExecutor:
                 output_parts.append("```python\n" + f"Error: {result['error']['message']}" + "\n```")
             else:
                 output = result['output'].strip()
-                if self._looks_like_structured_data(output):
                     output_parts.append("```python\n" + output + "\n```")
                 else:
                     output_parts.append(output)

+# import os
+# from dotenv import load_dotenv
+# import uuid
+# import matplotlib.pyplot as plt
+# from pathlib import Path
+# from typing import Dict, Any, List, Optional
+# import pandas as pd
+# import numpy as np
+# import json
+# import io
+# import contextlib
+# import traceback
+# import time
+# from datetime import datetime, timedelta
+# import seaborn as sns
+# import scipy.stats as stats
+# from pydantic import BaseModel
+# from supabase_service import upload_file_to_supabase
+# # Load environment variables from .env file
+# load_dotenv()
+# class CodeResponse(BaseModel):
+#     """Container for code-related responses"""
+#     language: str = "python"
+#     code: str
+# class ChartSpecification(BaseModel):
+#     """Details about requested charts"""
+#     image_description: str
+#     code: Optional[str] = None
+# class AnalysisOperation(BaseModel):
+#     """Container for a single analysis operation with its code and result"""
+#     code: CodeResponse
+#     description: str
+# class CsvChatResult(BaseModel):
+#     """Structured response for CSV-related AI interactions"""
+#     response_type: str  # Literal["casual", "data_analysis", "visualization", "mixed"]
+#     casual_response: str
+#     analysis_operations: List[AnalysisOperation]
+#     charts: Optional[List[ChartSpecification]] = None
+# class PythonExecutor:
+#     """Handles execution of Python code with comprehensive data analysis libraries"""
+#     def __init__(self, df: pd.DataFrame, charts_folder: str = "generated_charts"):
+#         """
+#         Initialize the PythonExecutor with a DataFrame
+#         Args:
+#             df (pd.DataFrame): The DataFrame to operate on
+#             charts_folder (str): Folder to save charts in
+#         """
+#         self.df = df
+#         self.charts_folder = Path(charts_folder)
+#         self.charts_folder.mkdir(exist_ok=True)
+#     def execute_code(self, code: str) -> Dict[str, Any]:
+#         """
+#         Execute Python code with full data analysis context and return results
+#         Args:
+#             code (str): Python code to execute
+#         Returns:
+#             dict: Dictionary containing execution results and any generated plots
+#         """
+#         output = ""
+#         error = None
+#         plots = []
+#         # Capture stdout
+#         stdout = io.StringIO()
+#         # Monkey patch plt.show() to save figures
+#         original_show = plt.show
+#         def custom_show():
+#             """Custom show function that saves plots instead of displaying them"""
+#             for i, fig in enumerate(plt.get_fignums()):
+#                 figure = plt.figure(fig)
+#                 # Save plot to bytes buffer
+#                 buf = io.BytesIO()
+#                 figure.savefig(buf, format='png', bbox_inches='tight')
+#                 buf.seek(0)
+#                 plots.append(buf.read())
+#             plt.close('all')
+#         try:
+#             # Create comprehensive execution context with data analysis libraries
+#             exec_globals = {
+#                 # Core data analysis
+#                 'pd': pd,
+#                 'np': np,
+#                 'df': self.df,
+#                 # Visualization
+#                 'plt': plt,
+#                 'sns': sns,
+#                 # Statistics
+#                 'stats': stats,
+#                 # Date/time
+#                 'datetime': datetime,
+#                 'timedelta': timedelta,
+#                 'time': time,
+#                 # Utilities
+#                 'json': json,
+#                 '__builtins__': __builtins__,
+#             }
+#             # Replace plt.show with custom implementation
+#             plt.show = custom_show
+#             # Execute code and capture output
+#             with contextlib.redirect_stdout(stdout):
+#                 exec(code, exec_globals)
+#             output = stdout.getvalue()
+#         except Exception as e:
+#             error = {
+#                 "message": str(e),
+#                 "traceback": traceback.format_exc()
+#             }
+#         finally:
+#             # Restore original plt.show
+#             plt.show = original_show
+#         return {
+#             'output': output,
+#             'error': error,
+#             'plots': plots
+#         }
+#     async def save_plot_to_supabase(self, plot_data: bytes, description: str, chat_id: str) -> str:
+#         """
+#         Save plot to Supabase storage and return the public URL
+#         Args:
+#             plot_data (bytes): Image data in bytes
+#             description (str): Description of the plot
+#             chat_id (str): ID of the chat session
+#         Returns:
+#             str: Public URL of the uploaded chart
+#         """
+#         # Generate unique filename
+#         filename = f"chart_{uuid.uuid4().hex}.png"
+#         filepath = self.charts_folder / filename
+#         # Save the plot locally first
+#         with open(filepath, 'wb') as f:
+#             f.write(plot_data)
+#         try:
+#             # Upload to Supabase
+#             public_url = await upload_file_to_supabase(
+#                 file_path=str(filepath),
+#                 file_name=filename,
+#                 chat_id=chat_id
+#             )
+#             # Remove the local file after upload
+#             os.remove(filepath)
+#             return public_url
+#         except Exception as e:
+#             # Clean up local file if upload fails
+#             if os.path.exists(filepath):
+#                 os.remove(filepath)
+#             raise Exception(f"Failed to upload plot to Supabase: {e}")
+#     def _looks_like_structured_data(self, output: str) -> bool:
+#         """Helper to detect JSON-like or array-like output"""
+#         output = output.strip()
+#         return (
+#             output.startswith('{') and output.endswith('}') or  # JSON object
+#             output.startswith('[') and output.endswith(']') or  # Array
+#             '\n' in output and '=' in output  # Python console output
+#         )
+#     async def process_response(self, response: CsvChatResult, chat_id: str) -> str:
+#         """
+#         Process the CsvChatResult response and generate formatted output
+#         with markdown code blocks for structured data.
+#         Args:
+#             response (CsvChatResult): Response from CSV analysis
+#             chat_id (str): ID of the chat session
+#         Returns:
+#             str: Formatted output with results and image URLs
+#         """
+#         output_parts = []
+#         # Add casual response
+#         output_parts.append(response.casual_response)
+#         # Process analysis operations
+#         for operation in response.analysis_operations:
+#             # Execute the code
+#             result = self.execute_code(operation.code.code)
+#             # Add operation description
+#             output_parts.append(f"\n{operation.description}:")
+#             # Add output or error with markdown wrapping
+#             if result['error']:
+#                 output_parts.append("```python\n" + f"Error: {result['error']['message']}" + "\n```")
+#             else:
+#                 output = result['output'].strip()
+#                 if self._looks_like_structured_data(output):
+#                     output_parts.append("```python\n" + output + "\n```")
+#                 else:
+#                     output_parts.append(output)
+#         # Process charts
+#         if response.charts:
+#             output_parts.append("\nVisualizations:")
+#             for chart in response.charts:
+#                 if chart.code:
+#                     result = self.execute_code(chart.code)
+#                     if result['plots']:
+#                         for plot_data in result['plots']:
+#                             try:
+#                                 public_url = await self.save_plot_to_supabase(
+#                                     plot_data=plot_data,
+#                                     description=chart.image_description,
+#                                     chat_id=chat_id
+#                                 )
+#                                 output_parts.append(f"\n{chart.image_description}")
+#                                 output_parts.append(f"![{chart.image_description}]({public_url})")
+#                             except Exception as e:
+#                                 output_parts.append(f"\nError uploading chart: {str(e)}")
+#                     elif result['error']:
+#                         output_parts.append("```python\n" + f"Error generating {chart.image_description}: {result['error']['message']}" + "\n```")
+#         return "\n".join(output_parts)
+# Table formatter
 import os
 from dotenv import load_dotenv
 import uuid
             'plots': plots
         }
+    def _convert_dataframe_to_text(self, df: pd.DataFrame) -> str:
+        """
+        Convert pandas DataFrame to a text format that can be easily rendered
+        in the frontend using the ScrollableTableRenderer component.
+        Args:
+            df (pd.DataFrame): DataFrame to convert
+        Returns:
+            str: Text representation of the DataFrame
+        """
+        # Convert DataFrame to string with proper formatting
+        df_str = df.to_string(index=True)
+        # Split into lines and clean up
+        lines = df_str.split('\n')
+        # Remove any trailing whitespace from each line
+        cleaned_lines = [line.rstrip() for line in lines]
+        # Join back with newlines
+        return '\n'.join(cleaned_lines)
     async def save_plot_to_supabase(self, plot_data: bytes, description: str, chat_id: str) -> str:
         """
         Save plot to Supabase storage and return the public URL
             '\n' in output and '=' in output  # Python console output
         )
+    def _is_dataframe_output(self, output: str) -> bool:
+        """Helper to detect if output looks like a pandas DataFrame"""
+        lines = output.strip().split('\n')
+        if len(lines) < 2:
+            return False
+        # Check for typical DataFrame header pattern
+        first_line = lines[0].strip()
+        second_line = lines[1].strip()
+        # Look for column headers and separator line
+        if not first_line or not second_line:
+            return False
+        # Check if the first line contains column names
+        # and the second line has some alignment characters
+        return True
     async def process_response(self, response: CsvChatResult, chat_id: str) -> str:
         """
         Process the CsvChatResult response and generate formatted output
                 output_parts.append("```python\n" + f"Error: {result['error']['message']}" + "\n```")
             else:
                 output = result['output'].strip()
+                # Check if the output is a DataFrame-like structure
+                if self._is_dataframe_output(output):
+                    # Convert to a clean text format for frontend rendering
+                    try:
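+                        # Re-evaluate the last line of the operation's code, hoping it is an
+                        # expression that yields the DataFrame. This is a simple approach - in
+                        # practice you need a more robust way to capture the actual DataFrame
+                        # from the execution context.
+                        # NOTE(review): eval() runs here with this module's globals() and this
+                        # method's locals(), so the line is executed a second time and on
+                        # AI-generated code; any failure falls through to the except below.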
+                        df_output = self._convert_dataframe_to_text(eval(operation.code.code.split('\n')[-1], globals(), locals()))
+                        output_parts.append("```text\n" + df_output + "\n```")
+                    except:
+                        # Fall back to regular output if we can't convert it
+                        output_parts.append("```text\n" + output + "\n```")
+                elif self._looks_like_structured_data(output):
                     output_parts.append("```python\n" + output + "\n```")
                 else:
                     output_parts.append(output)

together_ai_llama_agent.py CHANGED Viewed

@@ -89,64 +89,95 @@ def get_csv_info(df: pd.DataFrame) -> dict:
 # """
 #     return
 def get_csv_system_prompt(df: pd.DataFrame) -> str:
-    """Generate system prompt for CSV analysis"""
     csv_info = get_csv_info(df)
-    prompt = f"""
-You're a CSV analysis assistant. The pandas DataFrame is loaded as 'df' - use this variable.
-CSV Info:
-- Rows: {csv_info['num_rows']}, Cols: {csv_info['num_cols']}
 - Columns: {csv_info['columns']}
 - Sample: {csv_info['example_rows']}
 - Dtypes: {csv_info['dtypes']}
-Strict Rules:
-1. Never recreate 'df' - use the existing variable
-2. For analysis:
-   - Include necessary imports (except pandas) and include complete code
-   - Use df directly (e.g., print(df[...].mean()))
 3. For visualizations:
-   - Create the most professional, publication-quality charts possible
-   - Maximize descriptive elements and detail while maintaining clarity
-   - Figure size: (14, 8) for complex charts, (12, 6) for simpler ones
-   - Use comprehensive titles (fontsize=16) and axis labels (fontsize=14)
-   - Include informative legends (fontsize=12) when appropriate
-   - Add annotations for important data points where valuable
-   - Rotate x-labels (45° if needed) with fontsize=12 for readability
-   - Use colorblind-friendly palettes (seaborn 'deep', 'muted', or 'colorblind')
-   - Add gridlines (alpha=0.3) when they improve readability
-   - Include proper margins and padding to prevent label cutoff
-   - For distributions, include kernel density estimates when appropriate
-   - For time series, use appropriate date formatting and markers
-   - Do not use any visualization library other than matplotlib or seaborn
-   - Complete code with plt.tight_layout() before plt.show()
-   - Example professional chart:
      plt.figure(figsize=(14, 8))
-     ax = sns.barplot(x='category', y='value', data=df, palette='muted', ci=None)
-     plt.title('Detailed Analysis of Values by Category', fontsize=16, pad=20)
-     plt.xlabel('Category', fontsize=14)
-     plt.ylabel('Average Value', fontsize=14)
-     plt.xticks(rotation=45, ha='right', fontsize=12)
-     plt.yticks(fontsize=12)
-     ax.grid(True, linestyle='--', alpha=0.3)
-     for p in ax.patches:
-         ax.annotate(f'{{p.get_height():.1f}}',
-                    (p.get_x() + p.get_width() / 2., p.get_height()),
-                    ha='center', va='center',
-                    xytext=(0, 10),
-                    textcoords='offset points',
-                    fontsize=12)
      plt.tight_layout()
      plt.show()
-4. For Lists and Dictionaries, always return them as JSON
-Example:
-import json
-print(json.dumps(df[df['col'] == 'val'].to_dict('records'), indent=2))
 """
-    return prompt
 def create_csv_agent(df: pd.DataFrame, max_retries: int = 1) -> Agent:

 # """
 #     return
+# def get_csv_system_prompt(df: pd.DataFrame) -> str:
+#     """Generate system prompt for CSV analysis"""
+#     csv_info = get_csv_info(df)
+#     prompt = f"""
+# You're a CSV analysis assistant. The pandas DataFrame is loaded as 'df' - use this variable.
+# CSV Info:
+# - Rows: {csv_info['num_rows']}, Cols: {csv_info['num_cols']}
+# - Columns: {csv_info['columns']}
+# - Sample: {csv_info['example_rows']}
+# - Dtypes: {csv_info['dtypes']}
+# Strict Rules:
+# 1. Never recreate 'df' - use the existing variable
+# 2. For analysis:
+#    - Include necessary imports (except pandas) and include complete code
+#    - Use df directly (e.g., print(df[...].mean()))
+# 3. For visualizations:
+#    - Create the most professional, publication-quality charts possible
+#    - Maximize descriptive elements and detail while maintaining clarity
+#    - Figure size: (14, 8) for complex charts, (12, 6) for simpler ones
+#    - Use comprehensive titles (fontsize=16) and axis labels (fontsize=14)
+#    - Include informative legends (fontsize=12) when appropriate
+#    - Add annotations for important data points where valuable
+#    - Rotate x-labels (45° if needed) with fontsize=12 for readability
+#    - Use colorblind-friendly palettes (seaborn 'deep', 'muted', or 'colorblind')
+#    - Add gridlines (alpha=0.3) when they improve readability
+#    - Include proper margins and padding to prevent label cutoff
+#    - For distributions, include kernel density estimates when appropriate
+#    - For time series, use appropriate date formatting and markers
+#    - Do not use any visualization library other than matplotlib or seaborn
+#    - Complete code with plt.tight_layout() before plt.show()
+#    - Example professional chart:
+#      plt.figure(figsize=(14, 8))
+#      ax = sns.barplot(x='category', y='value', data=df, palette='muted', ci=None)
+#      plt.title('Detailed Analysis of Values by Category', fontsize=16, pad=20)
+#      plt.xlabel('Category', fontsize=14)
+#      plt.ylabel('Average Value', fontsize=14)
+#      plt.xticks(rotation=45, ha='right', fontsize=12)
+#      plt.yticks(fontsize=12)
+#      ax.grid(True, linestyle='--', alpha=0.3)
+#      for p in ax.patches:
+#          ax.annotate(f'{{p.get_height():.1f}}',
+#                     (p.get_x() + p.get_width() / 2., p.get_height()),
+#                     ha='center', va='center',
+#                     xytext=(0, 10),
+#                     textcoords='offset points',
+#                     fontsize=12)
+#      plt.tight_layout()
+#      plt.show()
+# 4. For Lists and Dictionaries, always return them as JSON
+# Example:
+# import json
+# print(json.dumps(df[df['col'] == 'val'].to_dict('records'), indent=2))
+# """
+#     return prompt
 def get_csv_system_prompt(df: pd.DataFrame) -> str:  # returns the prompt text built from get_csv_info(df)
+    """Generate concise system prompt for CSV analysis"""
     csv_info = get_csv_info(df)  # dict read below for num_rows, num_cols, columns, example_rows, dtypes
+    return f"""
+Analyze this pandas DataFrame ('df'):
+- Shape: {csv_info['num_rows']} rows, {csv_info['num_cols']} cols
 - Columns: {csv_info['columns']}
 - Sample: {csv_info['example_rows']}
 - Dtypes: {csv_info['dtypes']}
+Rules:
+1. Use existing 'df' variable
+2. Include complete code with imports (except pandas)
 3. For visualizations:
+   - Use matplotlib/seaborn only
+   - Professional style: figsize (12-14, 6-8), clear titles/labels (fontsize 14-16)
+   - Rotate x-labels if needed (45°), use colorblind-friendly palettes
+   - Add gridlines (alpha=0.3), annotations, and tight_layout()
+   - Example:
      plt.figure(figsize=(14, 8))
+     ax = sns.barplot(x='category', y='value', data=df)
+     plt.title('Analysis Title', fontsize=16)
+     plt.xticks(rotation=45)
+     ax.grid(alpha=0.3)
      plt.tight_layout()
      plt.show()
+4. Return lists/dicts as JSON
 """
 def create_csv_agent(df: pd.DataFrame, max_retries: int = 1) -> Agent: