Soumik555 committed
Commit d90ad28 · Parent: 708f990

added table renderer with scrollbars

python_code_executor_service.py CHANGED
@@ -1,3 +1,260 @@
+# import os
+# from dotenv import load_dotenv
+# import uuid
+# import matplotlib.pyplot as plt
+# from pathlib import Path
+# from typing import Dict, Any, List, Optional
+# import pandas as pd
+# import numpy as np
+# import json
+# import io
+# import contextlib
+# import traceback
+# import time
+# from datetime import datetime, timedelta
+# import seaborn as sns
+# import scipy.stats as stats
+# from pydantic import BaseModel
+
+# from supabase_service import upload_file_to_supabase
+
+# # Load environment variables from .env file
+# load_dotenv()
+
+# class CodeResponse(BaseModel):
+#     """Container for code-related responses"""
+#     language: str = "python"
+#     code: str
+
+
+# class ChartSpecification(BaseModel):
+#     """Details about requested charts"""
+#     image_description: str
+#     code: Optional[str] = None
+
+
+# class AnalysisOperation(BaseModel):
+#     """Container for a single analysis operation with its code and result"""
+#     code: CodeResponse
+#     description: str
+
+
+# class CsvChatResult(BaseModel):
+#     """Structured response for CSV-related AI interactions"""
+#     response_type: str  # Literal["casual", "data_analysis", "visualization", "mixed"]
+#     casual_response: str
+#     analysis_operations: List[AnalysisOperation]
+#     charts: Optional[List[ChartSpecification]] = None
+
+
+# class PythonExecutor:
+#     """Handles execution of Python code with comprehensive data analysis libraries"""
+
+#     def __init__(self, df: pd.DataFrame, charts_folder: str = "generated_charts"):
+#         """
+#         Initialize the PythonExecutor with a DataFrame
+
+#         Args:
+#             df (pd.DataFrame): The DataFrame to operate on
+#             charts_folder (str): Folder to save charts in
+#         """
+#         self.df = df
+#         self.charts_folder = Path(charts_folder)
+#         self.charts_folder.mkdir(exist_ok=True)
+
+#     def execute_code(self, code: str) -> Dict[str, Any]:
+#         """
+#         Execute Python code with full data analysis context and return results
+
+#         Args:
+#             code (str): Python code to execute
+
+#         Returns:
+#             dict: Dictionary containing execution results and any generated plots
+#         """
+#         output = ""
+#         error = None
+#         plots = []
+
+#         # Capture stdout
+#         stdout = io.StringIO()
+
+#         # Monkey patch plt.show() to save figures
+#         original_show = plt.show
+
+#         def custom_show():
+#             """Custom show function that saves plots instead of displaying them"""
+#             for i, fig in enumerate(plt.get_fignums()):
+#                 figure = plt.figure(fig)
+#                 # Save plot to bytes buffer
+#                 buf = io.BytesIO()
+#                 figure.savefig(buf, format='png', bbox_inches='tight')
+#                 buf.seek(0)
+#                 plots.append(buf.read())
+#             plt.close('all')
+
+#         try:
+#             # Create comprehensive execution context with data analysis libraries
+#             exec_globals = {
+#                 # Core data analysis
+#                 'pd': pd,
+#                 'np': np,
+#                 'df': self.df,
+
+#                 # Visualization
+#                 'plt': plt,
+#                 'sns': sns,
+
+#                 # Statistics
+#                 'stats': stats,
+
+#                 # Date/time
+#                 'datetime': datetime,
+#                 'timedelta': timedelta,
+#                 'time': time,
+
+#                 # Utilities
+#                 'json': json,
+#                 '__builtins__': __builtins__,
+#             }
+
+#             # Replace plt.show with custom implementation
+#             plt.show = custom_show
+
+#             # Execute code and capture output
+#             with contextlib.redirect_stdout(stdout):
+#                 exec(code, exec_globals)
+
+#             output = stdout.getvalue()
+
+#         except Exception as e:
+#             error = {
+#                 "message": str(e),
+#                 "traceback": traceback.format_exc()
+#             }
+#         finally:
+#             # Restore original plt.show
+#             plt.show = original_show
+
+#         return {
+#             'output': output,
+#             'error': error,
+#             'plots': plots
+#         }
+
+#     async def save_plot_to_supabase(self, plot_data: bytes, description: str, chat_id: str) -> str:
+#         """
+#         Save plot to Supabase storage and return the public URL
+
+#         Args:
+#             plot_data (bytes): Image data in bytes
+#             description (str): Description of the plot
+#             chat_id (str): ID of the chat session
+
+#         Returns:
+#             str: Public URL of the uploaded chart
+#         """
+#         # Generate unique filename
+#         filename = f"chart_{uuid.uuid4().hex}.png"
+#         filepath = self.charts_folder / filename
+
+#         # Save the plot locally first
+#         with open(filepath, 'wb') as f:
+#             f.write(plot_data)
+
+#         try:
+#             # Upload to Supabase
+#             public_url = await upload_file_to_supabase(
+#                 file_path=str(filepath),
+#                 file_name=filename,
+#                 chat_id=chat_id
+#             )
+
+#             # Remove the local file after upload
+#             os.remove(filepath)
+
+#             return public_url
+#         except Exception as e:
+#             # Clean up local file if upload fails
+#             if os.path.exists(filepath):
+#                 os.remove(filepath)
+#             raise Exception(f"Failed to upload plot to Supabase: {e}")
+
+#     def _looks_like_structured_data(self, output: str) -> bool:
+#         """Helper to detect JSON-like or array-like output"""
+#         output = output.strip()
+#         return (
+#             output.startswith('{') and output.endswith('}') or  # JSON object
+#             output.startswith('[') and output.endswith(']') or  # Array
+#             '\n' in output and '=' in output  # Python console output
+#         )
+
+#     async def process_response(self, response: CsvChatResult, chat_id: str) -> str:
+#         """
+#         Process the CsvChatResult response and generate formatted output
+#         with markdown code blocks for structured data.
+
+#         Args:
+#             response (CsvChatResult): Response from CSV analysis
+#             chat_id (str): ID of the chat session
+
+#         Returns:
+#             str: Formatted output with results and image URLs
+#         """
+#         output_parts = []
+
+#         # Add casual response
+#         output_parts.append(response.casual_response)
+
+#         # Process analysis operations
+#         for operation in response.analysis_operations:
+#             # Execute the code
+#             result = self.execute_code(operation.code.code)
+
+#             # Add operation description
+#             output_parts.append(f"\n{operation.description}:")
+
+#             # Add output or error with markdown wrapping
+#             if result['error']:
+#                 output_parts.append("```python\n" + f"Error: {result['error']['message']}" + "\n```")
+#             else:
+#                 output = result['output'].strip()
+#                 if self._looks_like_structured_data(output):
+#                     output_parts.append("```python\n" + output + "\n```")
+#                 else:
+#                     output_parts.append(output)
+
+#         # Process charts
+#         if response.charts:
+#             output_parts.append("\nVisualizations:")
+#             for chart in response.charts:
+#                 if chart.code:
+#                     result = self.execute_code(chart.code)
+#                     if result['plots']:
+#                         for plot_data in result['plots']:
+#                             try:
+#                                 public_url = await self.save_plot_to_supabase(
+#                                     plot_data=plot_data,
+#                                     description=chart.image_description,
+#                                     chat_id=chat_id
+#                                 )
+#                                 output_parts.append(f"\n{chart.image_description}")
+#                                 output_parts.append(f"![{chart.image_description}]({public_url})")
+#                             except Exception as e:
+#                                 output_parts.append(f"\nError uploading chart: {str(e)}")
+#                     elif result['error']:
+#                         output_parts.append("```python\n" + f"Error generating {chart.image_description}: {result['error']['message']}" + "\n```")
+
+#         return "\n".join(output_parts)
+
+
+
+
+
+
+
+
+# Table formatter
+
 import os
 from dotenv import load_dotenv
 import uuid
@@ -142,6 +399,29 @@ class PythonExecutor:
             'plots': plots
         }

+    def _convert_dataframe_to_text(self, df: pd.DataFrame) -> str:
+        """
+        Convert pandas DataFrame to a text format that can be easily rendered
+        in the frontend using the ScrollableTableRenderer component.
+
+        Args:
+            df (pd.DataFrame): DataFrame to convert
+
+        Returns:
+            str: Text representation of the DataFrame
+        """
+        # Convert DataFrame to string with proper formatting
+        df_str = df.to_string(index=True)
+
+        # Split into lines and clean up
+        lines = df_str.split('\n')
+
+        # Remove any trailing whitespace from each line
+        cleaned_lines = [line.rstrip() for line in lines]
+
+        # Join back with newlines
+        return '\n'.join(cleaned_lines)
+
     async def save_plot_to_supabase(self, plot_data: bytes, description: str, chat_id: str) -> str:
         """
         Save plot to Supabase storage and return the public URL
@@ -189,6 +469,24 @@ class PythonExecutor:
             '\n' in output and '=' in output  # Python console output
         )

+    def _is_dataframe_output(self, output: str) -> bool:
+        """Helper to detect if output looks like a pandas DataFrame"""
+        lines = output.strip().split('\n')
+        if len(lines) < 2:
+            return False
+
+        # Check for typical DataFrame header pattern
+        first_line = lines[0].strip()
+        second_line = lines[1].strip()
+
+        # Look for column headers and separator line
+        if not first_line or not second_line:
+            return False
+
+        # Check if the first line contains column names
+        # and the second line has some alignment characters
+        return True
+
     async def process_response(self, response: CsvChatResult, chat_id: str) -> str:
         """
         Process the CsvChatResult response and generate formatted output
@@ -219,7 +517,20 @@ class PythonExecutor:
                 output_parts.append("```python\n" + f"Error: {result['error']['message']}" + "\n```")
             else:
                 output = result['output'].strip()
-                if self._looks_like_structured_data(output):
+
+                # Check if the output is a DataFrame-like structure
+                if self._is_dataframe_output(output):
+                    # Convert to a clean text format for frontend rendering
+                    try:
+                        # Get the last evaluated expression which might be the DataFrame
+                        # This is a simple approach - in practice you might need a more robust way
+                        # to capture the actual DataFrame from the execution context
+                        df_output = self._convert_dataframe_to_text(eval(operation.code.code.split('\n')[-1], globals(), locals()))
+                        output_parts.append("```text\n" + df_output + "\n```")
+                    except:
+                        # Fall back to regular output if we can't convert it
+                        output_parts.append("```text\n" + output + "\n```")
+                elif self._looks_like_structured_data(output):
                     output_parts.append("```python\n" + output + "\n```")
                 else:
                     output_parts.append(output)
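For context, the new `_convert_dataframe_to_text` helper produces the plain-text table that `process_response` wraps in a fenced "text" block for the frontend ScrollableTableRenderer mentioned in its docstring. A minimal sketch of that path, assuming the module is importable as `python_code_executor_service` and using a made-up DataFrame in place of an uploaded CSV:

```python
import pandas as pd
from python_code_executor_service import PythonExecutor

# Hypothetical DataFrame standing in for an uploaded CSV
df = pd.DataFrame({"region": ["EU", "US", "APAC"], "revenue": [1200.5, 980.0, 1430.2]})

executor = PythonExecutor(df)

# The helper renders the frame via df.to_string(index=True) and strips trailing
# whitespace; process_response() then wraps this text in a fenced "text" block
# for the frontend table renderer.
table_text = executor._convert_dataframe_to_text(df)
print(table_text)

# The companion heuristic that gates the wrapping; as committed it accepts any
# output whose first two lines are non-empty.
print(executor._is_dataframe_output(table_text))  # True
```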
together_ai_llama_agent.py CHANGED
@@ -89,64 +89,95 @@ def get_csv_info(df: pd.DataFrame) -> dict:
 # """
 # return

+# def get_csv_system_prompt(df: pd.DataFrame) -> str:
+#     """Generate system prompt for CSV analysis"""
+#     csv_info = get_csv_info(df)
+
+#     prompt = f"""
+#     You're a CSV analysis assistant. The pandas DataFrame is loaded as 'df' - use this variable.
+
+#     CSV Info:
+#     - Rows: {csv_info['num_rows']}, Cols: {csv_info['num_cols']}
+#     - Columns: {csv_info['columns']}
+#     - Sample: {csv_info['example_rows']}
+#     - Dtypes: {csv_info['dtypes']}
+
+#     Strict Rules:
+#     1. Never recreate 'df' - use the existing variable
+#     2. For analysis:
+#        - Include necessary imports (except pandas) and include complete code
+#        - Use df directly (e.g., print(df[...].mean()))
+#     3. For visualizations:
+#        - Create the most professional, publication-quality charts possible
+#        - Maximize descriptive elements and detail while maintaining clarity
+#        - Figure size: (14, 8) for complex charts, (12, 6) for simpler ones
+#        - Use comprehensive titles (fontsize=16) and axis labels (fontsize=14)
+#        - Include informative legends (fontsize=12) when appropriate
+#        - Add annotations for important data points where valuable
+#        - Rotate x-labels (45° if needed) with fontsize=12 for readability
+#        - Use colorblind-friendly palettes (seaborn 'deep', 'muted', or 'colorblind')
+#        - Add gridlines (alpha=0.3) when they improve readability
+#        - Include proper margins and padding to prevent label cutoff
+#        - For distributions, include kernel density estimates when appropriate
+#        - For time series, use appropriate date formatting and markers
+#        - Do not use any visualization library other than matplotlib or seaborn
+#        - Complete code with plt.tight_layout() before plt.show()
+#        - Example professional chart:
+#         plt.figure(figsize=(14, 8))
+#         ax = sns.barplot(x='category', y='value', data=df, palette='muted', ci=None)
+#         plt.title('Detailed Analysis of Values by Category', fontsize=16, pad=20)
+#         plt.xlabel('Category', fontsize=14)
+#         plt.ylabel('Average Value', fontsize=14)
+#         plt.xticks(rotation=45, ha='right', fontsize=12)
+#         plt.yticks(fontsize=12)
+#         ax.grid(True, linestyle='--', alpha=0.3)
+#         for p in ax.patches:
+#             ax.annotate(f'{{p.get_height():.1f}}',
+#                         (p.get_x() + p.get_width() / 2., p.get_height()),
+#                         ha='center', va='center',
+#                         xytext=(0, 10),
+#                         textcoords='offset points',
+#                         fontsize=12)
+#         plt.tight_layout()
+#         plt.show()
+#     4. For Lists and Dictionaries, always return them as JSON
+
+#     Example:
+#     import json
+#     print(json.dumps(df[df['col'] == 'val'].to_dict('records'), indent=2))
+#     """
+#     return prompt
+
+
 def get_csv_system_prompt(df: pd.DataFrame) -> str:
-    """Generate system prompt for CSV analysis"""
+    """Generate concise system prompt for CSV analysis"""
     csv_info = get_csv_info(df)

-    prompt = f"""
-    You're a CSV analysis assistant. The pandas DataFrame is loaded as 'df' - use this variable.
-
-    CSV Info:
-    - Rows: {csv_info['num_rows']}, Cols: {csv_info['num_cols']}
+    return f"""
+    Analyze this pandas DataFrame ('df'):
+    - Shape: {csv_info['num_rows']} rows, {csv_info['num_cols']} cols
     - Columns: {csv_info['columns']}
     - Sample: {csv_info['example_rows']}
     - Dtypes: {csv_info['dtypes']}

-    Strict Rules:
-    1. Never recreate 'df' - use the existing variable
-    2. For analysis:
-       - Include necessary imports (except pandas) and include complete code
-       - Use df directly (e.g., print(df[...].mean()))
+    Rules:
+    1. Use existing 'df' variable
+    2. Include complete code with imports (except pandas)
     3. For visualizations:
-       - Create the most professional, publication-quality charts possible
-       - Maximize descriptive elements and detail while maintaining clarity
-       - Figure size: (14, 8) for complex charts, (12, 6) for simpler ones
-       - Use comprehensive titles (fontsize=16) and axis labels (fontsize=14)
-       - Include informative legends (fontsize=12) when appropriate
-       - Add annotations for important data points where valuable
-       - Rotate x-labels (45° if needed) with fontsize=12 for readability
-       - Use colorblind-friendly palettes (seaborn 'deep', 'muted', or 'colorblind')
-       - Add gridlines (alpha=0.3) when they improve readability
-       - Include proper margins and padding to prevent label cutoff
-       - For distributions, include kernel density estimates when appropriate
-       - For time series, use appropriate date formatting and markers
-       - Do not use any visualization library other than matplotlib or seaborn
-       - Complete code with plt.tight_layout() before plt.show()
-       - Example professional chart:
+       - Use matplotlib/seaborn only
+       - Professional style: figsize (12-14, 6-8), clear titles/labels (fontsize 14-16)
+       - Rotate x-labels if needed (45°), use colorblind-friendly palettes
+       - Add gridlines (alpha=0.3), annotations, and tight_layout()
+       - Example:
         plt.figure(figsize=(14, 8))
-        ax = sns.barplot(x='category', y='value', data=df, palette='muted', ci=None)
-        plt.title('Detailed Analysis of Values by Category', fontsize=16, pad=20)
-        plt.xlabel('Category', fontsize=14)
-        plt.ylabel('Average Value', fontsize=14)
-        plt.xticks(rotation=45, ha='right', fontsize=12)
-        plt.yticks(fontsize=12)
-        ax.grid(True, linestyle='--', alpha=0.3)
-        for p in ax.patches:
-            ax.annotate(f'{{p.get_height():.1f}}',
-                        (p.get_x() + p.get_width() / 2., p.get_height()),
-                        ha='center', va='center',
-                        xytext=(0, 10),
-                        textcoords='offset points',
-                        fontsize=12)
+        ax = sns.barplot(x='category', y='value', data=df)
+        plt.title('Analysis Title', fontsize=16)
+        plt.xticks(rotation=45)
+        ax.grid(alpha=0.3)
         plt.tight_layout()
        plt.show()
-    4. For Lists and Dictionaries, always return them as JSON
-
-    Example:
-    import json
-    print(json.dumps(df[df['col'] == 'val'].to_dict('records'), indent=2))
+    4. Return lists/dicts as JSON
     """
-    return prompt


 def create_csv_agent(df: pd.DataFrame, max_retries: int = 1) -> Agent:
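For context, the trimmed prompt is still built from `get_csv_info(df)`. A minimal sketch of exercising the new `get_csv_system_prompt`, assuming the module is importable as `together_ai_llama_agent` and using a hypothetical DataFrame:

```python
import pandas as pd
from together_ai_llama_agent import get_csv_system_prompt

# Hypothetical DataFrame standing in for an uploaded CSV
df = pd.DataFrame({"category": ["A", "B", "A"], "value": [10, 20, 15]})

# The condensed prompt embeds only shape, columns, a sample, and dtypes,
# plus the shortened rules shown in the diff above.
print(get_csv_system_prompt(df))
```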