Soumik555 commited on
Commit
938b9b4
·
1 Parent(s): 2a71df1

DB Chat for pro users only

Browse files
Files changed (1) hide show
  1. python_code_executor_service.py +75 -125
python_code_executor_service.py CHANGED
@@ -16,6 +16,7 @@ import seaborn as sns
16
  import scipy.stats as stats
17
  from pydantic import BaseModel
18
  from tabulate import tabulate
 
19
 
20
  from supabase_service import upload_file_to_supabase
21
 
@@ -27,12 +28,20 @@ class CodeResponse(BaseModel):
27
  language: str = "python"
28
  code: str
29
 
 
 
 
 
30
 
31
  class ChartSpecification(BaseModel):
32
  """Details about requested charts"""
33
  image_description: str
34
  code: Optional[str] = None
35
 
 
 
 
 
36
 
37
  class AnalysisOperation(BaseModel):
38
  """Container for a single analysis operation with its code and result"""
@@ -47,48 +56,33 @@ class CsvChatResult(BaseModel):
47
  analysis_operations: List[AnalysisOperation]
48
  charts: Optional[List[ChartSpecification]] = None
49
 
 
 
 
 
 
 
 
50
 
51
  class PythonExecutor:
52
  """Handles execution of Python code with comprehensive data analysis libraries"""
53
 
54
  def __init__(self, df: pd.DataFrame, charts_folder: str = "generated_charts"):
55
- """
56
- Initialize the PythonExecutor with a DataFrame
57
-
58
- Args:
59
- df (pd.DataFrame): The DataFrame to operate on
60
- charts_folder (str): Folder to save charts in
61
- """
62
  self.df = df
63
  self.charts_folder = Path(charts_folder)
64
  self.charts_folder.mkdir(exist_ok=True)
65
  self.exec_locals = {}
66
 
67
  def execute_code(self, code: str) -> Dict[str, Any]:
68
- """
69
- Execute Python code with full data analysis context and return results
70
-
71
- Args:
72
- code (str): Python code to execute
73
-
74
- Returns:
75
- dict: Dictionary containing execution results and any generated plots
76
- """
77
  output = ""
78
  error = None
79
  plots = []
80
-
81
- # Capture stdout
82
  stdout = io.StringIO()
83
-
84
- # Monkey patch plt.show() to save figures
85
  original_show = plt.show
86
 
87
  def custom_show():
88
- """Custom show function that saves plots instead of displaying them"""
89
  for i, fig in enumerate(plt.get_fignums()):
90
  figure = plt.figure(fig)
91
- # Save plot to bytes buffer
92
  buf = io.BytesIO()
93
  figure.savefig(buf, format='png', bbox_inches='tight')
94
  buf.seek(0)
@@ -96,47 +90,22 @@ class PythonExecutor:
96
  plt.close('all')
97
 
98
  try:
99
- # Create comprehensive execution context with data analysis libraries
100
  exec_globals = {
101
- # Core data analysis
102
- 'pd': pd,
103
- 'np': np,
104
- 'df': self.df,
105
-
106
- # Visualization
107
- 'plt': plt,
108
- 'sns': sns,
109
- 'tabulate': tabulate,
110
-
111
- # Statistics
112
- 'stats': stats,
113
-
114
- # Date/time
115
- 'datetime': datetime,
116
- 'timedelta': timedelta,
117
- 'time': time,
118
-
119
- # Utilities
120
- 'json': json,
121
- '__builtins__': __builtins__,
122
  }
123
 
124
- # Replace plt.show with custom implementation
125
  plt.show = custom_show
126
-
127
- # Execute code and capture output
128
  with contextlib.redirect_stdout(stdout):
129
  exec(code, exec_globals, self.exec_locals)
130
-
131
  output = stdout.getvalue()
132
 
133
  except Exception as e:
134
- error = {
135
- "message": str(e),
136
- "traceback": traceback.format_exc()
137
- }
138
  finally:
139
- # Restore original plt.show
140
  plt.show = original_show
141
 
142
  return {
@@ -147,102 +116,83 @@ class PythonExecutor:
147
  }
148
 
149
  async def save_plot_to_supabase(self, plot_data: bytes, description: str, chat_id: str) -> str:
150
- """
151
- Save plot to Supabase storage and return the public URL
152
-
153
- Args:
154
- plot_data (bytes): Image data in bytes
155
- description (str): Description of the plot
156
- chat_id (str): ID of the chat session
157
-
158
- Returns:
159
- str: Public URL of the uploaded chart
160
- """
161
- # Generate unique filename
162
  filename = f"chart_{uuid.uuid4().hex}.png"
163
  filepath = self.charts_folder / filename
164
 
165
- # Save the plot locally first
166
  with open(filepath, 'wb') as f:
167
  f.write(plot_data)
168
 
169
  try:
170
- # Upload to Supabase
171
  public_url = await upload_file_to_supabase(
172
  file_path=str(filepath),
173
  file_name=filename,
174
  chat_id=chat_id
175
  )
176
-
177
- # Remove the local file after upload
178
  os.remove(filepath)
179
-
180
  return public_url
181
  except Exception as e:
182
- # Clean up local file if upload fails
183
  if os.path.exists(filepath):
184
  os.remove(filepath)
185
  raise Exception(f"Failed to upload plot to Supabase: {e}")
186
 
187
  def _format_result(self, result: Any) -> str:
188
- """Format the result for display"""
189
  if isinstance(result, (pd.DataFrame, pd.Series)):
190
- # Convert DataFrame to a string, then to a list of dicts (handles NumPy types)
191
- json_str = result.to_json(orient='records', date_format='iso')
192
- json.dumps(json.loads(json_str), indent=2) # Re-parse for pretty formatting
193
  elif isinstance(result, (dict, list)):
194
  return json.dumps(result, indent=2)
195
- return str(result)
 
 
 
 
 
196
 
197
  async def process_response(self, response: CsvChatResult, chat_id: str) -> str:
198
- """Process the response with proper variable handling"""
199
- output_parts = [response.casual_response]
200
-
201
- # Process analysis operations first
202
- for operation in response.analysis_operations:
203
- execution_result = self.execute_code(operation.code.code)
204
 
205
- # Get the result from locals
206
- result = self.exec_locals.get(operation.result_var)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
 
208
- if execution_result['error']:
209
- output_parts.append(f"\n❌ Error in operation '{operation.result_var}':")
210
- output_parts.append("```python\n" + execution_result['error']['message'] + "\n```")
211
- elif result is not None:
212
- # Handle empty/None results
213
- if result is None or (hasattr(result, '__len__') and len(result) == 0):
214
- output_parts.append(f"\n⚠️ Values are missing - Operation '{operation.result_var}' returned no data")
215
- else:
216
- output_parts.append(f"\n🔹 Result for '{operation.result_var}':")
217
- output_parts.append("```python\n" + self._format_result(result) + "\n```")
218
- else:
219
- output_str = execution_result['output'].strip()
220
- if output_str:
221
- output_parts.append(f"\nOutput for '{operation.result_var}':")
222
- output_parts.append("```\n" + output_str + "\n```")
223
-
224
-
225
- # Process charts after all operations
226
- if response.charts:
227
- output_parts.append("\n📊 Visualizations:")
228
- for chart in response.charts:
229
- if chart.code:
230
- chart_result = self.execute_code(chart.code)
231
- if chart_result['plots']:
232
- for plot_data in chart_result['plots']:
233
- try:
234
- public_url = await self.save_plot_to_supabase(
235
- plot_data=plot_data,
236
- description=chart.image_description,
237
- chat_id=chat_id
238
- )
239
- output_parts.append(f"\n🖼️ {chart.image_description}")
240
- output_parts.append(f"![{chart.image_description}]({public_url})")
241
- except Exception as e:
242
- output_parts.append(f"\n⚠️ Values are missing - Error uploading chart: {str(e)}")
243
- elif chart_result['error']:
244
- output_parts.append("```python\n" + f"Error generating {chart.image_description}: {chart_result['error']['message']}" + "\n```")
245
- else:
246
- output_parts.append(f"\n⚠️ Values are missing - No chart generated for '{chart.image_description}'")
247
-
248
- return "\n".join(output_parts)
 
16
  import scipy.stats as stats
17
  from pydantic import BaseModel
18
  from tabulate import tabulate
19
+ import re
20
 
21
  from supabase_service import upload_file_to_supabase
22
 
 
28
  language: str = "python"
29
  code: str
30
 
31
+ def clean_code(self) -> str:
32
+ """Remove trailing newlines while preserving internal structure"""
33
+ return self.code.rstrip('\n')
34
+
35
 
36
  class ChartSpecification(BaseModel):
37
  """Details about requested charts"""
38
  image_description: str
39
  code: Optional[str] = None
40
 
41
+ def clean_description(self) -> str:
42
+ """Replace newlines in description with spaces (preserves readability)"""
43
+ return self.image_description.replace('\n', ' ').strip()
44
+
45
 
46
  class AnalysisOperation(BaseModel):
47
  """Container for a single analysis operation with its code and result"""
 
56
  analysis_operations: List[AnalysisOperation]
57
  charts: Optional[List[ChartSpecification]] = None
58
 
59
+ def clean_casual_response(self) -> str:
60
+ """Clean casual response by replacing newlines with spaces when appropriate"""
61
+ # Preserve intentional line breaks (markdown-style)
62
+ if '\n\n' in self.casual_response:
63
+ return self.casual_response
64
+ return self.casual_response.replace('\n', ' ')
65
+
66
 
67
  class PythonExecutor:
68
  """Handles execution of Python code with comprehensive data analysis libraries"""
69
 
70
  def __init__(self, df: pd.DataFrame, charts_folder: str = "generated_charts"):
 
 
 
 
 
 
 
71
  self.df = df
72
  self.charts_folder = Path(charts_folder)
73
  self.charts_folder.mkdir(exist_ok=True)
74
  self.exec_locals = {}
75
 
76
  def execute_code(self, code: str) -> Dict[str, Any]:
 
 
 
 
 
 
 
 
 
77
  output = ""
78
  error = None
79
  plots = []
 
 
80
  stdout = io.StringIO()
 
 
81
  original_show = plt.show
82
 
83
  def custom_show():
 
84
  for i, fig in enumerate(plt.get_fignums()):
85
  figure = plt.figure(fig)
 
86
  buf = io.BytesIO()
87
  figure.savefig(buf, format='png', bbox_inches='tight')
88
  buf.seek(0)
 
90
  plt.close('all')
91
 
92
  try:
 
93
  exec_globals = {
94
+ 'pd': pd, 'np': np, 'df': self.df,
95
+ 'plt': plt, 'sns': sns, 'tabulate': tabulate,
96
+ 'stats': stats, 'datetime': datetime,
97
+ 'timedelta': timedelta, 'time': time,
98
+ 'json': json, '__builtins__': __builtins__,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  }
100
 
 
101
  plt.show = custom_show
 
 
102
  with contextlib.redirect_stdout(stdout):
103
  exec(code, exec_globals, self.exec_locals)
 
104
  output = stdout.getvalue()
105
 
106
  except Exception as e:
107
+ error = {"message": str(e), "traceback": traceback.format_exc()}
 
 
 
108
  finally:
 
109
  plt.show = original_show
110
 
111
  return {
 
116
  }
117
 
118
  async def save_plot_to_supabase(self, plot_data: bytes, description: str, chat_id: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
119
  filename = f"chart_{uuid.uuid4().hex}.png"
120
  filepath = self.charts_folder / filename
121
 
 
122
  with open(filepath, 'wb') as f:
123
  f.write(plot_data)
124
 
125
  try:
 
126
  public_url = await upload_file_to_supabase(
127
  file_path=str(filepath),
128
  file_name=filename,
129
  chat_id=chat_id
130
  )
 
 
131
  os.remove(filepath)
 
132
  return public_url
133
  except Exception as e:
 
134
  if os.path.exists(filepath):
135
  os.remove(filepath)
136
  raise Exception(f"Failed to upload plot to Supabase: {e}")
137
 
138
  def _format_result(self, result: Any) -> str:
139
+ """Format result with safe newline handling"""
140
  if isinstance(result, (pd.DataFrame, pd.Series)):
141
+ return result.to_string()
 
 
142
  elif isinstance(result, (dict, list)):
143
  return json.dumps(result, indent=2)
144
+
145
+ # Clean string representation while preserving essential newlines
146
+ str_result = str(result)
147
+ if '\n' in str_result and not any(x in str_result for x in ['```', 'def ', 'class ']):
148
+ return str_result.replace('\n', ' ')
149
+ return str_result
150
 
151
  async def process_response(self, response: CsvChatResult, chat_id: str) -> str:
152
+ """Process response with intelligent newline handling"""
153
+ output_parts = [response.clean_casual_response()]
 
 
 
 
154
 
155
+ # Process analysis operations
156
+ for operation in response.analysis_operations:
157
+ execution_result = self.execute_code(operation.code.clean_code())
158
+ result = self.exec_locals.get(operation.result_var)
159
+
160
+ if execution_result['error']:
161
+ output_parts.append(f"\n❌ Error in operation '{operation.result_var}':")
162
+ output_parts.append(f"```python\n{execution_result['error']['message']}\n```")
163
+ elif result is not None:
164
+ if result is None or (hasattr(result, '__len__') and len(result) == 0):
165
+ output_parts.append(f"\n⚠️ Values are missing - Operation '{operation.result_var}' returned no data")
166
+ else:
167
+ output_parts.append(f"\n🔹 Result for '{operation.result_var}':")
168
+ output_parts.append(f"```python\n{self._format_result(result)}\n```")
169
+ else:
170
+ output_str = execution_result['output'].strip()
171
+ if output_str:
172
+ output_parts.append(f"\nOutput for '{operation.result_var}':")
173
+ output_parts.append(f"```\n{output_str}\n```")
174
 
175
+ # Process charts
176
+ if response.charts:
177
+ output_parts.append("\n📊 Visualizations:")
178
+ for chart in response.charts:
179
+ if chart.code:
180
+ chart_result = self.execute_code(chart.code)
181
+ if chart_result['plots']:
182
+ for plot_data in chart_result['plots']:
183
+ try:
184
+ public_url = await self.save_plot_to_supabase(
185
+ plot_data=plot_data,
186
+ description=chart.clean_description(),
187
+ chat_id=chat_id
188
+ )
189
+ output_parts.append(f"\n🖼️ {chart.clean_description()}")
190
+ output_parts.append(f"![{chart.clean_description()}]({public_url})")
191
+ except Exception as e:
192
+ output_parts.append(f"\n⚠️ Error uploading chart: {str(e)}")
193
+ elif chart_result['error']:
194
+ output_parts.append(f"```python\nError generating chart: {chart_result['error']['message']}\n```")
195
+ else:
196
+ output_parts.append(f"\n⚠️ No chart generated for '{chart.clean_description()}'")
197
+
198
+ return "\n".join(output_parts)