Soumik555 commited on
Commit
dab2720
·
1 Parent(s): 7d5e274

DB Chat for pro users only

Browse files
Files changed (1) hide show
  1. python_code_executor_service.py +128 -103
python_code_executor_service.py CHANGED
@@ -16,69 +16,23 @@ import seaborn as sns
16
  import scipy.stats as stats
17
  from pydantic import BaseModel
18
  from tabulate import tabulate
19
- import re
20
 
21
  from supabase_service import upload_file_to_supabase
22
 
23
  # Load environment variables from .env file
24
  load_dotenv()
25
 
26
-
27
- class TextCleaner:
28
- """Utility class for cleaning text while preserving structure"""
29
-
30
- @staticmethod
31
- def clean_casual_response(text: str) -> str:
32
- """Clean casual response by replacing newlines with spaces when appropriate"""
33
- # Preserve intentional line breaks (markdown-style)
34
- if '\n\n' in text:
35
- return text
36
- return text.replace('\n', ' ').strip()
37
-
38
- @staticmethod
39
- def clean_code(code: str) -> str:
40
- """Remove trailing newlines while preserving internal structure"""
41
- return code.rstrip('\n')
42
-
43
- @staticmethod
44
- def clean_description(description: str) -> str:
45
- """Replace newlines in description with spaces (preserves readability)"""
46
- return description.replace('\n', ' ').strip()
47
-
48
- @staticmethod
49
- def format_result(result: Any) -> str:
50
- """Format result with safe newline handling"""
51
- if isinstance(result, (pd.DataFrame, pd.Series)):
52
- return result.to_string()
53
- if isinstance(result, (dict, list)):
54
- return json.dumps(result, indent=2)
55
-
56
- # Clean string representation while preserving essential newlines
57
- str_result = str(result)
58
- if '\n' in str_result and not any(x in str_result for x in ['```', 'def ', 'class ']):
59
- return str_result.replace('\n', ' ')
60
- return str_result
61
-
62
-
63
  class CodeResponse(BaseModel):
64
  """Container for code-related responses"""
65
  language: str = "python"
66
  code: str
67
 
68
- def clean_code(self) -> str:
69
- """Delegate to TextCleaner"""
70
- return TextCleaner.clean_code(self.code)
71
-
72
 
73
  class ChartSpecification(BaseModel):
74
  """Details about requested charts"""
75
  image_description: str
76
  code: Optional[str] = None
77
 
78
- def clean_description(self) -> str:
79
- """Delegate to TextCleaner"""
80
- return TextCleaner.clean_description(self.image_description)
81
-
82
 
83
  class AnalysisOperation(BaseModel):
84
  """Container for a single analysis operation with its code and result"""
@@ -93,30 +47,48 @@ class CsvChatResult(BaseModel):
93
  analysis_operations: List[AnalysisOperation]
94
  charts: Optional[List[ChartSpecification]] = None
95
 
96
- def clean_casual_response(self) -> str:
97
- """Delegate to TextCleaner"""
98
- return TextCleaner.clean_casual_response(self.casual_response)
99
-
100
 
101
  class PythonExecutor:
102
  """Handles execution of Python code with comprehensive data analysis libraries"""
103
 
104
  def __init__(self, df: pd.DataFrame, charts_folder: str = "generated_charts"):
 
 
 
 
 
 
 
105
  self.df = df
106
  self.charts_folder = Path(charts_folder)
107
  self.charts_folder.mkdir(exist_ok=True)
108
  self.exec_locals = {}
109
 
110
  def execute_code(self, code: str) -> Dict[str, Any]:
 
 
 
 
 
 
 
 
 
111
  output = ""
112
  error = None
113
  plots = []
 
 
114
  stdout = io.StringIO()
 
 
115
  original_show = plt.show
116
 
117
  def custom_show():
 
118
  for i, fig in enumerate(plt.get_fignums()):
119
  figure = plt.figure(fig)
 
120
  buf = io.BytesIO()
121
  figure.savefig(buf, format='png', bbox_inches='tight')
122
  buf.seek(0)
@@ -124,22 +96,47 @@ class PythonExecutor:
124
  plt.close('all')
125
 
126
  try:
 
127
  exec_globals = {
128
- 'pd': pd, 'np': np, 'df': self.df,
129
- 'plt': plt, 'sns': sns, 'tabulate': tabulate,
130
- 'stats': stats, 'datetime': datetime,
131
- 'timedelta': timedelta, 'time': time,
132
- 'json': json, '__builtins__': __builtins__,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  }
134
 
 
135
  plt.show = custom_show
 
 
136
  with contextlib.redirect_stdout(stdout):
137
  exec(code, exec_globals, self.exec_locals)
 
138
  output = stdout.getvalue()
139
 
140
  except Exception as e:
141
- error = {"message": str(e), "traceback": traceback.format_exc()}
 
 
 
142
  finally:
 
143
  plt.show = original_show
144
 
145
  return {
@@ -150,74 +147,102 @@ class PythonExecutor:
150
  }
151
 
152
  async def save_plot_to_supabase(self, plot_data: bytes, description: str, chat_id: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
153
  filename = f"chart_{uuid.uuid4().hex}.png"
154
  filepath = self.charts_folder / filename
155
 
 
156
  with open(filepath, 'wb') as f:
157
  f.write(plot_data)
158
 
159
  try:
 
160
  public_url = await upload_file_to_supabase(
161
  file_path=str(filepath),
162
  file_name=filename,
163
  chat_id=chat_id
164
  )
 
 
165
  os.remove(filepath)
 
166
  return public_url
167
  except Exception as e:
 
168
  if os.path.exists(filepath):
169
  os.remove(filepath)
170
  raise Exception(f"Failed to upload plot to Supabase: {e}")
171
 
172
  def _format_result(self, result: Any) -> str:
173
- """Delegate to TextCleaner"""
174
- return TextCleaner.format_result(result)
 
 
 
 
 
 
175
 
176
  async def process_response(self, response: CsvChatResult, chat_id: str) -> str:
177
- """Process response with intelligent newline handling"""
178
- output_parts = [response.clean_casual_response()]
179
-
180
- # Process analysis operations
181
- for operation in response.analysis_operations:
182
- execution_result = self.execute_code(operation.code.clean_code())
183
- result = self.exec_locals.get(operation.result_var)
184
-
185
- if execution_result['error']:
186
- output_parts.append(f"\n❌ Error in operation '{operation.result_var}':")
187
- output_parts.append(f"```python\n{execution_result['error']['message']}\n```")
188
- elif result is not None:
189
- if result is None or (hasattr(result, '__len__') and len(result) == 0):
190
- output_parts.append(f"\n⚠️ Values are missing - Operation '{operation.result_var}' returned no data")
191
- else:
192
- output_parts.append(f"\n🔹 Result for '{operation.result_var}':")
193
- output_parts.append(f"```python\n{self._format_result(result)}\n```")
194
- else:
195
- output_str = execution_result['output'].strip()
196
- if output_str:
197
- output_parts.append(f"\nOutput for '{operation.result_var}':")
198
- output_parts.append(f"```\n{output_str}\n```")
199
 
200
- # Process charts
201
- if response.charts:
202
- output_parts.append("\n📊 Visualizations:")
203
- for chart in response.charts:
204
- if chart.code:
205
- chart_result = self.execute_code(chart.code)
206
- if chart_result['plots']:
207
- for plot_data in chart_result['plots']:
208
- try:
209
- public_url = await self.save_plot_to_supabase(
210
- plot_data=plot_data,
211
- description=chart.clean_description(),
212
- chat_id=chat_id
213
- )
214
- output_parts.append(f"\n🖼️ {chart.clean_description()}")
215
- output_parts.append(f"![{chart.clean_description()}]({public_url})")
216
- except Exception as e:
217
- output_parts.append(f"\n⚠️ Error uploading chart: {str(e)}")
218
- elif chart_result['error']:
219
- output_parts.append(f"```python\nError generating chart: {chart_result['error']['message']}\n```")
220
- else:
221
- output_parts.append(f"\n⚠️ No chart generated for '{chart.clean_description()}'")
222
 
223
- return "\n".join(output_parts)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  import scipy.stats as stats
17
  from pydantic import BaseModel
18
  from tabulate import tabulate
 
19
 
20
  from supabase_service import upload_file_to_supabase
21
 
22
  # Load environment variables from .env file
23
  load_dotenv()
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  class CodeResponse(BaseModel):
26
  """Container for code-related responses"""
27
  language: str = "python"
28
  code: str
29
 
 
 
 
 
30
 
31
  class ChartSpecification(BaseModel):
32
  """Details about requested charts"""
33
  image_description: str
34
  code: Optional[str] = None
35
 
 
 
 
 
36
 
37
  class AnalysisOperation(BaseModel):
38
  """Container for a single analysis operation with its code and result"""
 
47
  analysis_operations: List[AnalysisOperation]
48
  charts: Optional[List[ChartSpecification]] = None
49
 
 
 
 
 
50
 
51
  class PythonExecutor:
52
  """Handles execution of Python code with comprehensive data analysis libraries"""
53
 
54
  def __init__(self, df: pd.DataFrame, charts_folder: str = "generated_charts"):
55
+ """
56
+ Initialize the PythonExecutor with a DataFrame
57
+
58
+ Args:
59
+ df (pd.DataFrame): The DataFrame to operate on
60
+ charts_folder (str): Folder to save charts in
61
+ """
62
  self.df = df
63
  self.charts_folder = Path(charts_folder)
64
  self.charts_folder.mkdir(exist_ok=True)
65
  self.exec_locals = {}
66
 
67
  def execute_code(self, code: str) -> Dict[str, Any]:
68
+ """
69
+ Execute Python code with full data analysis context and return results
70
+
71
+ Args:
72
+ code (str): Python code to execute
73
+
74
+ Returns:
75
+ dict: Dictionary containing execution results and any generated plots
76
+ """
77
  output = ""
78
  error = None
79
  plots = []
80
+
81
+ # Capture stdout
82
  stdout = io.StringIO()
83
+
84
+ # Monkey patch plt.show() to save figures
85
  original_show = plt.show
86
 
87
  def custom_show():
88
+ """Custom show function that saves plots instead of displaying them"""
89
  for i, fig in enumerate(plt.get_fignums()):
90
  figure = plt.figure(fig)
91
+ # Save plot to bytes buffer
92
  buf = io.BytesIO()
93
  figure.savefig(buf, format='png', bbox_inches='tight')
94
  buf.seek(0)
 
96
  plt.close('all')
97
 
98
  try:
99
+ # Create comprehensive execution context with data analysis libraries
100
  exec_globals = {
101
+ # Core data analysis
102
+ 'pd': pd,
103
+ 'np': np,
104
+ 'df': self.df,
105
+
106
+ # Visualization
107
+ 'plt': plt,
108
+ 'sns': sns,
109
+ 'tabulate': tabulate,
110
+
111
+ # Statistics
112
+ 'stats': stats,
113
+
114
+ # Date/time
115
+ 'datetime': datetime,
116
+ 'timedelta': timedelta,
117
+ 'time': time,
118
+
119
+ # Utilities
120
+ 'json': json,
121
+ '__builtins__': __builtins__,
122
  }
123
 
124
+ # Replace plt.show with custom implementation
125
  plt.show = custom_show
126
+
127
+ # Execute code and capture output
128
  with contextlib.redirect_stdout(stdout):
129
  exec(code, exec_globals, self.exec_locals)
130
+
131
  output = stdout.getvalue()
132
 
133
  except Exception as e:
134
+ error = {
135
+ "message": str(e),
136
+ "traceback": traceback.format_exc()
137
+ }
138
  finally:
139
+ # Restore original plt.show
140
  plt.show = original_show
141
 
142
  return {
 
147
  }
148
 
149
  async def save_plot_to_supabase(self, plot_data: bytes, description: str, chat_id: str) -> str:
150
+ """
151
+ Save plot to Supabase storage and return the public URL
152
+
153
+ Args:
154
+ plot_data (bytes): Image data in bytes
155
+ description (str): Description of the plot
156
+ chat_id (str): ID of the chat session
157
+
158
+ Returns:
159
+ str: Public URL of the uploaded chart
160
+ """
161
+ # Generate unique filename
162
  filename = f"chart_{uuid.uuid4().hex}.png"
163
  filepath = self.charts_folder / filename
164
 
165
+ # Save the plot locally first
166
  with open(filepath, 'wb') as f:
167
  f.write(plot_data)
168
 
169
  try:
170
+ # Upload to Supabase
171
  public_url = await upload_file_to_supabase(
172
  file_path=str(filepath),
173
  file_name=filename,
174
  chat_id=chat_id
175
  )
176
+
177
+ # Remove the local file after upload
178
  os.remove(filepath)
179
+
180
  return public_url
181
  except Exception as e:
182
+ # Clean up local file if upload fails
183
  if os.path.exists(filepath):
184
  os.remove(filepath)
185
  raise Exception(f"Failed to upload plot to Supabase: {e}")
186
 
187
  def _format_result(self, result: Any) -> str:
188
+ """Format the result for display"""
189
+ if isinstance(result, (pd.DataFrame, pd.Series)):
190
+ # Convert DataFrame to a string, then to a list of dicts (handles NumPy types)
191
+ json_str = result.to_json(orient='records', date_format='iso')
192
+ json.dumps(json.loads(json_str), indent=2) # Re-parse for pretty formatting
193
+ elif isinstance(result, (dict, list)):
194
+ return json.dumps(result, indent=2)
195
+ return str(result)
196
 
197
  async def process_response(self, response: CsvChatResult, chat_id: str) -> str:
198
+ """Process the response with proper variable handling"""
199
+ output_parts = [response.casual_response]
200
+
201
+ # Process analysis operations first
202
+ for operation in response.analysis_operations:
203
+ execution_result = self.execute_code(operation.code.code)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
 
205
+ # Get the result from locals
206
+ result = self.exec_locals.get(operation.result_var)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
 
208
+ if execution_result['error']:
209
+ output_parts.append(f"\n❌ Error in operation '{operation.result_var}':")
210
+ output_parts.append("```python\n" + execution_result['error']['message'] + "\n```")
211
+ elif result is not None:
212
+ # Handle empty/None results
213
+ if result is None or (hasattr(result, '__len__') and len(result) == 0):
214
+ output_parts.append(f"\n⚠️ Values are missing - Operation '{operation.result_var}' returned no data")
215
+ else:
216
+ output_parts.append(f"\n🔹 Result for '{operation.result_var}':")
217
+ output_parts.append("```python\n" + self._format_result(result) + "\n```")
218
+ else:
219
+ output_str = execution_result['output'].strip()
220
+ if output_str:
221
+ output_parts.append(f"\nOutput for '{operation.result_var}':")
222
+ output_parts.append("```\n" + output_str + "\n```")
223
+
224
+
225
+ # Process charts after all operations
226
+ if response.charts:
227
+ output_parts.append("\n📊 Visualizations:")
228
+ for chart in response.charts:
229
+ if chart.code:
230
+ chart_result = self.execute_code(chart.code)
231
+ if chart_result['plots']:
232
+ for plot_data in chart_result['plots']:
233
+ try:
234
+ public_url = await self.save_plot_to_supabase(
235
+ plot_data=plot_data,
236
+ description=chart.image_description,
237
+ chat_id=chat_id
238
+ )
239
+ output_parts.append(f"\n🖼️ {chart.image_description}")
240
+ output_parts.append(f"![{chart.image_description}]({public_url})")
241
+ except Exception as e:
242
+ output_parts.append(f"\n⚠️ Values are missing - Error uploading chart: {str(e)}")
243
+ elif chart_result['error']:
244
+ output_parts.append("```python\n" + f"Error generating {chart.image_description}: {chart_result['error']['message']}" + "\n```")
245
+ else:
246
+ output_parts.append(f"\n⚠️ Values are missing - No chart generated for '{chart.image_description}'")
247
+
248
+ return "\n".join(output_parts)