yinuozhang commited on
Commit
b871fd6
1 Parent(s): 45d6af3

add more functions

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +256 -42
  3. requirements.txt +4 -0
README.md CHANGED
@@ -24,7 +24,7 @@ This app analyzes peptide sequences from SMILES notation, identifying:
24
  The app will return:
25
  - The parsed peptide sequence
26
  - Whether the peptide is cyclic
27
- - Any modifications present
28
 
29
  ## Examples
30
  Try the provided example SMILES strings to see how the analyzer works.
 
24
  The app will return:
25
  - The parsed peptide sequence
26
  - Whether the peptide is cyclic
27
+ - A visualization of the peptide structure
28
 
29
  ## Examples
30
  Try the provided example SMILES strings to see how the analyzer works.
app.py CHANGED
@@ -2,6 +2,37 @@ import gradio as gr
2
  import re
3
  import pandas as pd
4
  from io import StringIO
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  def remove_nested_branches(smiles):
7
  """Remove nested branches from SMILES string"""
@@ -15,6 +46,7 @@ def remove_nested_branches(smiles):
15
  elif depth == 0:
16
  result += char
17
  return result
 
18
  def identify_linkage_type(segment):
19
  """
20
  Identify the type of linkage between residues
@@ -189,53 +221,219 @@ def analyze_single_smiles(smiles):
189
  sequence = parse_peptide(smiles)
190
 
191
  details = {
192
- 'SMILES': smiles,
193
  'Sequence': sequence,
194
  'Is Cyclic': 'Yes' if is_cyclic else 'No',
195
- 'Peptide Cycles': ', '.join(peptide_cycles) if peptide_cycles else 'None',
196
- 'Aromatic Cycles': ', '.join(aromatic_cycles) if aromatic_cycles else 'None'
197
  }
198
  return details
199
 
200
  except Exception as e:
201
  return {
202
- 'SMILES': smiles,
203
  'Sequence': f'Error: {str(e)}',
204
  'Is Cyclic': 'Error',
205
- 'Peptide Cycles': 'Error',
206
- 'Aromatic Cycles': 'Error'
207
  }
208
 
209
- def process_input(smiles_input=None, file_obj=None):
210
- """Process either direct SMILES input or file input"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  results = []
 
212
 
213
  # Handle direct SMILES input
214
  if smiles_input:
215
- result = analyze_single_smiles(smiles_input.strip())
216
- results.append(result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
 
218
  # Handle file input
219
  if file_obj is not None:
220
- content = file_obj.decode('utf-8')
221
- for line in StringIO(content):
222
- smiles = line.strip()
223
- if smiles: # Skip empty lines
224
- result = analyze_single_smiles(smiles)
225
- results.append(result)
226
-
227
- # Create formatted output
228
- output_text = ""
229
- for i, result in enumerate(results, 1):
230
- output_text += f"Entry {i}:\n"
231
- output_text += f"SMILES: {result['SMILES']}\n"
232
- output_text += f"Sequence: {result['Sequence']}\n"
233
- output_text += f"Is Cyclic: {result['Is Cyclic']}\n"
234
- output_text += f"Peptide Cycles: {result['Peptide Cycles']}\n"
235
- output_text += f"Aromatic Cycles: {result['Aromatic Cycles']}\n"
236
- output_text += "-" * 50 + "\n"
237
-
238
- return output_text
239
 
240
  # Create Gradio interface
241
  iface = gr.Interface(
@@ -250,30 +448,46 @@ iface = gr.Interface(
250
  label="Or upload a text file with SMILES",
251
  file_types=[".txt"],
252
  type="binary"
 
 
 
 
253
  )
254
  ],
255
- outputs=gr.Textbox(
256
- label="Analysis Results",
257
- lines=10
258
- ),
259
- title="Peptide Structure Analyzer",
 
 
 
 
 
 
 
 
 
 
 
260
  description="""
261
- Analyze peptide structures from SMILES notation to:
262
- 1. Determine if the peptide is cyclic
263
- 2. Identify peptide cycles vs aromatic rings
264
- 3. Parse the amino acid sequence
 
 
265
 
266
- Input: Either enter a SMILES string directly or upload a text file with multiple SMILES (one per line)
267
  """,
268
  examples=[
269
  # Example cyclic peptide with Proline
270
- ["CC(C)C[C@@H]1NC(=O)[C@@H]2CCCN2C(=O)[C@@H](CC(C)C)NC(=O)[C@@H](CC(C)C)N(C)C(=O)[C@H](C)NC(=O)[C@H](Cc2ccccc2)NC1=O", None],
271
  # Example cyclic peptide with ester bond
272
- ["CC(C)C[C@@H]1OC(=O)[C@H](C)NC(=O)[C@H](C(C)C)OC(=O)[C@H](C)N(C)C(=O)[C@@H](C)NC(=O)[C@@H](Cc2ccccc2)N(C)C1=O", None]
273
  ],
274
  allow_flagging="never"
275
  )
276
-
277
  # Launch the app
278
  if __name__ == "__main__":
279
  iface.launch()
 
2
  import re
3
  import pandas as pd
4
  from io import StringIO
5
+ import rdkit
6
+ from rdkit import Chem
7
+ from rdkit.Chem import AllChem, Draw
8
+ import numpy as np
9
+ from PIL import Image, ImageDraw, ImageFont
10
+ import matplotlib.pyplot as plt
11
+ import matplotlib.patches as patches
12
+ from io import BytesIO
13
+
14
def is_peptide(smiles):
    """Report whether a SMILES string contains a peptide-like linkage.

    The string is parsed with RDKit and then searched, in order, for each of
    the SMARTS motifs listed below. The first match short-circuits to True.
    A string RDKit cannot parse, or one matching none of the motifs, gives
    False.
    """
    molecule = Chem.MolFromSmiles(smiles)
    if molecule is None:
        return False

    linkage_smarts = (
        '[NH][C](=O)',              # peptide bond: NC(=O)
        '[N;H0;$(NC)](C)[C](=O)',   # N-methylated peptide bond: N(C)C(=O)
        'O[C](=O)',                 # ester bond in cyclic depsipeptides: OC(=O)
    )
    # Generator keeps the evaluation lazy: later patterns are only compiled
    # when the earlier ones did not match.
    return any(
        molecule.HasSubstructMatch(Chem.MolFromSmarts(smarts))
        for smarts in linkage_smarts
    )
36
 
37
  def remove_nested_branches(smiles):
38
  """Remove nested branches from SMILES string"""
 
46
  elif depth == 0:
47
  result += char
48
  return result
49
+
50
  def identify_linkage_type(segment):
51
  """
52
  Identify the type of linkage between residues
 
221
  sequence = parse_peptide(smiles)
222
 
223
  details = {
224
+ #'SMILES': smiles,
225
  'Sequence': sequence,
226
  'Is Cyclic': 'Yes' if is_cyclic else 'No',
227
+ #'Peptide Cycles': ', '.join(peptide_cycles) if peptide_cycles else 'None',
228
+ #'Aromatic Cycles': ', '.join(aromatic_cycles) if aromatic_cycles else 'None'
229
  }
230
  return details
231
 
232
  except Exception as e:
233
  return {
234
+ #'SMILES': smiles,
235
  'Sequence': f'Error: {str(e)}',
236
  'Is Cyclic': 'Error',
237
+ #'Peptide Cycles': 'Error',
238
+ #'Aromatic Cycles': 'Error'
239
  }
240
 
241
def _load_label_font(size):
    """Return a PIL font of the requested pixel size for image annotations.

    ``ImageFont.load_default`` only accepts a size argument from Pillow 10.1
    onwards; earlier releases raise ``TypeError``. In that case the DejaVu
    Sans font located through matplotlib's font manager is used instead, and
    as a last resort Pillow's built-in fixed-size font is returned.
    """
    try:
        return ImageFont.load_default(size)
    except TypeError:
        pass
    try:
        # Local import: matplotlib is already a dependency of this file.
        from matplotlib import font_manager
        return ImageFont.truetype(font_manager.findfont("DejaVu Sans"), size)
    except (ImportError, OSError, ValueError):
        return ImageFont.load_default()


def _draw_boxed_text(draw, xy, text, font, padding=10):
    """Draw black text centred on ``xy`` over a padded white rectangle."""
    # The bounding box must use the same anchor as the text itself,
    # otherwise the white background is offset from the glyphs.
    left, top, right, bottom = draw.textbbox(xy, text, font=font, anchor="mm")
    draw.rectangle(
        [left - padding, top - padding, right + padding, bottom + padding],
        fill='white', outline='white',
    )
    draw.text(xy, text, font=font, fill='black', anchor="mm")


def annotate_cyclic_structure(mol, sequence):
    """Create an annotated 2D depiction of a molecule with residue labels.

    Args:
        mol: RDKit molecule. 2D coordinates are computed on it in place.
        sequence: Dash-separated residue names, optionally wrapped as
            ``cyclo(...)``.

    Returns:
        A 2000x2000 PIL image of the molecule. Numbered residue labels are
        spaced evenly on a circle around the image centre (starting at the
        top, clockwise) and the full sequence is written near the top edge.
        Label positions depend only on the residue index, not on atom
        coordinates.
    """
    canvas_size = 2000
    center = canvas_size // 2
    label_radius = 700  # distance of the residue labels from the centre
    header_y = 100      # vertical position of the sequence banner

    # Render the molecule itself.
    AllChem.Compute2DCoords(mol)
    drawer = Draw.rdMolDraw2D.MolDraw2DCairo(canvas_size, canvas_size)
    drawer.drawOptions().addAtomIndices = False
    drawer.DrawMolecule(mol)
    drawer.FinishDrawing()

    # Switch to PIL for the text overlays.
    img = Image.open(BytesIO(drawer.GetDrawingText()))
    draw = ImageDraw.Draw(img)
    font = _load_label_font(60)

    # Split the sequence into residues, dropping the cyclo( ... ) wrapper.
    if sequence.startswith('cyclo('):
        residues = sequence[6:-1].split('-')
    else:
        residues = sequence.split('-')
    n_residues = len(residues)

    for i, residue in enumerate(residues):
        # -pi/2 puts the first label at the top of the image.
        angle = (2 * np.pi * i / n_residues) - np.pi / 2
        label_xy = (
            center + label_radius * np.cos(angle),
            center + label_radius * np.sin(angle),
        )
        _draw_boxed_text(draw, label_xy, f"{i+1}. {residue}", font)

    _draw_boxed_text(draw, (center, header_y), f"Sequence: {sequence}", font)

    return img
317
+
318
def create_linear_peptide_viz(sequence):
    """Create a linear box-and-line diagram of a peptide sequence.

    Args:
        sequence: Dash-separated residue names, optionally wrapped as
            ``cyclo(...)``.

    Returns:
        A matplotlib figure with one labelled box per residue, connected by
        line segments. For a ``cyclo(...)`` sequence a red double-headed
        arrow marks the cyclic connection. The caller is responsible for
        closing the figure.
    """
    fig, ax = plt.subplots(figsize=(15, 5))
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 2)

    # Parse sequence to get residues (strip the cyclo( ... ) wrapper).
    is_cyclic = sequence.startswith('cyclo(')
    if is_cyclic:
        residues = sequence[6:-1].split('-')
    else:
        residues = sequence.split('-')

    num_residues = len(residues)
    if num_residues > 1:
        # Spread residues over x = 0.5 .. 9.5, leaving margins on both sides.
        spacing = 9.0 / (num_residues - 1)
        first_x = 0.5
    else:
        # A single residue has no neighbour to space against; dividing by
        # (num_residues - 1) would raise ZeroDivisionError. Centre it instead.
        spacing = 0.0
        first_x = 5.0

    y_pos = 1.5
    for i, residue in enumerate(residues):
        x_pos = first_x + i * spacing

        # Amino acid box.
        rect = patches.Rectangle((x_pos - 0.3, y_pos - 0.2), 0.6, 0.4,
                                 facecolor='lightblue', edgecolor='black')
        ax.add_patch(rect)

        # Bond to the next residue.
        if i < num_residues - 1:
            ax.plot([x_pos + 0.3, x_pos + spacing - 0.3], [y_pos, y_pos],
                    color='black', linestyle='-', linewidth=2)

        # Residue label below the box.
        ax.text(x_pos, y_pos - 0.5, residue, ha='center', va='top', fontsize=14)

    # If cyclic, add arrow connecting the two ends.
    if is_cyclic:
        ax.annotate('', xy=(9.5, y_pos), xytext=(0.5, y_pos),
                    arrowprops=dict(arrowstyle='<->', color='red', lw=2))
        ax.text(5, y_pos + 0.3, 'Cyclic Connection', ha='center', color='red', fontsize=14)

    # Full sequence at the top.
    ax.text(5, 1.9, f"Sequence: {sequence}", ha='center', va='bottom', fontsize=12)

    # Hide the axes entirely.
    ax.set_xticks([])
    ax.set_yticks([])
    ax.axis('off')

    return fig
369
+
370
def _figure_to_image(fig):
    """Render a matplotlib figure to a PIL image, always closing the figure."""
    buf = BytesIO()
    try:
        fig.savefig(buf, format='png', bbox_inches='tight', dpi=300)
    finally:
        # Close even when rendering fails so figures do not accumulate.
        plt.close(fig)
    buf.seek(0)
    return Image.open(buf)


def process_input(smiles_input=None, file_obj=None, show_linear=False):
    """Process input and create visualizations.

    Args:
        smiles_input: A single SMILES string typed by the user, or None.
            Blank or whitespace-only text is treated as no text input.
        file_obj: Raw bytes of an uploaded text file with one SMILES per
            line, or None. Only used when no SMILES text is given.
        show_linear: When true, also render the linear representation for a
            directly entered SMILES.

    Returns:
        A ``(text, structure_image, linear_image)`` tuple. The images are
        PIL images or None; file input and all error paths return None for
        both images.
    """
    smiles = smiles_input.strip() if smiles_input else ""

    # Handle direct SMILES input
    if smiles:
        try:
            # Check validity before the peptide test: is_peptide() also
            # returns False for unparsable input, which previously hid the
            # more specific "invalid SMILES" message.
            mol = Chem.MolFromSmiles(smiles)
            if mol is None:
                return "Error: Invalid SMILES notation.", None, None

            if not is_peptide(smiles):
                return "Error: Input SMILES does not appear to be a peptide structure.", None, None

            # Get sequence and cyclic information
            sequence = parse_peptide(smiles)
            is_cyclic, _, _ = is_cyclic_peptide(smiles)

            # Create cyclic structure visualization
            img_cyclic = annotate_cyclic_structure(mol, sequence)

            # Create linear representation if requested
            img_linear = None
            if show_linear:
                img_linear = _figure_to_image(create_linear_peptide_viz(sequence))

            # Format text output
            output_text = f"Sequence: {sequence}\n"
            output_text += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"

            return output_text, img_cyclic, img_linear

        except Exception as e:
            return f"Error processing SMILES: {str(e)}", None, None

    # Handle file input
    if file_obj is not None:
        try:
            # utf-8-sig also decodes plain UTF-8 but drops a leading BOM,
            # which would otherwise corrupt the first SMILES in the file.
            content = file_obj.decode('utf-8-sig')
            chunks = []
            for line in StringIO(content):
                entry = line.strip()
                if not entry:
                    continue
                if not is_peptide(entry):
                    chunks.append(f"Skipping non-peptide SMILES: {entry}\n")
                    continue
                result = analyze_single_smiles(entry)
                chunks.append(f"Sequence: {result['Sequence']}\n")
                chunks.append(f"Is Cyclic: {result['Is Cyclic']}\n")
                chunks.append("-" * 50 + "\n")
            return "".join(chunks), None, None
        except Exception as e:
            return f"Error processing file: {str(e)}", None, None

    return "No input provided.", None, None
 
437
 
438
  # Create Gradio interface
439
  iface = gr.Interface(
 
448
  label="Or upload a text file with SMILES",
449
  file_types=[".txt"],
450
  type="binary"
451
+ ),
452
+ gr.Checkbox(
453
+ label="Show linear representation",
454
+ value=False
455
  )
456
  ],
457
+ outputs=[
458
+ gr.Textbox(
459
+ label="Analysis Results",
460
+ lines=10
461
+ ),
462
+ gr.Image(
463
+ label="2D Structure with Annotations",
464
+ type="pil"
465
+ ),
466
+ gr.Image(
467
+ label="Linear Representation",
468
+ type="pil",
469
+ visible=lambda x: x # Only show when checkbox is checked
470
+ )
471
+ ],
472
+ title="Peptide Structure Analyzer and Visualizer",
473
  description="""
474
+ Analyze and visualize peptide structures from SMILES notation:
475
+ 1. Validates if the input is a peptide structure
476
+ 2. Determines if the peptide is cyclic
477
+ 3. Parses the amino acid sequence
478
+ 4. Creates 2D structure visualization with residue annotations
479
+ 5. Optional linear representation
480
 
481
+ Input: Either enter a SMILES string directly or upload a text file
482
  """,
483
  examples=[
484
  # Example cyclic peptide with Proline
485
+ ["CC(C)C[C@@H]1NC(=O)[C@@H]2CCCN2C(=O)[C@@H](CC(C)C)NC(=O)[C@@H](CC(C)C)N(C)C(=O)[C@H](C)NC(=O)[C@H](Cc2ccccc2)NC1=O", None, True],
486
  # Example cyclic peptide with ester bond
487
+ ["CC(C)C[C@@H]1OC(=O)[C@H](C)NC(=O)[C@H](C(C)C)OC(=O)[C@H](C)N(C)C(=O)[C@@H](C)NC(=O)[C@@H](Cc2ccccc2)N(C)C1=O", None, True]
488
  ],
489
  allow_flagging="never"
490
  )
 
491
  # Launch the app
492
  if __name__ == "__main__":
493
  iface.launch()
requirements.txt CHANGED
@@ -1,2 +1,6 @@
1
  gradio==4.19.2
2
  pandas==2.2.0
 
 
 
 
 
1
  gradio==4.19.2
2
  pandas==2.2.0
3
+ rdkit==2023.9.1
4
+ Pillow==10.0.0
5
+ matplotlib==3.7.1
6
+ numpy>=1.24.3