Spaces:

Bhadralab
/

B3PPs_Predict

Sleeping

App Files Files Community

B3PPs_Predict / app.py

Bhadralab

Update app.py

34fd3ae verified 4 months ago

raw

history blame

2.72 kB

	import pandas as pd
	import torch
	import re
	import gradio as gr
	from io import StringIO
	from transformers import EsmForSequenceClassification, EsmTokenizer

	# Identifiers handed to from_pretrained for the tokenizer and the classifier.
	# NOTE(review): "model/best_model5" is presumably a directory shipped next to
	# this file -- confirm.
	_TOKENIZER_CHECKPOINT = "facebook/esm2_t6_8M_UR50D"
	_CLASSIFIER_CHECKPOINT = "model/best_model5"

	tokenizer = EsmTokenizer.from_pretrained(
	    _TOKENIZER_CHECKPOINT,
	    do_lower_case=False,
	)
	model = EsmForSequenceClassification.from_pretrained(_CLASSIFIER_CHECKPOINT)
	# The app only runs inference, so put the model in evaluation mode once at load time.
	model.eval()

	def read_fasta(fasta_string):
	    """Parse FASTA text into parallel lists of headers and peptide sequences.

	    A record is a header line starting with ">" followed by one or more
	    sequence lines; the sequence lines of a record are concatenated.
	    Leading/trailing whitespace on every line is stripped and blank lines
	    are ignored.

	    Args:
	        fasta_string: FASTA-formatted text.

	    Returns:
	        A ``(headers, sequences)`` tuple of two lists of equal length, in
	        input order. Headers keep their leading ">".

	    Raises:
	        ValueError: If a sequence line contains anything other than the 20
	            uppercase natural amino-acid letters, if sequence data appears
	            before the first header, if a header has no sequence, or if a
	            record's full sequence is longer than 30 characters.
	    """
	    max_length = 30
	    # One (header, [sequence chunks]) pair per ">" line, in input order.
	    # Keeping header and chunks together guarantees the two returned lists
	    # always line up.
	    records = []

	    for raw_line in StringIO(fasta_string):
	        line = raw_line.strip()
	        if not line:
	            # Blank separator or trailing lines carry no data.
	            continue
	        if line.startswith(">"):
	            records.append((line, []))
	            continue

	        if not re.fullmatch(r'[ACDEFGHIKLMNPQRSTVWY]+', line):
	            raise ValueError("Invalid FASTA format: Sequences must contain only natural amino acids (ACDEFGHIKLMNPQRSTVWY).")
	        if not records:
	            raise ValueError("Invalid FASTA format: sequence data found before the first '>' header line.")

	        chunks = records[-1][1]
	        chunks.append(line)
	        # Enforce the limit on the whole record rather than per line, so a
	        # sequence wrapped over several lines cannot bypass it.
	        sequence = "".join(chunks)
	        if len(sequence) > max_length:
	            raise ValueError(f"Sequence too long: '{sequence}' (Max {max_length} characters allowed).")

	    headers = []
	    sequences = []
	    for header, chunks in records:
	        if not chunks:
	            raise ValueError(f"Invalid FASTA format: header '{header}' has no sequence.")
	        headers.append(header)
	        sequences.append("".join(chunks))

	    return headers, sequences

	def predict_peptide_class(sequences):
	    """Score a batch of peptide sequences with the module-level model.

	    Args:
	        sequences: List of peptide sequence strings.

	    Returns:
	        A ``(probs, classes)`` tuple. ``probs`` is a NumPy array holding, for
	        each sequence, the sigmoid of the logit at column index 1.
	        ``classes`` is a list with "B3PP" where that value is above 0.5 and
	        "Non-B3PP" otherwise.
	    """
	    # padding=True pads every sequence to the longest one in the batch.
	    # Without it, peptides of different lengths cannot be stacked into a
	    # single tensor and the tokenizer call fails.
	    inputs = tokenizer(sequences, return_tensors='pt', padding=True)
	    with torch.no_grad():
	        logits = model(**inputs).logits
	    # NOTE(review): sigmoid is applied to each logit independently; whether
	    # that (rather than softmax across the logits) matches how the model
	    # was trained is not visible here -- confirm.
	    probs = torch.sigmoid(logits)[:, 1].cpu().numpy()
	    classes = ["B3PP" if p > 0.5 else "Non-B3PP" for p in probs]
	    return probs, classes

	def predict_from_fasta(fasta_input):
	    """Run the predictor on FASTA text and return the result as a table.

	    Args:
	        fasta_input: FASTA-formatted text, as submitted through the UI.

	    Returns:
	        A pandas DataFrame. On success it has the columns "Header",
	        "Sequence", "Probability" and "Predicted Class". On failure it has a
	        single "Error" column: the message of a ValueError is shown as-is,
	        any other exception is reported with a generic message.
	    """
	    # Local import: the file's import block does not bring in logging.
	    import logging

	    try:
	        headers, sequences = read_fasta(fasta_input)
	        if not sequences:
	            return pd.DataFrame({"Error": ["No valid sequences found."]})

	        probs, classes = predict_peptide_class(sequences)
	        return pd.DataFrame({"Header": headers, "Sequence": sequences, "Probability": probs, "Predicted Class": classes})

	    except ValueError as e:
	        return pd.DataFrame({"Error": [str(e)]})
	    except Exception:
	        # The user only sees a generic message, so record the traceback;
	        # otherwise the failure cannot be diagnosed.
	        logging.getLogger(__name__).exception("Unexpected error while predicting from FASTA input")
	        return pd.DataFrame({"Error": ["An unexpected error occurred."]})

	# Build the input and output components first, then wire them to the
	# prediction callback.
	fasta_box = gr.Textbox(
	    lines=10,
	    placeholder="Paste your peptide sequences in FASTA format here",
	)
	results_table = gr.Dataframe()

	iface = gr.Interface(
	    fn=predict_from_fasta,
	    inputs=fasta_box,
	    outputs=results_table,
	    title="B3PP Predictor",
	    description="Submit peptide sequences in FASTA format to determine their potential as blood-brain barrier penetration peptides. Sequences must consist exclusively of natural amino acids in uppercase letters, with a maximum length of 30 characters.",
	)

	# Start serving the interface.
	iface.launch()