Spaces:

MLSB
/

strong_docking_baseline

Sleeping

App Files Files Community

strong_docking_baseline / inference_app.py

simonduerr

Update inference_app.py

98054e8 verified about 1 year ago

raw

history blame

5.62 kB

	# Runs the full strong baseline, including smina/vina docking,
	# gnina rescoring, and an input conformational ensemble.
	import argparse
	import os
	import shutil
	import subprocess

	import pandas as pd
	from rdkit import Chem
	from rdkit.Chem import AllChem, PandasTools, rdMolTransforms

	import numpy as np
	from moleculekit.molecule import Molecule

	import time

	import gradio as gr

	from gradio_molecule3d import Molecule3D

	def protonate_receptor_and_ligand(protein, ligand):
	    """Write hydrogen-added copies of the receptor and ligand files.

	    The receptor is piped through ``reduce -BUILD`` and the ligand through
	    ``obabel ... -p 7.4``; both executables must be on ``PATH``. The exit
	    status of neither tool is checked.

	    Parameters
	    ----------
	    protein : str
	        Path to the receptor file. The output path is the same string with
	        every ``.pdb`` replaced by ``_H.pdb``.
	    ligand : str
	        Path to the ligand file. The output path is derived the same way.

	    Raises
	    ------
	    ValueError
	        If ``protein`` contains no ``.pdb``. The derived output path would
	        then equal the input path and opening it for writing would empty
	        the receptor file before ``reduce`` could read it.
	    """
	    protein_out = protein.replace(".pdb", "_H.pdb")
	    if protein_out == protein:
	        raise ValueError(
	            f"receptor path {protein!r} contains no '.pdb'; refusing to "
	            "overwrite the input file"
	        )

	    # reduce prints the protonated structure on stdout, so stdout is
	    # redirected straight into the output file.
	    with open(protein_out, "w") as f:
	        subprocess.run(
	            ["reduce", "-BUILD", protein],
	            stdout=f,
	            stderr=subprocess.DEVNULL,
	        )

	    # NOTE(review): when `ligand` contains no '.pdb' the output path equals
	    # the input path and obabel is asked to write over its own input --
	    # confirm this is intended.
	    ligand_out = ligand.replace(".pdb", "_H.pdb")
	    subprocess.run(["obabel", ligand, "-O", ligand_out, "-p", "7.4"])


	def generate_conformers(ligand, num_confs=8):
	    """Generate a UFF-optimised conformer ensemble for the ligand with RDKit.

	    The molecule is read from ``ligand`` with ``.pdb`` replaced by
	    ``_H.pdb``, its existing conformers are discarded, explicit hydrogens
	    are added, and ``num_confs`` conformers are embedded with a fixed random
	    seed and optimised with UFF. Every resulting conformer is written to
	    ``ligand`` with ``.pdb`` replaced by ``_multiple_confs.pdb``.

	    Parameters
	    ----------
	    ligand : str
	        Path from which the input and output file names are derived.
	    num_confs : int, optional
	        Number of conformers requested from the embedding (default 8).

	    Raises
	    ------
	    ValueError
	        If RDKit cannot parse a molecule from the input file, or if the
	        embedding produces no conformer.
	    """
	    protonated_path = ligand.replace(".pdb", "_H.pdb")
	    mol = Chem.MolFromMolFile(protonated_path)
	    if mol is None:
	        # MolFromMolFile signals a parse failure by returning None; fail
	        # here instead of with an AttributeError on the next line.
	        raise ValueError(
	            f"RDKit could not parse a molecule from {protonated_path!r}"
	        )

	    mol.RemoveAllConformers()
	    mol = Chem.AddHs(mol)
	    # Fixed seed keeps the generated ensemble reproducible between runs.
	    AllChem.EmbedMultipleConfs(mol, numConfs=num_confs, randomSeed=1)
	    if mol.GetNumConformers() == 0:
	        raise ValueError(
	            f"conformer embedding failed for {protonated_path!r}"
	        )
	    AllChem.UFFOptimizeMoleculeConfs(mol)

	    # NOTE(review): the records come from Chem.SDWriter although the file
	    # name ends in '.pdb' -- confirm the downstream reader expects that.
	    ensemble_path = ligand.replace(".pdb", "_multiple_confs.pdb")
	    with Chem.SDWriter(ensemble_path) as writer:
	        for cid in range(mol.GetNumConformers()):
	            writer.write(mol, confId=cid)

	def get_bb(points):
	    """Compute the axis-aligned bounding box of a set of points.

	    Parameters
	    ----------
	    points : numpy.ndarray
	        Coordinates indexed as ``points[:, axis]``, with x, y and z at
	        axis index 0, 1 and 2.

	    Returns
	    -------
	    boundingBox : list
	        Two corners, ``[[xmin, ymin, zmin], [xmax, ymax, zmax]]``.

	    """
	    # One reduction per coordinate column; each entry is a scalar extreme
	    # taken over everything stored under that column.
	    lower_corner = [np.min(points[:, axis]) for axis in range(3)]
	    upper_corner = [np.max(points[:, axis]) for axis in range(3)]
	    return [lower_corner, upper_corner]

	def run_docking(protein, ligand):
	    """Dock the ligand conformers against the whole receptor with gnina.

	    The search box is the bounding box of the receptor coordinates read
	    from ``protein``. gnina is run with vina scoring, exhaustiveness 4, one
	    mode per input molecule and seed 1. The resulting poses are then
	    re-written to the same SDF file sorted by ``CNNscore``, best first.

	    Parameters
	    ----------
	    protein : str
	        Path to the receptor file; gnina itself is given this path with
	        ``.pdb`` replaced by ``_H.pdb``.
	    ligand : str
	        Path from which the conformer input file and the poses output file
	        names are derived.

	    Returns
	    -------
	    pandas.Series
	        ``CNNscore`` values as floats, in the same best-first order as the
	        poses written to disk.

	    Raises
	    ------
	    subprocess.CalledProcessError
	        If gnina exits with a non-zero status.
	    """
	    receptor = Molecule(protein)
	    lower, upper = get_bb(receptor.coords)
	    # gnina interprets the box centre in the coordinate frame of the
	    # receptor file, so it is taken from the real coordinates rather than
	    # assumed to sit at the origin.
	    box_center = [float(lo + hi) / 2 for lo, hi in zip(lower, upper)]
	    box_size = [float(hi - lo) for lo, hi in zip(lower, upper)]

	    # NOTE(review): nothing visible in this file writes a
	    # '*_ligand_multiple_confs.sdf' file (generate_conformers writes
	    # '*_multiple_confs.pdb') -- confirm where this input comes from.
	    conformers_path = ligand.replace(".sdf", "_ligand_multiple_confs.sdf")
	    poses_path = ligand.replace(".sdf", "_multiple_confs_poses.sdf")

	    subprocess.run(
	        [
	            "gnina",
	            "-r",
	            protein.replace(".pdb", "_H.pdb"),
	            "-l",
	            conformers_path,
	            "-o",
	            poses_path,
	            "--center_x",  # box spans the full receptor
	            str(box_center[0]),
	            "--center_y",
	            str(box_center[1]),
	            "--center_z",
	            str(box_center[2]),
	            "--size_x",
	            str(box_size[0]),
	            "--size_y",
	            str(box_size[1]),
	            "--size_z",
	            str(box_size[2]),
	            "--scoring",
	            "vina",
	            "--exhaustiveness",
	            "4",
	            "--num_modes",
	            "1",
	            "--seed",
	            "1",
	        ],
	        # Fail here rather than load a stale or missing poses file below.
	        check=True,
	    )

	    # Sort the poses from the multiple conformation runs, so the overall
	    # best one comes first.
	    poses = PandasTools.LoadSDF(poses_path)
	    poses["CNNscore"] = poses["CNNscore"].astype(float)
	    ranked = poses.sort_values("CNNscore", ascending=False).reset_index(drop=True)
	    PandasTools.WriteSDF(
	        ranked,
	        poses_path,
	        properties=list(poses.columns),
	    )
	    # Return the scores in the same order as the file that was just written.
	    return ranked["CNNscore"]


	def predict(input_sequence, input_ligand, input_msa, input_protein):
	    """Run protonation, conformer generation and docking for one request.

	    Parameters
	    ----------
	    input_sequence : str
	        Protein sequence from the UI; not used by this pipeline.
	    input_ligand : str
	        Ligand input, forwarded unchanged to the three pipeline steps.
	    input_msa
	        MSA upload from the UI; not used by this pipeline.
	    input_protein : str
	        Receptor input, forwarded to protonation and docking.

	    Returns
	    -------
	    tuple
	        ``(output_files, metrics, run_time)``: a list of two file names, a
	        dict with the CNN scores under ``"cnn_score"``, and the elapsed
	        wall-clock time in seconds.
	    """
	    # NOTE(review): the UI labels `input_ligand` as SMILES text, while the
	    # pipeline functions derive file names from it with str.replace --
	    # confirm what is actually meant to be passed here.
	    start_time = time.time()

	    protonate_receptor_and_ligand(input_protein, input_ligand)
	    # generate_conformers takes (ligand, num_confs); only the ligand is
	    # passed so the default ensemble size is used.
	    generate_conformers(input_ligand)
	    cnn_score = run_docking(input_protein, input_ligand)
	    # Plain floats keep the metrics dict made of built-in types only.
	    metrics = {"cnn_score": [float(score) for score in cnn_score]}

	    run_time = time.time() - start_time
	    # NOTE(review): these output names are fixed and are not derived from
	    # the docking step above -- confirm they are the intended files.
	    return ["test_out.pdb", "test_docking_pose.sdf"], metrics, run_time

	with gr.Blocks() as app:
	    # Page header.
	    gr.Markdown("# Template for inference")
	    gr.Markdown("Title, description, and other information about the model")

	    # Text inputs on the first row, file uploads on the second.
	    with gr.Row():
	        input_sequence = gr.Textbox(lines=3, label="Input Protein sequence (FASTA)")
	        input_ligand = gr.Textbox(lines=3, label="Input ligand SMILES")
	    with gr.Row():
	        input_msa = gr.File(label="Input Protein MSA (A3M)")
	        input_protein = gr.File(label="Input protein monomer")

	    # define any options here

	    # for automated inference the default options are used
	    # slider_option = gr.Slider(0,10, label="Slider Option")
	    # checkbox_option = gr.Checkbox(label="Checkbox Option")
	    # dropdown_option = gr.Dropdown(["Option 1", "Option 2", "Option 3"], label="Radio Option")

	    btn = gr.Button("Run Inference")

	    # One pre-filled example: sequence, ligand and receptor file.
	    gr.Examples(
	        examples=[
	            [
	                "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL:SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL",
	                "COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O",
	                "test_out.pdb",
	            ],
	        ],
	        inputs=[input_sequence, input_ligand, input_protein],
	    )

	    # Viewer styling: model 0 as cartoon, model 1 as sticks.
	    reps = [
	        {"model": 0, "style": "cartoon", "color": "whiteCarbon"},
	        {"model": 1, "style": "stick", "color": "greenCarbon"},
	    ]

	    # Output widgets, in the order predict() returns its values.
	    out = Molecule3D(reps=reps)
	    metrics = gr.JSON(label="Metrics")
	    run_time = gr.Textbox(label="Runtime")

	    btn.click(
	        fn=predict,
	        inputs=[input_sequence, input_ligand, input_msa, input_protein],
	        outputs=[out, metrics, run_time],
	    )

	app.launch()