Spaces:
Sleeping
Sleeping
| # Runs the full strong baseline, including smina/vina docking, | |
| # gnina rescoring, and an input conformational ensemble. | |
| import argparse | |
| import os | |
| import shutil | |
| import subprocess | |
| import pandas as pd | |
| from rdkit import Chem | |
| from rdkit.Chem import AllChem, PandasTools, rdMolTransforms | |
| import numpy as np | |
| from moleculekit.molecule import Molecule | |
| import time | |
| import gradio as gr | |
| from gradio_molecule3d import Molecule3D | |
def protonate_receptor_and_ligand(protein, ligand):
    """Write protonated copies of the receptor and ligand files.

    The receptor is passed through ``reduce -BUILD`` and the ligand through
    ``obabel -p 7.4``. Each result is written next to its input with ``_H``
    inserted before the file extension (``foo.pdb`` -> ``foo_H.pdb``).

    Parameters
    ----------
    protein : str
        Path to the receptor structure file.
    ligand : str
        Path to the ligand structure file.

    Returns
    -------
    tuple of str
        ``(protein_out, ligand_out)``, the two paths that were written.
    """
    # Derive output names from the split extension instead of
    # ``str.replace(".pdb", ...)``: replace() is a no-op when the path does
    # not contain ".pdb", which made the output path equal to the input and
    # truncated the receptor file before reduce could read it.
    protein_root, protein_ext = os.path.splitext(protein)
    protein_out = f"{protein_root}_H{protein_ext}"
    with open(protein_out, "w") as f:
        # reduce emits the protonated structure on stdout; its diagnostics on
        # stderr are discarded and its exit status is not checked.
        subprocess.run(
            ["reduce", "-BUILD", protein],
            stdout=f,
            stderr=subprocess.DEVNULL,
        )

    ligand_root, ligand_ext = os.path.splitext(ligand)
    ligand_out = f"{ligand_root}_H{ligand_ext}"
    # "-p 7.4" asks Open Babel to add hydrogens appropriate for pH 7.4.
    subprocess.run(["obabel", ligand, "-O", ligand_out, "-p", "7.4"])

    return protein_out, ligand_out
def generate_conformers(ligand, num_confs=8):
    """Generate a UFF-optimised conformer ensemble for the protonated ligand.

    Reads the ``_H`` variant of ``ligand`` (``foo.pdb`` -> ``foo_H.pdb``),
    discards its coordinates, adds hydrogens, embeds ``num_confs`` conformers
    with a fixed random seed, optimises them with UFF and writes every
    conformer in SD format to the ``_multiple_confs`` variant of ``ligand``
    (``foo.pdb`` -> ``foo_multiple_confs.pdb``).

    Parameters
    ----------
    ligand : str
        Path of the original (unprotonated) ligand file; input and output
        paths are derived from it.
    num_confs : int, optional
        Number of conformers requested from the embedder.

    Returns
    -------
    str
        Path of the multi-conformer file that was written.

    Raises
    ------
    ValueError
        If the protonated ligand file cannot be parsed.
    RuntimeError
        If no conformer could be embedded.
    """
    # Split the extension explicitly: ``str.replace(".pdb", ...)`` leaves the
    # path untouched when it has no ".pdb", which made the writer below
    # overwrite the caller's ligand file.
    root, ext = os.path.splitext(ligand)
    protonated_path = f"{root}_H{ext}"
    output_path = f"{root}_multiple_confs{ext}"

    # NOTE(review): the file is parsed as an MDL Mol file whatever its
    # extension is -- confirm the protonation step writes that format.
    mol = Chem.MolFromMolFile(protonated_path)
    if mol is None:
        raise ValueError(f"Could not parse ligand file: {protonated_path}")

    # Start from topology only so the ensemble does not depend on the input pose.
    mol.RemoveAllConformers()
    mol = Chem.AddHs(mol)

    conf_ids = AllChem.EmbedMultipleConfs(mol, numConfs=num_confs, randomSeed=1)
    if len(conf_ids) == 0:
        raise RuntimeError(
            f"Conformer embedding produced no conformers for {protonated_path}"
        )
    AllChem.UFFOptimizeMoleculeConfs(mol)

    # NOTE(review): SDWriter emits SD-format records even though the output
    # name keeps the input's extension -- confirm what the consumer expects.
    with Chem.SDWriter(output_path) as writer:
        for conf in mol.GetConformers():
            writer.write(mol, confId=conf.GetId())

    return output_path
def get_bb(points):
    """Compute the axis-aligned bounding box of a set of points.

    Parameters
    ----------
    points : numpy.ndarray
        Coordinates indexed as ``points[:, axis]``, with x, y and z in
        columns 0, 1 and 2.

    Returns
    -------
    boundingBox : list
        Two corner points: ``[[xmin, ymin, zmin], [xmax, ymax, zmax]]``.
    """
    # Reduce each coordinate column separately so any trailing dimensions of
    # ``points`` are folded into the same per-axis extreme.
    lower_corner = [np.min(points[:, axis]) for axis in range(3)]
    upper_corner = [np.max(points[:, axis]) for axis in range(3)]
    return [lower_corner, upper_corner]
def run_docking(protein, ligand):
    """Dock the ligand conformer ensemble into the receptor with gnina.

    The search box is the axis-aligned bounding box of all receptor atoms.
    gnina is run with vina scoring on the protonated receptor
    (``foo.pdb`` -> ``foo_H.pdb``) and the conformer file derived from
    ``ligand`` (``bar.sdf`` -> ``bar_ligand_multiple_confs.sdf``). The poses
    are written to ``bar_multiple_confs_poses.sdf`` and then rewritten in
    place sorted by descending ``CNNscore``.

    Parameters
    ----------
    protein : str
        Path to the receptor file, used to size and place the search box.
    ligand : str
        Ligand path from which the conformer and pose file names are derived.

    Returns
    -------
    pandas.Series
        ``CNNscore`` values as floats, best pose first (same order as the
        rewritten pose file).

    Raises
    ------
    subprocess.CalledProcessError
        If gnina exits with a non-zero status.
    RuntimeError
        If the pose file contains no ``CNNscore`` property.
    """
    receptor = Molecule(protein)
    lower, upper = get_bb(receptor.coords)

    # The box centre must be expressed in the coordinate frame of the file
    # gnina reads. Previously the in-memory molecule was re-centred and the
    # box was placed at the origin, which does not match the receptor file
    # unless it already happens to be centred there.
    center_x, center_y, center_z = [(lo + hi) / 2 for lo, hi in zip(lower, upper)]
    size_x, size_y, size_z = [hi - lo for lo, hi in zip(lower, upper)]

    # NOTE(review): these names must match what the earlier pipeline steps
    # actually wrote -- verify against the caller.
    receptor_path = protein.replace(".pdb", "_H.pdb")
    conformers_path = ligand.replace(".sdf", "_ligand_multiple_confs.sdf")
    poses_path = ligand.replace(".sdf", "_multiple_confs_poses.sdf")

    # check=True: fail loudly instead of going on to read a missing or stale
    # pose file left over from a previous run.
    subprocess.run(
        [
            "gnina",
            "-r",
            receptor_path,
            "-l",
            conformers_path,
            "-o",
            poses_path,
            "--center_x",
            str(center_x),
            "--center_y",
            str(center_y),
            "--center_z",
            str(center_z),
            "--size_x",
            str(size_x),
            "--size_y",
            str(size_y),
            "--size_z",
            str(size_z),
            "--scoring",
            "vina",
            "--exhaustiveness",
            "4",
            "--num_modes",
            "1",
            "--seed",
            "1",
        ],
        check=True,
    )

    # Sort the poses from the multiple conformation runs so the overall best
    # is first.
    poses = PandasTools.LoadSDF(poses_path)
    if "CNNscore" not in poses.columns:
        raise RuntimeError(f"No CNNscore found in docking output: {poses_path}")
    poses["CNNscore"] = poses["CNNscore"].astype(float)
    gnina_order = poses.sort_values("CNNscore", ascending=False).reset_index(drop=True)
    PandasTools.WriteSDF(
        gnina_order,
        poses_path,
        properties=list(poses.columns),
    )

    # Return the scores in the same (sorted) order as the file just written;
    # the unsorted frame was returned before.
    return gnina_order["CNNscore"]
def predict(input_sequence, input_ligand, input_msa, input_protein):
    """Run protonation, conformer generation and docking for the UI.

    Parameters
    ----------
    input_sequence : str
        Protein sequence from the UI; not used by this pipeline.
    input_ligand : str
        Ligand input, forwarded to the pipeline steps as a file path.
        NOTE(review): the UI labels this field as a SMILES string -- confirm
        which of the two the pipeline is meant to receive.
    input_msa : object
        MSA upload from the UI; not used by this pipeline.
    input_protein : str
        Path to the receptor structure file.

    Returns
    -------
    tuple
        ``(output_files, metrics, run_time)`` where ``output_files`` is a list
        of two file names, ``metrics`` is ``{"cnn_score": [float, ...]}`` and
        ``run_time`` is the elapsed wall-clock time in seconds.
    """
    start_time = time.time()

    protonate_receptor_and_ligand(input_protein, input_ligand)
    # Only the ligand belongs here: the previous call passed the protein as
    # the ligand and the ligand as ``num_confs``.
    generate_conformers(input_ligand)
    cnn_score = run_docking(input_protein, input_ligand)

    # Convert to plain floats so the metrics are ordinary JSON data.
    metrics = {"cnn_score": [float(score) for score in cnn_score]}

    run_time = time.time() - start_time

    # NOTE(review): these file names are hard-coded and are not written by
    # anything called in this function -- confirm they should be the docking
    # outputs instead.
    return ["test_out.pdb", "test_docking_pose.sdf"], metrics, run_time
# Gradio front end: collects the inputs, runs ``predict`` on button click and
# shows the returned structures, metrics and runtime.
with gr.Blocks() as app:
    gr.Markdown("# Template for inference")
    gr.Markdown("Title, description, and other information about the model")

    # First row: free-text inputs.
    with gr.Row():
        input_sequence = gr.Textbox(lines=3, label="Input Protein sequence (FASTA)")
        input_ligand = gr.Textbox(lines=3, label="Input ligand SMILES")

    # Second row: file uploads.
    with gr.Row():
        input_msa = gr.File(label="Input Protein MSA (A3M)")
        input_protein = gr.File(label="Input protein monomer")

    # define any options here
    # for automated inference the default options are used
    # slider_option = gr.Slider(0,10, label="Slider Option")
    # checkbox_option = gr.Checkbox(label="Checkbox Option")
    # dropdown_option = gr.Dropdown(["Option 1", "Option 2", "Option 3"], label="Radio Option")

    btn = gr.Button("Run Inference")

    # One pre-filled example: sequence, ligand and protein file (no MSA).
    example_row = [
        "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL:SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL",
        "COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O",
        "test_out.pdb",
    ]
    gr.Examples(
        examples=[example_row],
        inputs=[input_sequence, input_ligand, input_protein],
    )

    # Viewer styling: model 0 drawn as a cartoon, model 1 as sticks.
    protein_rep = {"model": 0, "style": "cartoon", "color": "whiteCarbon"}
    ligand_rep = {"model": 1, "style": "stick", "color": "greenCarbon"}
    reps = [protein_rep, ligand_rep]

    out = Molecule3D(reps=reps)
    metrics = gr.JSON(label="Metrics")
    run_time = gr.Textbox(label="Runtime")

    btn.click(
        fn=predict,
        inputs=[input_sequence, input_ligand, input_msa, input_protein],
        outputs=[out, metrics, run_time],
    )

app.launch()