Spaces:
Build error
Build error
Commit
·
10d0895
1
Parent(s):
8c638cc
Changed text
Browse files
app.py
CHANGED
|
@@ -10,29 +10,10 @@ import matplotlib.pyplot as plt
|
|
| 10 |
import seaborn as sns
|
| 11 |
import gradio as gr
|
| 12 |
|
| 13 |
-
tokenizer = PreTrainedTokenizerFast(tokenizer_file="./tranception/utils/tokenizers/Basic_tokenizer",
|
| 14 |
-
unk_token="[UNK]",
|
| 15 |
-
sep_token="[SEP]",
|
| 16 |
-
pad_token="[PAD]",
|
| 17 |
-
cls_token="[CLS]",
|
| 18 |
-
mask_token="[MASK]"
|
| 19 |
-
)
|
| 20 |
#######################################################################################################################################
|
| 21 |
############################################### HELPER FUNCTIONS ####################################################################
|
| 22 |
#######################################################################################################################################
|
| 23 |
|
| 24 |
-
import torch
|
| 25 |
-
import transformers
|
| 26 |
-
from transformers import PreTrainedTokenizerFast
|
| 27 |
-
import tranception
|
| 28 |
-
import datasets
|
| 29 |
-
from tranception import config, model_pytorch
|
| 30 |
-
import pandas as pd
|
| 31 |
-
import matplotlib.pyplot as plt
|
| 32 |
-
import seaborn as sns
|
| 33 |
-
import numpy as np
|
| 34 |
-
import gradio as gr
|
| 35 |
-
|
| 36 |
AA_vocab = "ACDEFGHIKLMNPQRSTVWY"
|
| 37 |
tokenizer = PreTrainedTokenizerFast(tokenizer_file="./tranception/utils/tokenizers/Basic_tokenizer",
|
| 38 |
unk_token="[UNK]",
|
|
@@ -166,7 +147,6 @@ def score_and_create_matrix_all_singles(sequence,mutation_range_start=None,mutat
|
|
| 166 |
score_heatmaps.append(create_scoring_matrix_visual(scores,sequence,image_index,window_start,window_end,AA_vocab))
|
| 167 |
window_start += max_number_positions_per_heatmap
|
| 168 |
window_end = min(mutation_range_end,window_start+max_number_positions_per_heatmap-1)
|
| 169 |
-
print(score_heatmaps)
|
| 170 |
return score_heatmaps, suggest_mutations(scores)
|
| 171 |
|
| 172 |
def extract_sequence(example):
|
|
@@ -186,7 +166,7 @@ def clear_inputs(protein_sequence_input,mutation_range_start,mutation_range_end)
|
|
| 186 |
tranception_design = gr.Blocks()
|
| 187 |
|
| 188 |
with tranception_design:
|
| 189 |
-
gr.Markdown("#
|
| 190 |
gr.Markdown(" Perform in silico directed evolution with Tranception to iteratively improve the fitness of a protein of interest, one mutation at a time. At each step, the Tranception model computes the log likelihood ratios of all possible single amino acid substitution Vs the starting sequence, and outputs a fitness heatmap and recommandations to guide the selection of the mutation to apply.")
|
| 191 |
|
| 192 |
with gr.Tabs():
|
|
@@ -247,7 +227,7 @@ with tranception_design:
|
|
| 247 |
gr.Markdown("<br>")
|
| 248 |
gr.Markdown("# Fitness predictions for all single amino acid substitutions in mutation range")
|
| 249 |
gr.Markdown("Inference may take a few seconds for short proteins & mutation ranges to several minutes for longer ones")
|
| 250 |
-
output_image = gr.Gallery(label="Fitness predictions for all single amino acid substitutions in mutation range",type="filepath") #Using Gallery to
|
| 251 |
|
| 252 |
output_recommendations = gr.Textbox(label="Mutation recommendations")
|
| 253 |
|
|
|
|
| 10 |
import seaborn as sns
|
| 11 |
import gradio as gr
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
#######################################################################################################################################
|
| 14 |
############################################### HELPER FUNCTIONS ####################################################################
|
| 15 |
#######################################################################################################################################
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
AA_vocab = "ACDEFGHIKLMNPQRSTVWY"
|
| 18 |
tokenizer = PreTrainedTokenizerFast(tokenizer_file="./tranception/utils/tokenizers/Basic_tokenizer",
|
| 19 |
unk_token="[UNK]",
|
|
|
|
| 147 |
score_heatmaps.append(create_scoring_matrix_visual(scores,sequence,image_index,window_start,window_end,AA_vocab))
|
| 148 |
window_start += max_number_positions_per_heatmap
|
| 149 |
window_end = min(mutation_range_end,window_start+max_number_positions_per_heatmap-1)
|
|
|
|
| 150 |
return score_heatmaps, suggest_mutations(scores)
|
| 151 |
|
| 152 |
def extract_sequence(example):
|
|
|
|
| 166 |
tranception_design = gr.Blocks()
|
| 167 |
|
| 168 |
with tranception_design:
|
| 169 |
+
gr.Markdown("# In silico directed evolution for protein redesign with Tranception")
|
| 170 |
gr.Markdown(" Perform in silico directed evolution with Tranception to iteratively improve the fitness of a protein of interest, one mutation at a time. At each step, the Tranception model computes the log likelihood ratios of all possible single amino acid substitution Vs the starting sequence, and outputs a fitness heatmap and recommandations to guide the selection of the mutation to apply.")
|
| 171 |
|
| 172 |
with gr.Tabs():
|
|
|
|
| 227 |
gr.Markdown("<br>")
|
| 228 |
gr.Markdown("# Fitness predictions for all single amino acid substitutions in mutation range")
|
| 229 |
gr.Markdown("Inference may take a few seconds for short proteins & mutation ranges to several minutes for longer ones")
|
| 230 |
+
output_image = gr.Gallery(label="Fitness predictions for all single amino acid substitutions in mutation range",type="filepath") #Using Gallery to break down large scoring matrices into smaller images
|
| 231 |
|
| 232 |
output_recommendations = gr.Textbox(label="Mutation recommendations")
|
| 233 |
|