Parker Tope committed on
Commit
4eda316
·
0 Parent(s):

changes to app vis 12 seq only

Browse files
Files changed (3) hide show
  1. app.py +81 -0
  2. requirements.txt +10 -0
  3. test_sequences.csv +63 -0
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ import os
4
+ import pandas as pd
5
+ import tempfile
6
+ from pathlib import Path
7
+ from inference_wrapper import RNAFoldingPredictor
8
+
9
# --- Runtime configuration -------------------------------------------------
# Model location can be overridden through the MODEL_PATH environment
# variable; generated .pdb files are written under STATIC_DIR.
MODEL_PATH = os.getenv("MODEL_PATH", "ptope/rna-model-weights")
STATIC_DIR = "static"
Path(STATIC_DIR).mkdir(parents=True, exist_ok=True)

# Single predictor instance, created once at import time and reused by
# every request.
predictor = RNAFoldingPredictor(MODEL_PATH)

# Example sequences offered in the dropdown: the distinct, non-null values
# of the "sequence" column of test_sequences.csv.
test_sequences = pd.read_csv("test_sequences.csv")
_sequence_column = test_sequences["sequence"].dropna()
test_seq_list = _sequence_column.unique().tolist()
18
+
19
# Fixed-column ATOM record layout: record name in columns 1-6, serial 7-11,
# atom name 13-16, residue name 18-20, chain 22, residue number 23-26,
# x/y/z 31-54, occupancy 55-60, B-factor 61-66, element 77-78.
_PDB_ATOM_RECORD = (
    "ATOM  {serial:5d} {name:<4s} {resname:>3s} {chain:1s}{resid:4d}    "
    "{x:8.3f}{y:8.3f}{z:8.3f}{occ:6.2f}{temp:6.2f}          {element:>2s}"
)

# Per-base displacement (in the same units as the input coordinates) of the
# base pseudo-atom relative to C1'; unknown residues use the default.
_BASE_OFFSETS = {
    'A': (1.5, 0.0, 0.5),
    'C': (1.2, 0.3, 0.3),
    'G': (1.7, -0.2, 0.7),
    'U': (1.0, 0.5, 0.0),
}
_DEFAULT_BASE_OFFSET = (1.5, 0.0, 0.0)

# The glycosidic bond goes from C1' to N9 in purines and to N1 in pyrimidines.
_BASE_ATOM_NAMES = {'A': " N9 ", 'G': " N9 ", 'C': " N1 ", 'U': " N1 "}
_DEFAULT_BASE_ATOM_NAME = " N9 "


def create_pdb_from_prediction(prediction_df):
    """Render a per-residue prediction as PDB-format text.

    Each row of ``prediction_df`` yields two ATOM records on chain ``A``:
    the C1' atom at (``x_1``, ``y_1``, ``z_1``) and a base pseudo-atom placed
    at a fixed, per-base offset from it. CONECT records link every C1' to its
    base atom and to the C1' of the preceding row.

    Args:
        prediction_df: DataFrame with the columns ``resname``, ``resid``,
            ``x_1``, ``y_1`` and ``z_1``. ``resid`` may be an integer or an
            integral float; it is converted with ``int()``.

    Returns:
        The structure as a newline-joined string: a HEADER line, all ATOM
        records, all CONECT records, then ``END``.
    """
    atom_lines = []
    # CONECT records are collected separately so they can be emitted after
    # the coordinate section rather than interleaved with ATOM records.
    conect_lines = []
    serial = 1
    prev_c1_serial = None

    columns = prediction_df[["resname", "resid", "x_1", "y_1", "z_1"]]
    for resname, resid, x, y, z in columns.itertuples(index=False, name=None):
        resname = str(resname)
        resid = int(resid)  # the 'd' format code rejects float residue ids
        x, y, z = float(x), float(y), float(z)

        c1_serial = serial
        atom_lines.append(_PDB_ATOM_RECORD.format(
            serial=c1_serial, name=" C1'", resname=resname, chain="A",
            resid=resid, x=x, y=y, z=z, occ=1.0, temp=0.0, element="C",
        ))
        serial += 1

        dx, dy, dz = _BASE_OFFSETS.get(resname, _DEFAULT_BASE_OFFSET)
        base_serial = serial
        atom_lines.append(_PDB_ATOM_RECORD.format(
            serial=base_serial,
            name=_BASE_ATOM_NAMES.get(resname, _DEFAULT_BASE_ATOM_NAME),
            resname=resname, chain="A", resid=resid,
            x=x + dx, y=y + dy, z=z + dz, occ=1.0, temp=0.0, element="N",
        ))
        serial += 1

        conect_lines.append(f"CONECT{c1_serial:5d}{base_serial:5d}")
        if prev_c1_serial is not None:
            # Backbone trace: bond consecutive C1' atoms.
            conect_lines.append(f"CONECT{prev_c1_serial:5d}{c1_serial:5d}")
        prev_c1_serial = c1_serial

    pdb_lines = ["HEADER    RNA STRUCTURE PREDICTION"]
    pdb_lines.extend(atom_lines)
    pdb_lines.extend(conect_lines)
    pdb_lines.append("END")
    return "\n".join(pdb_lines)
41
+
42
def predict_and_serve(sequence, description=""):
    """Predict a structure for ``sequence`` and publish it as a .pdb file.

    The sequence is upper-cased and stripped of all whitespace (so multi-line
    pastes are accepted) and must then consist solely of A, C, G and U. On
    success the prediction is rendered with ``create_pdb_from_prediction``,
    written to a uniquely named file under ``STATIC_DIR``, and wrapped in an
    iframe pointing the Mol* viewer at that file's public URL.

    Args:
        sequence: RNA sequence text; ``None`` is treated as empty.
        description: Optional free text passed through to the predictor.

    Returns:
        A ``(status, file_path, iframe_html)`` tuple. For invalid input this
        is ``("Invalid input", None, "")``.
    """
    # Function-scope imports keep the block self-contained.
    from html import escape
    from urllib.parse import quote

    sequence = "".join((sequence or "").split()).upper()
    if not sequence or set(sequence) - set("ACGU"):
        return "Invalid input", None, ""

    df = predictor.predict(sequence, description)
    pdb_text = create_pdb_from_prediction(df)

    # Public tempfile API instead of the private _get_candidate_names();
    # delete=False because the file must outlive this call to be served.
    with tempfile.NamedTemporaryFile(
        "w", suffix=".pdb", dir=STATIC_DIR, delete=False
    ) as f:
        f.write(pdb_text)
        filename = Path(f.name).name
    # Rebuild the path relative to STATIC_DIR so the returned value keeps
    # the "static/<name>.pdb" form.
    path = Path(STATIC_DIR) / filename

    # SPACE_HOST already holds the public hostname when running on Spaces.
    host = os.environ.get("SPACE_HOST")
    if not host:
        # SPACE_ID is "owner/name", which is not a valid hostname on its own.
        # NOTE(review): mapping every non-alphanumeric character to "-" is a
        # best-effort reconstruction of the Spaces subdomain - confirm.
        space_id = os.environ.get("SPACE_ID", "your-username--space-name")  # fallback if run locally
        subdomain = "".join(c if c.isalnum() else "-" for c in space_id.lower())
        host = f"{subdomain}.hf.space"

    file_url = f"https://{host}/file={path.as_posix()}"
    # Percent-encode the nested URL so its "=" and "/" survive as a single
    # query value, and tell Mol* the format explicitly.
    iframe_url = (
        "https://molstar.org/viewer/"
        f"?structure-url={quote(file_url, safe='')}&structure-url-format=pdb"
    )
    iframe_html = (
        f'<iframe src="{escape(iframe_url, quote=True)}" width="100%" '
        'height="600px" style="border:1px solid #ccc;"></iframe>'
    )
    return f"Predicted {len(df)} residues.", str(path), iframe_html
57
+
58
# --- User interface --------------------------------------------------------
# Left column: sequence selection and entry. Right column: status text and
# the downloadable .pdb file. The Mol* viewer sits below the row.
with gr.Blocks() as demo:
    gr.Markdown("## 🧬 RNA 3D Viewer (Hugging Face + Mol*)")

    with gr.Row():
        with gr.Column():
            dropdown = gr.Dropdown(
                label="Select a Test Sequence",
                choices=test_seq_list,
            )
            seq_input = gr.Textbox(lines=4, label="RNA Sequence")
            desc_input = gr.Textbox(lines=1, label="Description (optional)")
            # Choosing an example copies it into the sequence box.
            dropdown.change(
                fn=lambda choice: choice,
                inputs=dropdown,
                outputs=seq_input,
            )
            submit_btn = gr.Button("Submit")

        with gr.Column():
            status_output = gr.Textbox(label="Status")
            file_output = gr.File(label="Download .pdb")

    viewer_html = gr.HTML(label="Mol* Viewer")

    # predict_and_serve returns (status, file path, iframe HTML), matching
    # the order of the three output components.
    submit_btn.click(
        predict_and_serve,
        [seq_input, desc_input],
        [status_output, file_output, viewer_html],
    )

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio==3.50.2
2
+ pandas==2.0.3
3
+ numpy==1.24.4
4
+ torch>=2.0.0
5
+ transformers>=4.28.0
6
+ tqdm>=4.65.0
7
+ biopython==1.81
8
+ sentence-transformers>=2.2.2
9
+ matplotlib>=3.7.1 # Useful for potential debugging or visualization
10
+ scikit-learn>=1.2.2 # Commonly used with transformers
test_sequences.csv ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ target_id,sequence,temporal_cutoff,description,all_sequences
2
+ R1107,GGGGGCCACAGCAGAAGCGUUCACGUCGCAGCCCCUGUCAGCCAUUGCACUCCGGCUGCGAAUUCUGCU,2022-05-28,"CPEB3 ribozyme
3
+ Human
4
+ human CPEB3 HDV-like ribozyme",">7QR4_1|Chain A|U1 small nuclear ribonucleoprotein A|Homo sapiens (9606)
5
+ RPNHTIYINNLNEKIKKDELKKSLHAIFSRFGQILDILVSRSLKMRGQAFVIFKEVSSATNALRSMQGFPFYDKPMRIQYAKTDSDIIAKM
6
+ >7QR4_2|Chain B|RNA CPEB3 ribozyme|Homo sapiens (9606)
7
+ GGGGGCCACAGCAGAAGCGUUCACGUCGCAGCCCCUGUCAGCCAUUGCACUCCGGCUGCGAAUUCUGCU"
8
+ R1108,GGGGGCCACAGCAGAAGCGUUCACGUCGCGGCCCCUGUCAGCCAUUGCACUCCGGCUGCGAAUUCUGCU,2022-05-27,"CPEB3 ribozyme
9
+ Chimpanzee
10
+ Chimpanzee CPEB3 HDV-like ribozyme",">7QR3_1|Chains A, B|U1 small nuclear ribonucleoprotein A|Homo sapiens (9606)
11
+ RPNHTIYINNLNEKIKKDELKKSLHAIFSRFGQILDILVSRSLKMRGQAFVIFKEVSSATNALRSMQGFPFYDKPMRIQYAKTDSDIIAKM
12
+ >7QR3_2|Chains C, D|chimpanzee CPEB3 ribozyme|Pan troglodytes (9598)
13
+ GGGGGCCACAGCAGAAGCGUUCACGUCGCGGCCCCUGUCAGCCAUUGCACUCCGGCUGCGAAUUCUGCU"
14
+ R1116,CGCCCGGAUAGCUCAGUCGGUAGAGCAGCGGCUAAAACAGCUCUGGGGUUGUACCCACCCCAGAGGCCCACGUGGCGGCUAGUACUCCGGUAUUGCGGUACCCUUGUACGCCUGUUUUAGCCGCGGGUCCAGGGUUCAAGUCCCUGUUCGGGCGCCA,2022-06-04,"Cloverleaf RNA
15
+ Poliovirus
16
+ Crystal Structure of Poliovirus (type 1 Mahoney) cloverleaf RNA with tRNA scaffold",">8S95_1|Chain A[auth C]|Lysine tRNA scaffold,Poliovirus cloverleaf RNA|Homo sapiens (9606)
17
+ CGCCCGGAUAGCUCAGUCGGUAGAGCAGCGGCUAAAACAGCUCUGGGGUUGUACCCACCCCAGAGGCCCACGUGGCGGCUAGUACUCCGGUAUUGCGGUACCCUUGUACGCCUGUUUUAGCCGCGGGUCCAGGGUUCAAGUCCCUGUUCGGGCGCCA"
18
+ R1117v2,UUGGGUUCCCUCACCCCAAUCAUAAAAAGG,2022-06-03,"PreQ1 class I type III riboswitch
19
+ K. pneumoniae
20
+ Additional Information: This is a ligand-only target (re-release of R1117 with the corrected SMILES string).
21
+ ID Name SMILES Relevant
22
+ 001 PRF NCc1c[nH]c2nc(N)[nH]c(=O)c12 Yes
23
+ Class I type III preQ1 riboswitch from E. coli",">8FZA_1|Chains A, B|PreQ1 Riboswitch (30-MER)|Escherichia coli (562)
24
+ UUGGGUUCCCUCACCCCAAUCAUAAAAAGG"
25
+ R1126,GGAAUCUCGCCCGAUGUUCGCAUCGGGAUUUGCAGGUCCAUGGAUUACACCAUGCAACGCAGACCUGUAGAUGCCACGCUAGCCGUGGUGAGGGUCGGGUCCAGAUGUCAUUCGACUUUAACGCGCCUAAGCGUUGAAGGCGUGUUAGAGCAGAUAGUUCGCUAUCUGGGGAGCCUGUUCGCAGGCUCAGGAGCCUUCGGGCUCCUAGCGCUAUUACCCCGGACACCACCGGGCAGACAAGUAAUGGUGCUCCUCGAAUGACUUCUGUUGAGUAGAGUGUGGGCUCCGCGGCUAGUGUGCACCUUAGCGGUGAAUGUCUGACACCGUUAAGGUGGUUACUCUUCGGAGUAACGCCGAGAUUCC,2022-06-11,"Traptamer
26
+ Synthetic
27
+ Additional Information: Contains a relevant ion.
28
+ RNA origami 3-helix tile Traptamer",">8TVZ_1|Chain A[auth C]|RNA (363-MER)|synthetic construct (32630)
29
+ GGAAUCUCGCCCGAUGUUCGCAUCGGGAUUUGCAGGUCCAUGGAUUACACCAUGCAACGCAGACCUGUAGAUGCCACGCUAGCCGUGGUGAGGGUCGGGUCCAGAUGUCAUUCGACUUUAACGCGCCUAAGCGUUGAAGGCGUGUUAGAGCAGAUAGUUCGCUAUCUGGGGAGCCUGUUCGCAGGCUCAGGAGCCUUCGGGCUCCUAGCGCUAUUACCCCGGACACCACCGGGCAGACAAGUAAUGGUGCUCCUCGAAUGACUUCUGUUGAGUAGAGUGUGGGCUCCGCGGCUAGUGUGCACCUUAGCGGUGAAUGUCUGACACCGUUAAGGUGGUUACUCUUCGGAGUAACGCCGAGAUUCC"
30
+ R1128,GGAAUAUCGUCAUGGUGAUUCGUCACCAUGAGGCUAGAUCUCAUAUCUAGCGCUUUCGAGCGCUAGAGUCCUUAUCUAGCCGGUUUAUACUUUCGAGUGUGAACCCGAUAUUCCGCGGAUCACUAUGAGUCGUUCGCGGCUCAUAGUCCGGCUCAAAGGACAUCAUGGCCUGUUCGCAGGUUGUGAUUAUGAGUGAGCCGGGUAAGGCAUACCGUUCGCGGUAUGUCUUACGAUCCGC,2022-06-10,"6WJ
31
+ Single-stranded Paranemic Crossover RNA Triangle (PXT)",">8BTZ_1|Chain A|RNA Paranemic croosover triangle (PXT)|synthetic construct (32630)
32
+ GGAAUAUCGUCAUGGUGAUUCGUCACCAUGAGGCUAGAUCUCAUAUCUAGCGCUUUCGAGCGCUAGAGUCCUUAUCUAGCCGGUUUAUACUUUCGAGUGUGAACCCGAUAUUCCGCGGAUCACUAUGAGUCGUUCGCGGCUCAUAGUCCGGCUCAAAGGACAUCAUGGCCUGUUCGCAGGUUGUGAUUAUGAGUGAGCCGGGUAAGGCAUACCGUUCGCGGUAUGUCUUACGAUCCGC"
33
+ R1136,GGAUACGUCUACGCUCAGUGACGGACUCUCUUCGGAGAGUCUGACAUCCGAACCAUACACGGAUGUGCCUCGCCGAACAGUCUACGGCGAGCUUAAGCGCUGGGGACGCCCAACGCAUCACAAAGACUGAGUGAUGAACCAGAAGUAUGGACUGGUUGCGUUGGUGGAGACGGUCGGGUCCAGUUCGCUGUCGAGUAGAGUGUGGGCUCCAUCGACGCCGCUUUAAGGUCCCCAAUCGUGGCGUGUCGGCCUGCUUCGGCAGGCACUGGCGCCGGGACCUUGAAGAGAUGAGAUUUCGAUCUCAUCUUUGGGUGUCUCUGGUGCUUGAGGGCCCUGUGUUCGCACAGGGCCGCUCACUGGGUGUGGACGUAUCC,2022-06-18,"Apta-FRET
34
+ Additional Information: Information about the bound ligand is provided in SMILES section below.
35
+ ID Name SMILES Relevant
36
+ 001 1TU Cc1nc(Cc2cc(F)c(O)c(F)c2)c(O)n1C Yes
37
+ 002 J93 CN(CCO)c1cc2sc(/C=C(\C#N)c3ccc(C#N)cc3)cc2s1 Yes
38
+ 003 K [K+] Yes
39
+ Ligand bound state of a brocolli-pepper aptamer FRET tile",">7ZJ4_1|Chain A[auth E]|brocolli-pepper aptamer|synthetic construct (32630)
40
+ GGAUACGUCUACGCUCAGUGACGGACUCUCUUCGGAGAGUCUGACAUCCGAACCAUACACGGAUGUGCCUCGCCGAACAGUCUACGGCGAGCUUAAGCGCUGGGGACGCCCAACGCAUCACAAAGACUGAGUGAUGAACCAGAAGUAUGGACUGGUUGCGUUGGUGGAGACGGUCGGGUCCAGUUCGCUGUCGAGUAGAGUGUGGGCUCCAUCGACGCCGCUUUAAGGUCCCCAAUCGUGGCGUGUCGGCCUGCUUCGGCAGGCACUGGCGCCGGGACCUUGAAGAGAUGAGAUUUCGAUCUCAUCUUUGGGUGUCUCUGGUGCUUGAGGGCCCUGUGUUCGCACAGGGCCGCUCACUGGGUGUGGACGUAUCC"
41
+ R1138,GGGAGAGUACUAUUCAGAUGCAGACCGCAAGUUCAGAGCGGUUUGCAUCUAGGGUACGUUUUCGAACGUAUCCUCCGACUAAGUGUAUUCGUAUACUUAGUGCCUUGUGCCUGCUUCGGCAGGCAUGACCCAAAUGUGCCUUUCGGGGCACAUUUCCGGUCAUCCAAGUUCGCUUGGGUGAUGCGGGCGUAUAGGUUCGUCUAUACGUCCGCGUUUUCCGAGAAGAGGUAACUCGGGAAACCGGUCCACGUGACAAAGGUAGAGUUACGUGGAGGGAGCAGCUGCAAAGGGAUAAUGCAGUUGCUGGCUGGAUGCCAGAACUCACGACUGGCAUCUACGGGGAUGGUGCUCUCCCAAUUCUCCAUUUACCGCCGAAUCGACCCCAACGUGAGAGGGGUCGGUUCCCCGAGCAUAGACCAAUAUCCCAGGUUUAUGCUCCCCAACGCUGGACGAACUACCUACGUCUAGCGUUCCGGCAAAUGAGUCAAUACCUCAGACUUAUUUGCGGUGCCUGAGCCUAAACUGAACAUGGGUUCAGGCAUCUUGGCUCCAGUUCGCUGGAGCCGACGGUAGCGCUGCGUUCGCGCAGUGCUAGGGAGCAUCCGUUUUCGAGCGGAUGCUGGGCGGUUGCCUGUUCGCAGGCAAUCGGGCCUACUCAUGAUUCGUCAUGAGUGGUGACAGCGUGAUGUUCGCAUUACGCUGUCGGGUAGAUGGAGAAUU,2022-06-24,"6HBC-Young
42
+ Additional Information: This is a co-transcriptional product. The structure observed in the cryo-EM grids immediately after the transcription and that around 8 hours later have alternative conformations. You can submit alternative conformations as separate models (we still stick to 5 models per target maximum).
43
+ Young conformer of a 6-helix bundle of RNA with clasp",">7PTK_1|Chain A[auth B]|RNA|synthetic construct (32630)
44
+ GGGAGAGUACUAUUCAGAUGCAGACCGCAAGUUCAGAGCGGUUUGCAUCUAGGGUACGUUUUCGAACGUAUCCUCCGACUAAGUGUAUUCGUAUACUUAGUGCCUUGUGCCUGCUUCGGCAGGCAUGACCCAAAUGUGCCUUUCGGGGCACAUUUCCGGUCAUCCAAGUUCGCUUGGGUGAUGCGGGCGUAUAGGUUCGUCUAUACGUCCGCGUUUUCCGAGAAGAGGUAACUCGGGAAACCGGUCCACGUGACAAAGGUAGAGUUACGUGGAGGGAGCAGCUGCAAAGGGAUAAUGCAGUUGCUGGCUGGAUGCCAGAACUCACGACUGGCAUCUACGGGGAUGGUGCUCUCCCAAUUCUCCAUUUACCGCCGAAUCGACCCCAACGUGAGAGGGGUCGGUUCCCCGAGCAUAGACCAAUAUCCCAGGUUUAUGCUCCCCAACGCUGGACGAACUACCUACGUCUAGCGUUCCGGCAAAUGAGUCAAUACCUCAGACUUAUUUGCGGUGCCUGAGCCUAAACUGAACAUGGGUUCAGGCAUCUUGGCUCCAGUUCGCUGGAGCCGACGGUAGCGCUGCGUUCGCGCAGUGCUAGGGAGCAUCCGUUUUCGAGCGGAUGCUGGGCGGUUGCCUGUUCGCAGGCAAUCGGGCCUACUCAUGAUUCGUCAUGAGUGGUGACAGCGUGAUGUUCGCAUUACGCUGUCGGGUAGAUGGAGAAUU"
45
+ R1149,GGACACGAGUAACUCGUCUAUCUUCUGCAGGCUGCUUACGGUUUCGUCCGUGUUGCAGCCGAUCAUCAGCACAUCUAGGUUUCGUCCGGGUGUGACCGAAAGGUAAGAUGGAGAGCCUUGUCCC,2022-07-02,"SARS-CoV-2 SL5
46
+ Additional Information: Alternative conformations present.
47
+ SARS-CoV-2 5 proximal stem-loop 5",">8UYS_1|Chain A|SARS-CoV-2 RNA SL5 domain.|Severe acute respiratory syndrome coronavirus 2 (2697049)
48
+ GGACACGAGUAACUCGUCUAUCUUCUGCAGGCUGCUUACGGUUUCGUCCGUGUUGCAGCCGAUCAUCAGCACAUCUAGGUUUCGUCCGGGUGUGACCGAAAGGUAAGAUGGAGAGCCUUGUCCC"
49
+ R1156,GGAGCAUCGUGUCUCAAGUGCUUCACGGUCACAAUAUACCGUUUCGUCGGGUGCGUGGCAAUUCGGUGCACAUCAUGUCUUUCGUGGCUGGUGUGGCUCCUCAAGGUGCGAGGGGCAAGUAUAGAGCAGAGCUCC,2022-07-07,"BtCoV-HKU5 SL5
50
+ BtCoV-HKU5 5 proximal stem-loop 5, conformation 1",">8UYE_1|Chain A|BtCoV-HKU5 5' proximal stem-loop 5|Pipistrellus bat coronavirus HKU5 (694008)
51
+ GGAGCAUCGUGUCUCAAGUGCUUCACGGUCACAAUAUACCGUUUCGUCGGGUGCGUGGCAAUUCGGUGCACAUCAUGUCUUUCGUGGCUGGUGUGGCUCCUCAAGGUGCGAGGGGCAAGUAUAGAGCAGAGCUCC"
52
+ R1189,GCGUACAGGGAACACGCAACCCCGAAGGAUCGGGGAAGGGACGUCGCCAGGGAGGCGAUUCCAUCAGGAUGAUGACGAGGGACUGAAGAGUGGGCGGGGUAAUACCCCGCCCCUUUUU,2022-08-11,"A-6B
53
+ Additional Information: The T1189/R1189 and T1190/R1190 complexes represent alternative conformations corresponding to different particles in the same cryo-EM data set. The complexes contain one RNA molecule and several (4 or 6) protein molecules. The R1189/T1189 target pair represent the A1B6 complex, while the R1190/T1190 pair - A1B4 complex. Predictions for the corresponding RNA and protein targets should be submitted in the same frame of reference so that the concatenation of corresponding models, say, R1189TS000_1 and T1189TS000_1 will give a coordinate set for the full RNA-protein complex.
54
+ Cryo-EM structure of Pseudomonas aeruginosa RsmZ RNA in complex with three RsmA protein dimers",">7YR7_1|Chains A[auth B], B[auth C], C[auth D], D[auth E], F, G|Translational regulator CsrA|Pseudomonas aeruginosa (287)
55
+ MLILTRRVGETLMVGDDVTVTVLGVKGNQVRIGVNAPKEVAVHREEIYQRIQKEK
56
+ >7YR7_2|Chain E[auth A]|RsmZ RNA (118-MER)|Pseudomonas aeruginosa (287)
57
+ GCGUACAGGGAACACGCAACCCCGAAGGAUCGGGGAAGGGACGUCGCCAGGGAGGCGAUUCCAUCAGGAUGAUGACGAGGGACUGAAGAGUGGGCGGGGUAAUACCCCGCCCCUUUUU"
58
+ R1190,GCGUACAGGGAACACGCAACCCCGAAGGAUCGGGGAAGGGACGUCGCCAGGGAGGCGAUUCCAUCAGGAUGAUGACGAGGGACUGAAGAGUGGGCGGGGUAAUACCCCGCCCCUUUUU,2022-08-11,"A-4B
59
+ Additional Information: The T1189/R1189 and T1190/R1190 complexes represent alternative conformations corresponding to different particles in the same cryo-EM data set. The complexes contain one RNA molecule and several (4 or 6) protein molecules. The R1189/T1189 target pair represent the A1B6 complex, while the R1190/T1190 pair - A1B4 complex. Predictions for the corresponding RNA and protein targets should be submitted in the same frame of reference so that the concatenation of corresponding models, say, R1189TS000_1 and T1189TS000_1 will give a coordinate set for the full RNA-protein complex.
60
+ Cryo-EM structure of Pseudomonas aeruginosa RsmZ RNA in complex with two RsmA protein dimers",">7YR6_1|Chains A[auth B], B[auth C], C[auth D], D[auth E]|Translational regulator CsrA|Pseudomonas aeruginosa (287)
61
+ MLILTRRVGETLMVGDDVTVTVLGVKGNQVRIGVNAPKEVAVHREEIYQRIQKEK
62
+ >7YR6_2|Chain E[auth A]|RsmZ RNA|Pseudomonas aeruginosa (287)
63
+ GCGUACAGGGAACACGCAACCCCGAAGGAUCGGGGAAGGGACGUCGCCAGGGAGGCGAUUCCAUCAGGAUGAUGACGAGGGACUGAAGAGUGGGCGGGGUAAUACCCCGCCCCUUUUU"