Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import re | |
| import pandas as pd | |
| import networkx as nx | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| from matplotlib import cm | |
# Page header: banner image, centred title, then the introductory text.
PAGE_TITLE_HTML = (
    "<h1 style='text-align: center;'>CMR and Heart Failure Colocalisation Drug Interaction Viewer</h1>"
)
# Description text
PAGE_INTRO_TEXT = """
This interactive app allows you to explore colocalising genes between cardiovascular magnetic resonance image (CMR) traits and heart failure (HF) that have interacting drugs.
You can input multiple HGNC gene names or disease terms to filter the dataset or enter a single gene for more detailed information.
Additionally, you can visualize a protein interaction network for specific genes using STRINGdb data.
"""

st.image("banner.png", use_column_width=True)
st.markdown(PAGE_TITLE_HTML, unsafe_allow_html=True)
st.markdown(PAGE_INTRO_TEXT, unsafe_allow_html=True)
# Load the colocalisation results, replace missing values with 0 and index
# the rows by the "Gene" column.
annotations = (
    pd.read_csv("colocalisation_results.csv")
    .fillna(0)
    .set_index("Gene")
)

# Heading for the gene/disease filter section
st.markdown("### View colocalising gene drug interaction results for selected genes/diseases or the entire dataset.")
def collect_genes(x):
    """Split free-text input into a list of gene tokens.

    Commas and any run of whitespace act as separators. Empty tokens, which
    arise from repeated, leading or trailing separators, are dropped.

    Args:
        x: Raw text, e.g. ``"TTN, MYH7 BAG3"``.

    Returns:
        The non-empty tokens, in the order they appear in ``x``.
    """
    # Raw string: "\s" in a normal string literal is an invalid escape sequence.
    return [token for token in re.split(r"[,\s]+", x) if token]
# Multi-gene search box; the raw text is tokenised into individual entries.
input_gene_list = st.text_input(
    label="Input a list of multiple HGNC genes (enter comma separated):"
)
gene_list = collect_genes(input_gene_list)
def convert_df(df):
    """Serialise a DataFrame to UTF-8 encoded CSV bytes for download.

    The DataFrame index is not written to the output.
    """
    csv_text = df.to_csv(index=False)
    return csv_text.encode("utf-8")
# The multi-gene table is shown only when more than one gene was entered.
if len(gene_list) > 1:
    # Keep rows whose index (the gene) is in the requested list.
    # reset_index() returns a new frame with the "Gene" index restored as the
    # first column, so nothing is assigned onto a slice of `annotations`.
    df = annotations[annotations.index.isin(gene_list)].reset_index()

    # Display the filtered results
    st.dataframe(df)

    # CSV payload (gene column only) for the download button, which is
    # currently disabled.
    output = df[['Gene']]
    csv = convert_df(output)
    # st.download_button("Download Filtered Colocalisation Results", csv, "filtered_colocalisation_results.csv", "text/csv", key='download-csv')
# Add a new search box for filtering by disease name
input_disease = st.text_input("Input a disease name to search in drug terms (partial match allowed):")
if input_disease:
    # Case-insensitive partial match in the "terms_drug" column.
    # regex=False treats the user's text literally, so characters such as
    # "(" or "+" cannot raise a regex compilation error.
    # na=False makes non-matching/missing entries count as "no match".
    disease_mask = annotations['terms_drug'].str.contains(
        input_disease, case=False, na=False, regex=False
    )
    # reset_index() returns a new frame with "Gene" as the first column.
    df_disease_filtered = annotations[disease_mask].reset_index()

    if not df_disease_filtered.empty:
        st.markdown(f"### Colocalisation results for disease: {input_disease}")
        # Display filtered dataframe
        st.dataframe(df_disease_filtered)
        # CSV payload for the download button, which is currently disabled.
        csv_disease_filtered = convert_df(df_disease_filtered)
        # st.download_button("Download Filtered Colocalisation Results", csv_disease_filtered, "filtered_colocalisation_disease_results.csv", "text/csv", key='download-disease-csv')
    else:
        st.write(f"No results found for disease: {input_disease}")
# Display individual gene details if a single gene is input.
# Surrounding whitespace is stripped so that " TTN " still matches "TTN".
input_gene = st.text_input("Input an individual HGNC gene:").strip()
if input_gene:
    # Exact match on the gene index; reset_index() returns a new frame with
    # "Gene" as the first column.
    df2 = annotations[annotations.index == input_gene].reset_index()

    if not df2.empty:
        st.dataframe(df2)
        # Provide a link to the gene's DrugnomeAI page
        url = f"https://astrazeneca-cgr-publications.github.io/DrugnomeAI/geneview.html?gene={input_gene}"
        markdown_link = f"[{input_gene} druggability in DrugnomeAI]({url})"
        st.markdown(markdown_link, unsafe_allow_html=True)
    else:
        st.write("Gene not found in the dataset.")
# Show the complete table, with the gene index turned back into a leading
# "Gene" column.
st.markdown("### All Colocalisation Results Interacting with Drugs")
df_total_output = annotations.reset_index()
st.dataframe(df_total_output)

# CSV payload for the download button, which is currently disabled.
csv = convert_df(df_total_output)
# st.download_button("Download Complete Colocalisation Results", csv, "complete_colocalisation_results.csv", "text/csv", key='download-all-csv')
# Protein interaction network section: centred heading plus a legend that
# explains how the plot encodes node and edge properties.
NETWORK_TITLE_HTML = (
    "<h1 style='text-align: center;'>Protein Interaction Networks of Colocalising Drug Targets</h1>"
)
# Description text
NETWORK_LEGEND_TEXT = """
- The colour of each node represents its degree (number of direct connections it has with other nodes).
- The size of each node represents its betweenness centrality (larger nodes play a more central role in the network, facilitating communication between other proteins).
- Node edges/connections are colour-coded by confidence of PPI (lighter colours (brighter) represent stronger interactions).
- Genes that interact with cardiovascular drugs are highlighted with a bold black outline.
"""

st.markdown(NETWORK_TITLE_HTML, unsafe_allow_html=True)
st.markdown(NETWORK_LEGEND_TEXT, unsafe_allow_html=True)
# Load the STRINGdb protein-protein interaction table (tab-separated).
ppi_data = pd.read_csv("STRINGdb_data.tsv", sep='\t')

# Build an undirected graph with one edge per row; the combined score is
# stored as the edge's "weight" attribute. The edges are added in bulk
# rather than through a row-by-row iterrows() loop.
G = nx.Graph()
G.add_weighted_edges_from(
    zip(ppi_data['node1'], ppi_data['node2'], ppi_data['combined_score'])
)
def rescale(l, newmin, newmax):
    """Linearly map values onto the range ``[newmin, newmax]``.

    The smallest input maps to ``newmin`` and the largest to ``newmax``.

    Args:
        l: Iterable of numbers (consumed once).
        newmin: Lower bound of the output range.
        newmax: Upper bound of the output range.

    Returns:
        A list of rescaled values in input order. An empty input yields an
        empty list. If every input value is identical there is no spread to
        scale, so each element maps to the midpoint of the output range
        instead of dividing by zero.
    """
    values = list(l)
    if not values:
        return []

    # Compute the extremes once instead of once per element.
    lowest, highest = min(values), max(values)
    spread = highest - lowest
    if spread == 0:
        return [(newmin + newmax) / 2] * len(values)

    return [(x - lowest) / spread * (newmax - newmin) + newmin for x in values]
# Use the plasma colormap, quantised to 12 colours.
graph_colormap = plt.get_cmap('plasma', 12)

# Node colour varies with degree: degrees are rescaled to [0.0, 0.9] and then
# looked up in the colormap.
c = rescale([G.degree(v) for v in G], 0.0, 0.9)
c = [graph_colormap(i) for i in c]

# Node size varies with betweenness centrality - mapped to [1500, 7000].
bc = nx.betweenness_centrality(G)
s = rescale(bc.values(), 1500, 7000)

# Edge width and edge colour both scale with the raw edge weight (the weight
# is used as-is, not inverted). Extract the weights once and reuse them.
edge_weights = [float(weight) for _, _, weight in G.edges(data='weight')]
ew = rescale(edge_weights, 0.1, 4)
ec = rescale(edge_weights, 0.1, 1)
ec = [graph_colormap(i) for i in ec]

# Spring layout with k=0.5. A fixed seed keeps node positions stable between
# script reruns; without it every widget interaction reshuffles the plot.
pos = nx.spring_layout(G, k=0.5, seed=42)

# Genes whose "Cardiovascular_Drug" value is "Yes" get a highlighted outline.
highlighted_nodes = annotations[annotations['Cardiovascular_Drug'] == 'Yes'].index
# Draw the network plot on an explicit figure/axes pair so that the colourbar
# and the Streamlit renderer are given concrete objects rather than relying
# on pyplot's implicit "current figure" state.
fig, ax = plt.subplots(figsize=(19, 9), facecolor='white')

# Draw the nodes with a black outline for highlighted ones
nx.draw_networkx_nodes(
    G,
    pos,
    ax=ax,
    node_color=c,
    node_size=s,
    edgecolors=['black' if node in highlighted_nodes else 'none' for node in G],
    linewidths=2,
)

# Draw the edges
nx.draw_networkx_edges(G, pos, ax=ax, edge_color=ec, width=ew)

# Draw node labels with font colour and size chosen from the node's degree.
# The median degree and the node -> size lookup are computed once, not once
# per node.
median_degree = np.median([degree for _, degree in G.degree()])
size_by_node = dict(zip(G.nodes, s))
for node, (x, y) in pos.items():
    if G.degree(node) < median_degree:
        # Below-median nodes get white text, shrunk with node size and
        # capped at 10pt.
        font_color = 'white'
        font_size = min(size_by_node[node] * 0.01, 10)
    else:
        font_color = 'black'
        font_size = 12  # Default size for black font
    ax.text(x, y, node, fontsize=font_size, fontweight='bold', ha='center', va='center', color=font_color)

# Add a colourbar for the node colour scale.
# NOTE(review): the bar is normalised to 0-1 (colormap position), not to the
# actual degree counts - confirm whether raw degree values should be shown.
sm = plt.cm.ScalarMappable(cmap=graph_colormap, norm=plt.Normalize(vmin=0, vmax=1))
sm.set_array([])
# The mappable is not attached to any Axes, so the target Axes must be given.
cbar = fig.colorbar(sm, ax=ax)
cbar.set_label('Node Degree (Higher = More Connected)', fontsize=12)
ax.set_axis_off()

# Display the network plot in the Streamlit app, then close the figure so
# figures do not accumulate across script reruns.
st.pyplot(fig)
plt.close(fig)