Spaces:
Running
Running
#!/usr/bin/env python3
"""
OpenMed NER Model Discovery App
A beautiful Gradio interface for exploring and discovering OpenMed NER models
"""
import html
import json
import re
from collections import Counter
from pathlib import Path

import gradio as gr
import pandas as pd
class OpenMedModelDiscovery:
    """Load the OpenMed model catalogue and render searchable HTML model cards.

    The catalogue is a CSV file. The columns read by this class are
    ``model_name``, ``short_name``, ``architecture``, ``domain``,
    ``description``, ``entities`` (comma-separated names), ``size_mb``,
    ``hf_link`` and ``code_snippet``.
    """

    # Badge colours keyed by entity display name; "Default" is the fallback.
    ENTITY_COLORS = {
        "Chemical": "#2E8B57",  # SeaGreen
        "DNA": "#4169E1",  # RoyalBlue
        "RNA": "#1E90FF",  # DodgerBlue
        "Protein": "#9932CC",  # DarkOrchid
        "Gene": "#8A2BE2",  # BlueViolet
        "Gene/Protein": "#6A5ACD",  # SlateBlue
        "Disease": "#DC143C",  # Crimson
        "Cell Line": "#FF6347",  # Tomato
        "Cell Type": "#FF4500",  # OrangeRed
        "Cell": "#FF8C00",  # DarkOrange
        "Anatomy": "#32CD32",  # LimeGreen
        "Species": "#228B22",  # ForestGreen
        "Cancer": "#8B0000",  # DarkRed
        "Clinical": "#4682B4",  # SteelBlue
        "Protein Complex": "#9370DB",  # MediumPurple
        "Protein Family": "#8B008B",  # DarkMagenta
        "Protein Variant": "#9400D3",  # DarkViolet
        "Amino Acid": "#BA55D3",  # MediumOrchid
        "Cellular Component": "#20B2AA",  # LightSeaGreen
        "Default": "#696969",  # DimGray
    }

    # Columns scanned by the free-text search.
    _SEARCH_COLUMNS = ("model_name", "short_name", "domain", "description", "entities")

    def __init__(self, data_file=None):
        """Read the catalogue CSV and derive the helper columns.

        Args:
            data_file: Path of the CSV to load. When omitted, the file
                ``data/openmed_models_database.csv`` next to this module is used.
        """
        if data_file is None:
            data_file = Path(__file__).parent / "data" / "openmed_models_database.csv"
        self.data_file = Path(data_file)
        self.df = pd.read_csv(self.data_file)
        # Clean and prepare data
        self._prepare_data()
        # Per-instance copy so callers may tweak colours without touching the class.
        self.entity_colors = dict(self.ENTITY_COLORS)

    @staticmethod
    def _normalize_entity(name):
        """Return a case-, underscore- and spacing-insensitive key for an entity name."""
        return " ".join(str(name).replace("_", " ").split()).casefold()

    @staticmethod
    def _split_entities(raw):
        """Split a comma-separated entity string into trimmed, non-empty names."""
        return [part.strip() for part in raw.split(",") if part.strip()]

    @staticmethod
    def _display(value):
        """Return ``value`` as HTML-escaped text; missing values become ``""``."""
        if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
            return ""
        return html.escape(str(value), quote=True)

    def _prepare_data(self):
        """Normalise the raw columns and add ``size_category`` and ``entity_list``."""
        # Fill missing values
        self.df["entities"] = self.df["entities"].fillna("").astype(str)
        self.df["size_mb"] = pd.to_numeric(self.df["size_mb"], errors="coerce")
        # Create size categories
        self.df["size_category"] = self.df["size_mb"].apply(self._categorize_size)
        # Split entities into lists for easier filtering
        self.df["entity_list"] = self.df["entities"].apply(self._split_entities)

    def _categorize_size(self, size_mb):
        """Map a numeric size to its display bucket ("Unknown" when missing)."""
        if pd.isna(size_mb):
            return "Unknown"
        if size_mb < 100:
            return "Compact (<100M)"
        if size_mb < 200:
            return "Medium (100-200M)"
        if size_mb < 400:
            return "Large (200-400M)"
        return "XLarge (>400M)"

    def _entity_color(self, entity):
        """Look up the badge colour for ``entity``.

        An exact key match wins; otherwise names are compared after
        normalisation so that e.g. ``cell_line`` resolves to "Cell Line".
        """
        colors = self.entity_colors
        if entity in colors:
            return colors[entity]
        wanted = self._normalize_entity(entity)
        for name, color in colors.items():
            if self._normalize_entity(name) == wanted:
                return color
        return colors["Default"]

    def create_entity_badge(self, entity):
        """Create a colored badge (an HTML ``<span>``) for an entity type."""
        color = self._entity_color(entity)
        label = html.escape(str(entity), quote=True)
        return f'<span style="background-color: {color}; color: white; padding: 3px 8px; border-radius: 12px; font-size: 12px; margin: 3px 4px; display: inline-block; line-height: 1.4;">{label}</span>'

    def create_model_card(self, row):
        """Render one catalogue row as an HTML model card.

        Args:
            row: A mapping/Series exposing the catalogue columns plus the
                derived ``entity_list``.

        Returns:
            The card markup as a string. Every value taken from ``row`` is
            HTML-escaped before being inserted.
        """
        entities_html = " ".join(
            self.create_entity_badge(e) for e in row["entity_list"] if e
        )
        if not entities_html:
            entities_html = '<span style="color: #6c757d; margin: 20px;">No entities available</span>'
        size_text = f"{row['size_mb']:.0f}M" if pd.notna(row["size_mb"]) else "Unknown"

        short_name = self._display(row["short_name"])
        architecture = self._display(row["architecture"])
        domain = self._display(row["domain"])
        description = self._display(row["description"])
        hf_link = self._display(row["hf_link"])
        snippet_html = self._display(row["code_snippet"])
        # The snippet is handed to JavaScript inside an HTML attribute: encode
        # it as a JS string literal first, then escape that for the attribute.
        raw_snippet = row["code_snippet"]
        if raw_snippet is None or (
            pd.api.types.is_scalar(raw_snippet) and pd.isna(raw_snippet)
        ):
            raw_snippet = ""
        snippet_js = html.escape(json.dumps(str(raw_snippet)), quote=True)

        card_html = f"""
        <div style="border: 1px solid #ddd; border-radius: 8px; padding: 16px; margin: 8px 0; background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);">
        <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 8px;">
        <h3 style="margin: 0; color: #2c3e50; font-size: 18px;">{short_name}</h3>
        <span style="background-color: #6c757d; color: white; padding: 4px 8px; border-radius: 4px; font-size: 12px;">{architecture}</span>
        </div>
        <div style="margin-bottom: 8px;">
        <strong>Domain:</strong> <span style="color: #495057;">{domain}</span> |
        <strong>Size:</strong> <span style="color: #495057;">{size_text}</span>
        </div>
        <div style="margin-bottom: 12px;">
        <strong>Entities:</strong><br>
        <div style="margin-top: 6px; line-height: 1.6;">
        {entities_html}
        </div>
        </div>
        <div style="margin-bottom: 12px;">
        <strong>Description:</strong><br>
        <span style="color: #6c757d; font-style: italic;">{description}</span>
        </div>
        <div style="display: flex; gap: 8px; margin-bottom: 8px;">
        <a href="{hf_link}" target="_blank" style="background-color: #007bff; color: white; padding: 6px 12px; border-radius: 4px; text-decoration: none; font-size: 12px;">π€ View on HF</a>
        <button onclick="copyToClipboard({snippet_js})" style="background-color: #28a745; color: white; padding: 6px 12px; border-radius: 4px; border: none; cursor: pointer; font-size: 12px;">π Copy Code</button>
        </div>
        <details style="margin-top: 8px;">
        <summary style="cursor: pointer; color: #007bff;">π Usage Code</summary>
        <pre style="background-color: #f8f9fa; padding: 8px; border-radius: 4px; margin-top: 4px; font-size: 11px; overflow-x: auto;"><code>from transformers import {snippet_html}</code></pre>
        </details>
        </div>
        """
        return card_html

    def search_models(
        self, text_query, entity_filters, domain_filters, size_filters, limit=20
    ):
        """Search and filter models, returning the result list as HTML.

        Args:
            text_query: Free text matched literally (not as a regular
                expression) and case-insensitively against the name, domain,
                description and entities columns. Blank or ``None`` disables it.
            entity_filters: Entity names; a model matches when it has at least
                one of them (compared case- and underscore-insensitively).
            domain_filters: Exact ``domain`` values to keep.
            size_filters: Exact ``size_category`` values to keep.
            limit: Maximum number of cards to render.

        Returns:
            HTML containing a header and one card per model, or a
            "No models found" notice when nothing matches.
        """
        matches = self.df

        # Text search
        query = (text_query or "").strip()
        if query:
            text_mask = pd.Series(False, index=matches.index)
            for column in self._SEARCH_COLUMNS:
                # regex=False: user input such as "(" or "C++" is not a valid pattern.
                text_mask = text_mask | matches[column].str.contains(
                    query, case=False, na=False, regex=False
                )
            matches = matches[text_mask.astype(bool)]

        # Entity filters
        if entity_filters:
            wanted = {self._normalize_entity(name) for name in entity_filters}
            keep = [
                any(self._normalize_entity(name) in wanted for name in names)
                for names in matches["entity_list"]
            ]
            # Explicit bool dtype keeps this a row mask even when `matches` is empty.
            matches = matches[pd.Series(keep, index=matches.index, dtype=bool)]

        # Domain filters
        if domain_filters:
            matches = matches[matches["domain"].isin(domain_filters)]

        # Size filters
        if size_filters:
            matches = matches[matches["size_category"].isin(size_filters)]

        # Limit results (UI sliders may deliver the limit as a float)
        total = len(matches)
        limit = 20 if limit is None else max(int(limit), 0)
        shown = matches.head(limit)

        if shown.empty:
            return "<div style='text-align: center; padding: 40px; color: #6c757d;'><h3>No models found π</h3><p>Try adjusting your search criteria</p></div>"

        # Report the real number of matches, and say so when the list is cut short.
        summary = f"Found {total} models"
        if len(shown) < total:
            summary += f" (showing first {len(shown)})"

        # Create model cards
        parts = [f"<div style='margin-bottom: 16px;'><h2>{summary}</h2></div>"]
        parts.extend(self.create_model_card(row) for _, row in shown.iterrows())
        return "".join(parts)

    def get_entity_stats(self):
        """Return a dict mapping each non-empty entity name to its model count."""
        counts = Counter(
            name for names in self.df["entity_list"] for name in names if name
        )
        return dict(counts)

    def get_filter_options(self):
        """Return ``(entities, domains, sizes)`` as sorted lists of distinct values."""
        # Missing domains are dropped: NaN cannot be ordered against strings.
        domains = sorted(self.df["domain"].dropna().unique())
        sizes = sorted(self.df["size_category"].unique())
        entities = sorted(
            {name for names in self.df["entity_list"] for name in names if name}
        )
        return entities, domains, sizes
# Fixed catalogue of entity types offered in the "Entities" dropdown.
ALL_ENTITIES = [
    "amino_acid", "anatomical_system", "anatomy",
    "cancer", "cell", "cell_line", "cell_line_name", "cell_type",
    "cellular_component", "chemical", "clinical",
    "developing_anatomical_structure", "disease", "dna",
    "gene/protein", "gene_or_protein",
    "immaterial_anatomical_entity", "multi_tissue_structure",
    "organ", "organism", "organism_subdivision", "organism_substance",
    "pathological_formation",
    "protein", "protein_complex", "protein_family", "protein_variant",
    "rna", "species", "tissue",
]

# Single shared discovery instance; constructing it loads the model CSV.
app = OpenMedModelDiscovery()

# Domains and sizes come from the loaded data. The entity names found in the
# data are deliberately not used for the UI: the dropdown shows the fixed
# list above instead.
_catalogue_entities, domains, sizes = app.get_filter_options()
entities = ALL_ENTITIES
# Custom CSS. This must be bare CSS rules: it is handed to the `css=` argument
# of gr.Blocks, which expects stylesheet text rather than HTML markup.
custom_css = """
.gradio-container {
    max-width: 1200px !important;
}
.model-grid {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(400px, 1fr));
    gap: 16px;
    margin-top: 16px;
}
"""

# Copy to clipboard functionality. The model cards call copyToClipboard() from
# an inline onclick handler, so the function has to be defined in the page
# itself; it is injected through the `head=` argument of gr.Blocks.
custom_head = """
<script>
function copyToClipboard(text) {
    navigator.clipboard.writeText(text).then(function() {
        alert('Code copied to clipboard!');
    });
}
</script>
"""

# Create the Gradio interface
with gr.Blocks(
    theme=gr.themes.Soft(
        primary_hue="blue", secondary_hue="green", neutral_hue="slate"
    ),
    css=custom_css,
    head=custom_head,
    title="π¬ OpenMed NER Model Discovery App",
) as demo:
    # Header
    gr.HTML(
        """
        <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px;">
        <h1 style="color: white; margin: 0; font-size: 36px;">π¬ OpenMed NER Model Discovery</h1>
        <p style="color: white; margin: 10px 0 0 0; font-size: 18px;">Discover the perfect NER model for your biomedical text analysis from 380+ free OpenMed models</p>
        </div>
        """
    )

    with gr.Tabs():
        # Search Tab
        with gr.Tab("π Search Models", elem_id="search-tab"):
            with gr.Row():
                # Left column: query box and filters
                with gr.Column(scale=1):
                    gr.Markdown("### π― Search & Filter")
                    text_search = gr.Textbox(
                        label="Search Models",
                        placeholder="e.g., chemical detection, cancer genomics, DNA...",
                        lines=1,
                    )
                    entity_filter = gr.Dropdown(
                        choices=entities,
                        label="Entities",
                        info="Search and select entities (e.g., Chemical, DNA, Disease)...",
                        multiselect=True,
                        value=[],
                        interactive=True,
                    )
                    with gr.Row():
                        domain_filter = gr.CheckboxGroup(
                            choices=domains, label="Domains", value=[]
                        )
                        size_filter = gr.CheckboxGroup(
                            choices=sizes, label="Model Size", value=[]
                        )
                    result_limit = gr.Slider(
                        minimum=5, maximum=50, value=20, step=5, label="Max Results"
                    )
                    clear_btn = gr.Button("ποΈ Clear Filters", variant="secondary")
                # Right column: rendered model cards
                with gr.Column(scale=2):
                    gr.Markdown("### π Search Results")
                    results_display = gr.HTML()

            # Auto-search on any input change
            def auto_search(*args):
                """Forward the current input values to ``app.search_models``."""
                return app.search_models(*args)

            # The order matches the positional parameters of search_models.
            search_inputs = [
                text_search,
                entity_filter,
                domain_filter,
                size_filter,
                result_limit,
            ]

            # Connect auto-search to all inputs
            for component in search_inputs:
                component.change(
                    fn=auto_search,
                    inputs=search_inputs,
                    outputs=results_display,
                )

            # Clear filters
            def clear_filters():
                """Return the reset value for each search input, in input order."""
                return "", [], [], [], 20

            clear_btn.click(
                fn=clear_filters,
                outputs=search_inputs,
            )

        # About Tab
        with gr.Tab("βΉοΈ About", elem_id="about-tab"):
            gr.Markdown(
                """
                # π¬ About OpenMed NER Model Discovery
                ## What is OpenMed?
                OpenMed is a collection of **380+ state-of-the-art Named Entity Recognition (NER) models** for biomedical and clinical text analysis. All models are:
                - β **Completely Free** - Apache 2.0 license
                - β **High Performance** - F1 scores up to 99.8%
                - β **Ready to Use** - Compatible with Hugging Face Transformers
                - β **Diverse** - Covers 8+ medical domains and 20+ entity types
                ## π― Use Cases
                - **Drug Discovery** - Identify chemicals and compounds
                - **Clinical Research** - Extract diseases and symptoms
                - **Genomics** - Detect genes, proteins, and DNA/RNA
                - **Medical Records** - Parse anatomical terms and clinical notes
                - **Pharmacovigilance** - Monitor drug safety and adverse events
                ## ποΈ Model Architectures
                - **BERT** - Bidirectional transformers for robust performance
                - **DeBERTa** - Enhanced attention mechanisms
                - **RoBERTa** - Optimized training for biomedical text
                - **ModernBERT** - Latest advances in transformer architecture
                ## π Coverage
                - **8 Medical Domains** - Pharmacology, Genomics, Oncology, Pathology, etc.
                - **20+ Entity Types** - Chemical, DNA, RNA, Protein, Disease, Anatomy, etc.
                - **Multiple Sizes** - From 33M to 568M parameters
                - **380+ Models** - Comprehensive coverage for any biomedical NLP task
                ## π Getting Started
                1. **Search** - Use the search tab to find models by domain, entity type, or keywords
                2. **Compare** - View model cards with performance metrics and descriptions
                3. **Copy Code** - Get ready-to-use code snippets
                4. **Deploy** - Download and use with Hugging Face Transformers
                ## π§ Contact & Support
                - **Models** - [OpenMed on Hugging Face](https://huggingface.co/OpenMed)
                - **Paper** - Coming soon on arXiv
                - **Community** - Join discussions on Hugging Face
                ---
                Built with β€οΈ for the biomedical research community
                """
            )

    # Load initial results: an unfiltered search when the page first opens
    demo.load(fn=lambda: app.search_models("", [], [], [], 20), outputs=results_display)
if __name__ == "__main__":
    # Start the server only when this file is executed as a script.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    demo.launch(**launch_options)