Spaces:

OpenMed
/

openmed-ner-models

Running

App Files Files

MaziyarPanahi committed on Jul 21

Commit

0bf0309

1 Parent(s): 3013461

init

Browse files

Files changed (4) hide show

README.md +3 -4
app.py +426 -4
data/openmed_models_database.csv +0 -0
requirements.txt +7 -0

README.md CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
-title: Openmed Ner Models
-emoji: 😻
-colorFrom: yellow
 colorTo: green
 sdk: gradio
 sdk_version: 5.38.0
@@ -11,4 +11,3 @@ license: apache-2.0
 short_description: It helps you find the best medical and clinical NER models
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: OpenMed NER Model Discovery
+emoji: 🔬
+colorFrom: blue
 colorTo: green
 sdk: gradio
 sdk_version: 5.38.0
 short_description: It helps you find the best medical and clinical NER models
 ---

app.py CHANGED Viewed

@@ -1,7 +1,429 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

+#!/usr/bin/env python3
+"""
+OpenMed NER Model Discovery App
+A beautiful Gradio interface for exploring and discovering OpenMed NER models
+"""
 import gradio as gr
+import pandas as pd
+from pathlib import Path
+import re
+from collections import Counter
+class OpenMedModelDiscovery:
+    """Searchable catalogue of OpenMed NER models rendered as HTML cards.
+    The catalogue is read from ``data/openmed_models_database.csv`` next to
+    this file. On load two derived columns are added: ``size_category`` (a
+    size bucket label) and ``entity_list`` (the comma-separated ``entities``
+    column split into a list of names).
+    """
+    # Text columns scanned by the free-text search box.
+    _SEARCH_COLUMNS = ("model_name", "short_name", "domain", "description", "entities")
+    def __init__(self):
+        """Load the CSV catalogue, derive helper columns and define badge colors."""
+        self.data_file = Path(__file__).parent / "data" / "openmed_models_database.csv"
+        self.df = pd.read_csv(self.data_file)
+        # Clean and prepare data
+        self._prepare_data()
+        # Badge background color per entity type; "Default" is the fallback.
+        self.entity_colors = {
+            "Chemical": "#2E8B57",  # SeaGreen
+            "DNA": "#4169E1",  # RoyalBlue
+            "RNA": "#1E90FF",  # DodgerBlue
+            "Protein": "#9932CC",  # DarkOrchid
+            "Gene": "#8A2BE2",  # BlueViolet
+            "Gene/Protein": "#6A5ACD",  # SlateBlue
+            "Disease": "#DC143C",  # Crimson
+            "Cell Line": "#FF6347",  # Tomato
+            "Cell Type": "#FF4500",  # OrangeRed
+            "Cell": "#FF8C00",  # DarkOrange
+            "Anatomy": "#32CD32",  # LimeGreen
+            "Species": "#228B22",  # ForestGreen
+            "Cancer": "#8B0000",  # DarkRed
+            "Clinical": "#4682B4",  # SteelBlue
+            "Protein Complex": "#9370DB",  # MediumPurple
+            "Protein Family": "#8B008B",  # DarkMagenta
+            "Protein Variant": "#9400D3",  # DarkViolet
+            "Amino Acid": "#BA55D3",  # MediumOrchid
+            "Cellular Component": "#20B2AA",  # LightSeaGreen
+            "Default": "#696969",  # DimGray
+        }
+    @staticmethod
+    def _normalize_entity(entity):
+        """Return a case- and spacing-insensitive key for an entity name.
+        "Cell Line", "cell line" and "cell_line" all map to "cell_line", so
+        Title Case names and snake_case names can be compared with each other.
+        """
+        return "_".join(str(entity).lower().split())
+    @staticmethod
+    def _safe_text(value):
+        """Return ``value`` as HTML-escaped text; missing values become ""."""
+        import html
+        if pd.isna(value):
+            return ""
+        return html.escape(str(value), quote=True)
+    def _prepare_data(self):
+        """Clean the raw catalogue in place and add the derived columns."""
+        # Missing entity strings become "" so they split into an empty list.
+        self.df["entities"] = self.df["entities"].fillna("")
+        # Non-numeric sizes become NaN and are bucketed as "Unknown" below.
+        self.df["size_mb"] = pd.to_numeric(self.df["size_mb"], errors="coerce")
+        # Create size categories
+        self.df["size_category"] = self.df["size_mb"].apply(self._categorize_size)
+        # Split entities into lists for easier filtering; blank items are dropped
+        # here so consumers never see empty names.
+        self.df["entity_list"] = self.df["entities"].apply(
+            lambda x: [e.strip() for e in str(x).split(",") if e.strip()]
+        )
+    def _categorize_size(self, size_mb):
+        """Return the size bucket label for ``size_mb`` ("Unknown" when missing).
+        NOTE(review): the labels use an "M" suffix while the column is named
+        ``size_mb`` - confirm which unit the CSV actually stores.
+        """
+        if pd.isna(size_mb):
+            return "Unknown"
+        elif size_mb < 100:
+            return "Compact (<100M)"
+        elif size_mb < 200:
+            return "Medium (100-200M)"
+        elif size_mb < 400:
+            return "Large (200-400M)"
+        else:
+            return "XLarge (>400M)"
+    def create_entity_badge(self, entity):
+        """Return an HTML ``<span>`` badge for ``entity`` in its configured color.
+        The color is looked up by exact name first and then by normalized name
+        (see ``_normalize_entity``); unknown entities get the "Default" color.
+        The label itself is HTML-escaped.
+        """
+        color = self.entity_colors.get(entity)
+        if color is None:
+            wanted = self._normalize_entity(entity)
+            color = next(
+                (
+                    value
+                    for name, value in self.entity_colors.items()
+                    if self._normalize_entity(name) == wanted
+                ),
+                self.entity_colors["Default"],
+            )
+        return f'<span style="background-color: {color}; color: white; padding: 3px 8px; border-radius: 12px; font-size: 12px; margin: 3px 4px; display: inline-block; line-height: 1.4;">{self._safe_text(entity)}</span>'
+    def create_model_card(self, row):
+        """Return the HTML card for one catalogue row.
+        ``row`` must provide ``short_name``, ``architecture``, ``domain``,
+        ``size_mb``, ``entity_list``, ``description``, ``hf_link`` and
+        ``code_snippet``. Every value is HTML-escaped before interpolation so
+        catalogue text cannot break or inject markup.
+        """
+        import json
+        text = self._safe_text
+        entities_html = " ".join(
+            self.create_entity_badge(e) for e in row["entity_list"] if e
+        )
+        if not entities_html:
+            entities_html = '<span style="color: #6c757d; margin: 20px;">No entities available</span>'
+        size_text = f"{row['size_mb']:.0f}M" if pd.notna(row["size_mb"]) else "Unknown"
+        snippet = "" if pd.isna(row["code_snippet"]) else str(row["code_snippet"])
+        # The snippet is handed to JavaScript inside an HTML attribute: encode it
+        # as a JS string literal first (json.dumps), then escape that literal for
+        # the attribute, so quotes and newlines in the snippet stay intact.
+        copy_arg = text(json.dumps(snippet))
+        card_html = f"""
+        <div style="border: 1px solid #ddd; border-radius: 8px; padding: 16px; margin: 8px 0; background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);">
+            <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 8px;">
+                <h3 style="margin: 0; color: #2c3e50; font-size: 18px;">{text(row['short_name'])}</h3>
+                <span style="background-color: #6c757d; color: white; padding: 4px 8px; border-radius: 4px; font-size: 12px;">{text(row['architecture'])}</span>
+            </div>
+            <div style="margin-bottom: 8px;">
+                <strong>Domain:</strong> <span style="color: #495057;">{text(row['domain'])}</span> |
+                <strong>Size:</strong> <span style="color: #495057;">{size_text}</span>
+            </div>
+            <div style="margin-bottom: 12px;">
+                <strong>Entities:</strong><br>
+                <div style="margin-top: 6px; line-height: 1.6;">
+                    {entities_html}
+                </div>
+            </div>
+            <div style="margin-bottom: 12px;">
+                <strong>Description:</strong><br>
+                <span style="color: #6c757d; font-style: italic;">{text(row['description'])}</span>
+            </div>
+            <div style="display: flex; gap: 8px; margin-bottom: 8px;">
+                <a href="{text(row['hf_link'])}" target="_blank" style="background-color: #007bff; color: white; padding: 6px 12px; border-radius: 4px; text-decoration: none; font-size: 12px;">🤗 View on HF</a>
+                <button onclick="copyToClipboard({copy_arg})" style="background-color: #28a745; color: white; padding: 6px 12px; border-radius: 4px; border: none; cursor: pointer; font-size: 12px;">📋 Copy Code</button>
+            </div>
+            <details style="margin-top: 8px;">
+                <summary style="cursor: pointer; color: #007bff;">📝 Usage Code</summary>
+                <pre style="background-color: #f8f9fa; padding: 8px; border-radius: 4px; margin-top: 4px; font-size: 11px; overflow-x: auto;"><code>from transformers import {text(snippet)}</code></pre>
+            </details>
+        </div>
+        """
+        return card_html
+    def search_models(
+        self, text_query, entity_filters, domain_filters, size_filters, limit=20
+    ):
+        """Return HTML for the models matching every given criterion.
+        Args:
+            text_query: Free text matched literally and case-insensitively
+                against the name, domain, description and entities columns.
+                Empty or ``None`` disables the text search.
+            entity_filters: Entity names; a model matches when it has at least
+                one of them. Names are compared in normalized form, so
+                "cell_line" matches "Cell Line".
+            domain_filters: Domains to keep (exact match); empty keeps all.
+            size_filters: Size category labels to keep; empty keeps all.
+            limit: Maximum number of cards to render (ints and floats accepted).
+        Returns:
+            An HTML string: a heading followed by one card per model, or a
+            "No models found" notice when nothing matches.
+        """
+        results = self.df
+        # Text search
+        query = (text_query or "").strip()
+        if query:
+            text_mask = pd.Series(False, index=results.index)
+            for column in self._SEARCH_COLUMNS:
+                # regex=False: the query is user text, not a pattern, so input
+                # such as "(" or "+" must not raise or be treated as a regex.
+                text_mask |= results[column].str.contains(
+                    query, case=False, na=False, regex=False
+                )
+            results = results[text_mask]
+        # Entity filters
+        if entity_filters:
+            wanted = {self._normalize_entity(e) for e in entity_filters}
+            # Explicit bool dtype keeps the mask a valid boolean indexer even
+            # when no rows are left at this point.
+            entity_mask = pd.Series(
+                [
+                    any(self._normalize_entity(e) in wanted for e in entity_list)
+                    for entity_list in results["entity_list"]
+                ],
+                index=results.index,
+                dtype=bool,
+            )
+            results = results[entity_mask]
+        # Domain filters
+        if domain_filters:
+            results = results[results["domain"].isin(domain_filters)]
+        # Size filters
+        if size_filters:
+            results = results[results["size_category"].isin(size_filters)]
+        total = len(results)
+        if total == 0:
+            return "<div style='text-align: center; padding: 40px; color: #6c757d;'><h3>No models found 😞</h3><p>Try adjusting your search criteria</p></div>"
+        # Limit results; the slider may deliver a float, head() needs an int.
+        shown = results.head(int(limit))
+        # Report the number of matches, not the number of cards after the cut.
+        heading = f"Found {total} models"
+        if total > len(shown):
+            heading += f" (showing first {len(shown)})"
+        parts = [f"<div style='margin-bottom: 16px;'><h2>{heading}</h2></div>"]
+        parts.extend(self.create_model_card(row) for _, row in shown.iterrows())
+        return "".join(parts)
+    def get_entity_stats(self):
+        """Return a dict mapping each entity name to the number of models listing it."""
+        entity_counts = Counter(
+            entity
+            for entity_list in self.df["entity_list"]
+            for entity in entity_list
+            if entity  # Remove empty strings
+        )
+        return dict(entity_counts)
+    def get_filter_options(self):
+        """Return ``(entities, domains, sizes)``, each a sorted list of unique values."""
+        # Missing domains are dropped: NaN cannot be sorted together with strings.
+        domains = sorted(self.df["domain"].dropna().unique())
+        sizes = sorted(self.df["size_category"].unique())
+        entities = sorted(
+            {
+                entity
+                for entity_list in self.df["entity_list"]
+                for entity in entity_list
+                if entity
+            }
+        )
+        return entities, domains, sizes
+# Initialize the app
+# Constructing the discovery object reads the CSV catalogue at import time.
+app = OpenMedModelDiscovery()
+# Get filter options
+# Fixed list of entity names offered in the "Entities" dropdown of the UI.
+# NOTE(review): these names are lowercase snake_case, whereas the keys of
+# `entity_colors` in OpenMedModelDiscovery are Title Case ("Cell Line") -
+# confirm which spelling the CSV `entities` column actually uses.
+ALL_ENTITIES = [
+    "amino_acid",
+    "anatomical_system",
+    "anatomy",
+    "cancer",
+    "cell",
+    "cell_line",
+    "cell_line_name",
+    "cell_type",
+    "cellular_component",
+    "chemical",
+    "clinical",
+    "developing_anatomical_structure",
+    "disease",
+    "dna",
+    "gene/protein",
+    "gene_or_protein",
+    "immaterial_anatomical_entity",
+    "multi_tissue_structure",
+    "organ",
+    "organism",
+    "organism_subdivision",
+    "organism_substance",
+    "pathological_formation",
+    "protein",
+    "protein_complex",
+    "protein_family",
+    "protein_variant",
+    "rna",
+    "species",
+    "tissue",
+]
+# Domains and sizes come from the loaded data; the entity list returned here
+# is immediately replaced by the fixed list below.
+entities, domains, sizes = app.get_filter_options()
+# Use comprehensive entity list instead of dynamic extraction for UI
+entities = ALL_ENTITIES
+# Custom CSS
+# Raw CSS rules only: the `css` argument of gr.Blocks takes a CSS string, so
+# no <style>/<script> markup belongs in here.
+custom_css = """
+.gradio-container {
+    max-width: 1200px !important;
+}
+.model-grid {
+    display: grid;
+    grid-template-columns: repeat(auto-fit, minmax(400px, 1fr));
+    gap: 16px;
+    margin-top: 16px;
+}
+"""
+# Copy to clipboard functionality
+# Called by the "Copy Code" button of each model card. Scripts are passed via
+# the `head` argument of gr.Blocks so the function is defined on the page.
+custom_head = """
+<script>
+function copyToClipboard(text) {
+    navigator.clipboard.writeText(text).then(function() {
+        alert('Code copied to clipboard!');
+    });
+}
+</script>
+"""
+# Create the Gradio interface
+with gr.Blocks(
+    theme=gr.themes.Soft(
+        primary_hue="blue", secondary_hue="green", neutral_hue="slate"
+    ),
+    css=custom_css,
+    head=custom_head,
+    title="🔬 OpenMed NER Model Discovery App",
+) as demo:
+    # Header
+    gr.HTML(
+        """
+    <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px;">
+        <h1 style="color: white; margin: 0; font-size: 36px;">🔬 OpenMed NER Model Discovery</h1>
+        <p style="color: white; margin: 10px 0 0 0; font-size: 18px;">Discover the perfect NER model for your biomedical text analysis from 380+ free OpenMed models</p>
+    </div>
+    """
+    )
+    with gr.Tabs():
+        # Search Tab
+        with gr.Tab("🔍 Search Models", elem_id="search-tab"):
+            with gr.Row():
+                # Left column: search box and filters
+                with gr.Column(scale=1):
+                    gr.Markdown("### 🎯 Search & Filter")
+                    text_search = gr.Textbox(
+                        label="Search Models",
+                        placeholder="e.g., chemical detection, cancer genomics, DNA...",
+                        lines=1,
+                    )
+                    entity_filter = gr.Dropdown(
+                        choices=entities,
+                        label="Entities",
+                        info="Search and select entities (e.g., Chemical, DNA, Disease)...",
+                        multiselect=True,
+                        value=[],
+                        interactive=True,
+                    )
+                    with gr.Row():
+                        domain_filter = gr.CheckboxGroup(
+                            choices=domains, label="Domains", value=[]
+                        )
+                        size_filter = gr.CheckboxGroup(
+                            choices=sizes, label="Model Size", value=[]
+                        )
+                    result_limit = gr.Slider(
+                        minimum=5, maximum=50, value=20, step=5, label="Max Results"
+                    )
+                    clear_btn = gr.Button("🗑️ Clear Filters", variant="secondary")
+                # Right column: rendered model cards
+                with gr.Column(scale=2):
+                    gr.Markdown("### 📋 Search Results")
+                    results_display = gr.HTML()
+            # The five inputs, in the positional order search_models expects.
+            search_inputs = [
+                text_search,
+                entity_filter,
+                domain_filter,
+                size_filter,
+                result_limit,
+            ]
+            # Auto-search on any input change
+            def auto_search(*args):
+                """Forward the current input values to ``app.search_models``."""
+                return app.search_models(*args)
+            # Connect auto-search to all inputs
+            for component in search_inputs:
+                component.change(
+                    fn=auto_search,
+                    inputs=search_inputs,
+                    outputs=results_display,
+                )
+            # Clear filters
+            def clear_filters():
+                """Return the reset value for each search input, in input order."""
+                return "", [], [], [], 20
+            clear_btn.click(
+                fn=clear_filters,
+                outputs=search_inputs,
+            )
+        # About Tab
+        with gr.Tab("ℹ️ About", elem_id="about-tab"):
+            gr.Markdown(
+                """
+            # 🔬 About OpenMed NER Model Discovery
+            ## What is OpenMed?
+            OpenMed is a collection of **380+ state-of-the-art Named Entity Recognition (NER) models** for biomedical and clinical text analysis. All models are:
+            - ✅ **Completely Free** - Apache 2.0 license
+            - ✅ **High Performance** - F1 scores up to 99.8%
+            - ✅ **Ready to Use** - Compatible with Hugging Face Transformers
+            - ✅ **Diverse** - Covers 8+ medical domains and 20+ entity types
+            ## 🎯 Use Cases
+            - **Drug Discovery** - Identify chemicals and compounds
+            - **Clinical Research** - Extract diseases and symptoms
+            - **Genomics** - Detect genes, proteins, and DNA/RNA
+            - **Medical Records** - Parse anatomical terms and clinical notes
+            - **Pharmacovigilance** - Monitor drug safety and adverse events
+            ## 🏗️ Model Architectures
+            - **BERT** - Bidirectional transformers for robust performance
+            - **DeBERTa** - Enhanced attention mechanisms
+            - **RoBERTa** - Optimized training for biomedical text
+            - **ModernBERT** - Latest advances in transformer architecture
+            ## 📊 Coverage
+            - **8 Medical Domains** - Pharmacology, Genomics, Oncology, Pathology, etc.
+            - **20+ Entity Types** - Chemical, DNA, RNA, Protein, Disease, Anatomy, etc.
+            - **Multiple Sizes** - From 33M to 568M parameters
+            - **380+ Models** - Comprehensive coverage for any biomedical NLP task
+            ## 🚀 Getting Started
+            1. **Search** - Use the search tab to find models by domain, entity type, or keywords
+            2. **Compare** - View model cards with performance metrics and descriptions
+            3. **Copy Code** - Get ready-to-use code snippets
+            4. **Deploy** - Download and use with Hugging Face Transformers
+            ## 📧 Contact & Support
+            - **Models** - [OpenMed on Hugging Face](https://huggingface.co/OpenMed)
+            - **Paper** - Coming soon on arXiv
+            - **Community** - Join discussions on Hugging Face
+            ---
+            Built with ❤️ for the biomedical research community
+            """
+            )
+    # Load initial results
+    # An unfiltered search populates the results pane when the page opens.
+    demo.load(fn=lambda: app.search_models("", [], [], [], 20), outputs=results_display)
+if __name__ == "__main__":
+    # Script entry point: serve on all network interfaces at port 7860,
+    # without a public share link, and surface errors in the UI.
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False,
+        show_error=True,
+    )

data/openmed_models_database.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+gradio
+pandas
+numpy
+requests
+transformers
+torch
+entrypoints