MaziyarPanahi committed on
Commit
0bf0309
·
1 Parent(s): 3013461
Files changed (4) hide show
  1. README.md +3 -4
  2. app.py +426 -4
  3. data/openmed_models_database.csv +0 -0
  4. requirements.txt +7 -0
README.md CHANGED
@@ -1,7 +1,7 @@
1
  ---
2
- title: Openmed Ner Models
3
- emoji: 😻
4
- colorFrom: yellow
5
  colorTo: green
6
  sdk: gradio
7
  sdk_version: 5.38.0
@@ -11,4 +11,3 @@ license: apache-2.0
11
  short_description: It helps you find the best medical and clinical NER models
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: OpenMed NER Model Discovery
3
+ emoji: 🔬
4
+ colorFrom: blue
5
  colorTo: green
6
  sdk: gradio
7
  sdk_version: 5.38.0
 
11
  short_description: It helps you find the best medical and clinical NER models
12
  ---
13
 
 
app.py CHANGED
@@ -1,7 +1,429 @@
 
 
 
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ OpenMed NER Model Discovery App
4
+ A beautiful Gradio interface for exploring and discovering OpenMed NER models
5
+ """
6
+
7
  import gradio as gr
8
+ import pandas as pd
9
+ from pathlib import Path
10
+ import re
11
+ from collections import Counter
12
+
13
+
14
class OpenMedModelDiscovery:
    """Searchable catalogue of OpenMed NER models backed by a CSV file.

    The CSV ``data/openmed_models_database.csv`` (next to this module) is
    loaded once at construction.  The methods of this class read the columns
    ``model_name``, ``short_name``, ``architecture``, ``domain``,
    ``description``, ``entities`` (comma-separated), ``size_mb``, ``hf_link``
    and ``code_snippet``; ``_prepare_data`` adds ``size_category`` and
    ``entity_list``.
    """

    # Columns scanned by the free-text search in ``search_models``.
    _SEARCH_COLUMNS = ("model_name", "short_name", "domain", "description", "entities")

    def __init__(self):
        self.data_file = Path(__file__).parent / "data" / "openmed_models_database.csv"
        self.df = pd.read_csv(self.data_file)

        # Clean and prepare data
        self._prepare_data()

        # Define entity colors
        self.entity_colors = {
            "Chemical": "#2E8B57",  # SeaGreen
            "DNA": "#4169E1",  # RoyalBlue
            "RNA": "#1E90FF",  # DodgerBlue
            "Protein": "#9932CC",  # DarkOrchid
            "Gene": "#8A2BE2",  # BlueViolet
            "Gene/Protein": "#6A5ACD",  # SlateBlue
            "Disease": "#DC143C",  # Crimson
            "Cell Line": "#FF6347",  # Tomato
            "Cell Type": "#FF4500",  # OrangeRed
            "Cell": "#FF8C00",  # DarkOrange
            "Anatomy": "#32CD32",  # LimeGreen
            "Species": "#228B22",  # ForestGreen
            "Cancer": "#8B0000",  # DarkRed
            "Clinical": "#4682B4",  # SteelBlue
            "Protein Complex": "#9370DB",  # MediumPurple
            "Protein Family": "#8B008B",  # DarkMagenta
            "Protein Variant": "#9400D3",  # DarkViolet
            "Amino Acid": "#BA55D3",  # MediumOrchid
            "Cellular Component": "#20B2AA",  # LightSeaGreen
            "Default": "#696969",  # DimGray
        }

    @staticmethod
    def _normalize_entity(name):
        """Return a case/spacing-insensitive key for an entity name.

        ``"Amino Acid"``, ``"amino_acid"`` and ``"amino-acid"`` all map to
        ``"amino_acid"`` so UI choices and data values can be compared.
        """
        return "_".join(str(name).casefold().replace("-", " ").replace("_", " ").split())

    @staticmethod
    def _html_text(value):
        """Return ``value`` as HTML-escaped text; missing values become ``""``."""
        from html import escape

        # NaN is the only float that is not equal to itself.
        if value is None or (isinstance(value, float) and value != value):
            return ""
        return escape(str(value), quote=True)

    def _prepare_data(self):
        """Clean ``self.df`` in place and add the derived columns."""
        # Fill missing values
        self.df["entities"] = self.df["entities"].fillna("")
        self.df["size_mb"] = pd.to_numeric(self.df["size_mb"], errors="coerce")

        # Create size categories
        self.df["size_category"] = self.df["size_mb"].apply(self._categorize_size)

        # Split entities into lists for easier filtering; blank items
        # (e.g. from a trailing comma) are dropped here once.
        self.df["entity_list"] = self.df["entities"].apply(
            lambda raw: [part.strip() for part in raw.split(",") if part.strip()]
            if raw
            else []
        )

    def _categorize_size(self, size_mb):
        """Map a numeric ``size_mb`` value to its size-bucket label."""
        if pd.isna(size_mb):
            return "Unknown"
        if size_mb < 100:
            return "Compact (<100M)"
        if size_mb < 200:
            return "Medium (100-200M)"
        if size_mb < 400:
            return "Large (200-400M)"
        return "XLarge (>400M)"

    def create_entity_badge(self, entity):
        """Return an HTML ``<span>`` badge for ``entity``.

        The colour is looked up in ``self.entity_colors``: first by exact
        name, then by normalized name, finally falling back to ``"Default"``.
        The entity text is HTML-escaped.
        """
        color = self.entity_colors.get(entity)
        if color is None:
            wanted = self._normalize_entity(entity)
            color = next(
                (
                    value
                    for name, value in self.entity_colors.items()
                    if self._normalize_entity(name) == wanted
                ),
                self.entity_colors["Default"],
            )
        label = self._html_text(entity)
        return f'<span style="background-color: {color}; color: white; padding: 3px 8px; border-radius: 12px; font-size: 12px; margin: 3px 4px; display: inline-block; line-height: 1.4;">{label}</span>'

    def create_model_card(self, row):
        """Return the HTML card for one model.

        ``row`` is a mapping/Series with the columns listed in the class
        docstring.  Every value is HTML-escaped before interpolation so that
        quotes, ``<`` or ``&`` in the data cannot break the markup.
        """
        entities_html = " ".join(
            self.create_entity_badge(e) for e in row["entity_list"] if e
        )
        if not entities_html:
            entities_html = '<span style="color: #6c757d; margin: 20px;">No entities available</span>'

        size_text = f"{row['size_mb']:.0f}M" if pd.notna(row["size_mb"]) else "Unknown"

        short_name = self._html_text(row["short_name"])
        architecture = self._html_text(row["architecture"])
        domain = self._html_text(row["domain"])
        description = self._html_text(row["description"])
        hf_link = self._html_text(row["hf_link"])
        code_snippet = self._html_text(row["code_snippet"])

        # The snippet travels in a data attribute instead of being spliced
        # into the JavaScript call, so quotes/newlines in it are harmless.
        card_html = f"""
        <div style="border: 1px solid #ddd; border-radius: 8px; padding: 16px; margin: 8px 0; background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);">
            <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 8px;">
                <h3 style="margin: 0; color: #2c3e50; font-size: 18px;">{short_name}</h3>
                <span style="background-color: #6c757d; color: white; padding: 4px 8px; border-radius: 4px; font-size: 12px;">{architecture}</span>
            </div>

            <div style="margin-bottom: 8px;">
                <strong>Domain:</strong> <span style="color: #495057;">{domain}</span> |
                <strong>Size:</strong> <span style="color: #495057;">{size_text}</span>
            </div>

            <div style="margin-bottom: 12px;">
                <strong>Entities:</strong><br>
                <div style="margin-top: 6px; line-height: 1.6;">
                    {entities_html}
                </div>
            </div>

            <div style="margin-bottom: 12px;">
                <strong>Description:</strong><br>
                <span style="color: #6c757d; font-style: italic;">{description}</span>
            </div>

            <div style="display: flex; gap: 8px; margin-bottom: 8px;">
                <a href="{hf_link}" target="_blank" style="background-color: #007bff; color: white; padding: 6px 12px; border-radius: 4px; text-decoration: none; font-size: 12px;">🤗 View on HF</a>
                <button data-code="{code_snippet}" onclick="copyToClipboard(this.dataset.code)" style="background-color: #28a745; color: white; padding: 6px 12px; border-radius: 4px; border: none; cursor: pointer; font-size: 12px;">📋 Copy Code</button>
            </div>

            <details style="margin-top: 8px;">
                <summary style="cursor: pointer; color: #007bff;">📝 Usage Code</summary>
                <pre style="background-color: #f8f9fa; padding: 8px; border-radius: 4px; margin-top: 4px; font-size: 11px; overflow-x: auto;"><code>from transformers import {code_snippet}</code></pre>
            </details>
        </div>
        """
        return card_html

    def search_models(
        self, text_query, entity_filters, domain_filters, size_filters, limit=20
    ):
        """Search and filter models, returning the result list as HTML.

        Args:
            text_query: Free text matched literally (case-insensitive, not as
                a regular expression) against the searchable columns.
            entity_filters: Entity names; a model matches when it has at
                least one of them (compared case/spacing-insensitively).
            domain_filters: Allowed ``domain`` values; empty means no filter.
            size_filters: Allowed ``size_category`` values; empty means no
                filter.
            limit: Maximum number of model cards to render.

        Returns:
            An HTML string: a "Found N models" header followed by one card
            per shown model, or a "No models found" notice.
        """
        filtered_df = self.df

        # Text search
        query = (text_query or "").strip()
        if query:
            text_mask = pd.Series(False, index=filtered_df.index)
            for column in self._SEARCH_COLUMNS:
                # regex=False: user input such as "(" or "C++" must not be
                # interpreted as a pattern (it would raise re.error).
                text_mask |= filtered_df[column].str.contains(
                    query, case=False, na=False, regex=False
                )
            filtered_df = filtered_df[text_mask]

        # Entity filters
        if entity_filters:
            wanted = {self._normalize_entity(name) for name in entity_filters}
            # Explicit bool dtype keeps this a row mask even when no rows are
            # left; an empty object-dtype mask would select zero *columns*.
            entity_mask = pd.Series(
                [
                    any(self._normalize_entity(e) in wanted for e in entity_list)
                    for entity_list in filtered_df["entity_list"]
                ],
                index=filtered_df.index,
                dtype=bool,
            )
            filtered_df = filtered_df[entity_mask]

        # Domain filters
        if domain_filters:
            filtered_df = filtered_df[filtered_df["domain"].isin(domain_filters)]

        # Size filters
        if size_filters:
            filtered_df = filtered_df[filtered_df["size_category"].isin(size_filters)]

        # Limit results (the slider may deliver a float)
        total = len(filtered_df)
        shown_df = filtered_df.head(int(limit))

        if shown_df.empty:
            return "<div style='text-align: center; padding: 40px; color: #6c757d;'><h3>No models found 😞</h3><p>Try adjusting your search criteria</p></div>"

        # Report the real number of matches, not just how many are rendered.
        summary = f"Found {total} models"
        if total > len(shown_df):
            summary += f" (showing first {len(shown_df)})"

        # Create model cards
        parts = [f"<div style='margin-bottom: 16px;'><h2>{summary}</h2></div>"]
        parts.extend(self.create_model_card(row) for _, row in shown_df.iterrows())
        return "".join(parts)

    def get_entity_stats(self):
        """Return a dict mapping each entity name to the number of models listing it."""
        counts = Counter(
            entity
            for entity_list in self.df["entity_list"]
            for entity in entity_list
            if entity  # Remove empty strings
        )
        return dict(counts)

    def get_filter_options(self):
        """Return ``(entities, domains, sizes)`` as sorted lists of unique values."""
        # Missing domains are skipped: NaN cannot be sorted together with str.
        domains = sorted(self.df["domain"].dropna().unique())

        # Get unique sizes
        sizes = sorted(self.df["size_category"].unique())

        # Get all unique entities
        all_entities = set()
        for entity_list in self.df["entity_list"]:
            all_entities.update(entity_list)
        entities = sorted(e for e in all_entities if e)  # Remove empty strings

        return entities, domains, sizes
200
+
201
+
202
# Initialize the app (loads the CSV database once at import time)
app = OpenMedModelDiscovery()

# Curated entity choices offered in the "Entities" dropdown.
ALL_ENTITIES = [
    "amino_acid",
    "anatomical_system",
    "anatomy",
    "cancer",
    "cell",
    "cell_line",
    "cell_line_name",
    "cell_type",
    "cellular_component",
    "chemical",
    "clinical",
    "developing_anatomical_structure",
    "disease",
    "dna",
    "gene/protein",
    "gene_or_protein",
    "immaterial_anatomical_entity",
    "multi_tissue_structure",
    "organ",
    "organism",
    "organism_subdivision",
    "organism_substance",
    "pathological_formation",
    "protein",
    "protein_complex",
    "protein_family",
    "protein_variant",
    "rna",
    "species",
    "tissue",
]

# Domains and size buckets come from the data.  The data-derived entity list
# is deliberately not used for the UI: the curated list above replaces it.
_data_entities, domains, sizes = app.get_filter_options()
entities = ALL_ENTITIES
243
+
244
# Custom CSS.  ``gr.Blocks(css=...)`` expects raw CSS, so it must not be
# wrapped in <style> tags and cannot carry a <script>.
custom_css = """
.gradio-container {
    max-width: 1200px !important;
}

.model-grid {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(400px, 1fr));
    gap: 16px;
    margin-top: 16px;
}
"""

# Copy-to-clipboard helper called by the "Copy Code" buttons in the result
# cards.  It is injected through ``head=`` so the function actually exists
# in the page.
custom_head = """
<script>
function copyToClipboard(text) {
    navigator.clipboard.writeText(text).then(function() {
        alert('Code copied to clipboard!');
    });
}
</script>
"""

# Create the Gradio interface
with gr.Blocks(
    theme=gr.themes.Soft(
        primary_hue="blue", secondary_hue="green", neutral_hue="slate"
    ),
    css=custom_css,
    head=custom_head,
    title="🔬 OpenMed NER Model Discovery App",
) as demo:

    # Header
    gr.HTML(
        """
        <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px;">
            <h1 style="color: white; margin: 0; font-size: 36px;">🔬 OpenMed NER Model Discovery</h1>
            <p style="color: white; margin: 10px 0 0 0; font-size: 18px;">Discover the perfect NER model for your biomedical text analysis from 380+ free OpenMed models</p>
        </div>
        """
    )

    with gr.Tabs():
        # Search Tab
        with gr.Tab("🔍 Search Models", elem_id="search-tab"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### 🎯 Search & Filter")

                    text_search = gr.Textbox(
                        label="Search Models",
                        placeholder="e.g., chemical detection, cancer genomics, DNA...",
                        lines=1,
                    )

                    entity_filter = gr.Dropdown(
                        choices=entities,
                        label="Entities",
                        info="Search and select entities (e.g., Chemical, DNA, Disease)...",
                        multiselect=True,
                        value=[],
                        interactive=True,
                    )

                    with gr.Row():
                        domain_filter = gr.CheckboxGroup(
                            choices=domains, label="Domains", value=[]
                        )

                        size_filter = gr.CheckboxGroup(
                            choices=sizes, label="Model Size", value=[]
                        )

                    result_limit = gr.Slider(
                        minimum=5, maximum=50, value=20, step=5, label="Max Results"
                    )

                    clear_btn = gr.Button("🗑️ Clear Filters", variant="secondary")

                with gr.Column(scale=2):
                    gr.Markdown("### 📋 Search Results")
                    results_display = gr.HTML()

            # The five search controls, in the positional order expected by
            # ``app.search_models``.
            search_inputs = [
                text_search,
                entity_filter,
                domain_filter,
                size_filter,
                result_limit,
            ]

            # Auto-search on any input change
            for component in search_inputs:
                component.change(
                    fn=app.search_models,
                    inputs=search_inputs,
                    outputs=results_display,
                )

            # Clear filters
            def clear_filters():
                """Return the reset value for each search control."""
                return "", [], [], [], 20

            clear_btn.click(fn=clear_filters, outputs=search_inputs)

        # About Tab
        with gr.Tab("ℹ️ About", elem_id="about-tab"):
            gr.Markdown(
                """
                # 🔬 About OpenMed NER Model Discovery

                ## What is OpenMed?

                OpenMed is a collection of **380+ state-of-the-art Named Entity Recognition (NER) models** for biomedical and clinical text analysis. All models are:

                - ✅ **Completely Free** - Apache 2.0 license
                - ✅ **High Performance** - F1 scores up to 99.8%
                - ✅ **Ready to Use** - Compatible with Hugging Face Transformers
                - ✅ **Diverse** - Covers 8+ medical domains and 20+ entity types

                ## 🎯 Use Cases

                - **Drug Discovery** - Identify chemicals and compounds
                - **Clinical Research** - Extract diseases and symptoms
                - **Genomics** - Detect genes, proteins, and DNA/RNA
                - **Medical Records** - Parse anatomical terms and clinical notes
                - **Pharmacovigilance** - Monitor drug safety and adverse events

                ## 🏗️ Model Architectures

                - **BERT** - Bidirectional transformers for robust performance
                - **DeBERTa** - Enhanced attention mechanisms
                - **RoBERTa** - Optimized training for biomedical text
                - **ModernBERT** - Latest advances in transformer architecture

                ## 📊 Coverage

                - **8 Medical Domains** - Pharmacology, Genomics, Oncology, Pathology, etc.
                - **20+ Entity Types** - Chemical, DNA, RNA, Protein, Disease, Anatomy, etc.
                - **Multiple Sizes** - From 33M to 568M parameters
                - **380+ Models** - Comprehensive coverage for any biomedical NLP task

                ## 🚀 Getting Started

                1. **Search** - Use the search tab to find models by domain, entity type, or keywords
                2. **Compare** - View model cards with performance metrics and descriptions
                3. **Copy Code** - Get ready-to-use code snippets
                4. **Deploy** - Download and use with Hugging Face Transformers

                ## 📧 Contact & Support

                - **Models** - [OpenMed on Hugging Face](https://huggingface.co/OpenMed)
                - **Paper** - Coming soon on arXiv
                - **Community** - Join discussions on Hugging Face

                ---

                Built with ❤️ for the biomedical research community
                """
            )

    # Load initial results (unfiltered, default limit) when the page opens
    demo.load(fn=lambda: app.search_models("", [], [], [], 20), outputs=results_display)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False, show_error=True)
data/openmed_models_database.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+ pandas
3
+ numpy
4
+ requests
5
+ transformers
6
+ torch
7
+ entrypoints