# Page-header residue captured with this file (not part of the module):
# "Spaces: Paused / Paused"
import os
import tempfile
import logging
from pathlib import Path
from typing import Optional, Tuple

# Gradio is optional: the module stays importable without it and callers
# check GRADIO_AVAILABLE instead of handling ImportError themselves.
try:
    import gradio as gr
    GRADIO_AVAILABLE = True
except ImportError:
    # Keep the name bound so a stray reference is not a NameError.
    gr = None
    GRADIO_AVAILABLE = False
    logging.warning("Gradio not available")

# Import our services
# Both project services are covered by the single OCR_AVAILABLE flag: if
# either import fails, both names fall back to None.
try:
    from app.services.ocr_service import ocr_service
    from app.services.database_service import DatabaseService
    OCR_AVAILABLE = True
except ImportError:
    ocr_service = None
    DatabaseService = None
    OCR_AVAILABLE = False
    logging.warning("OCR service not available")

logger = logging.getLogger(__name__)
class LegalDashboardGradio:
    """
    Gradio interface for Legal Dashboard

    Holds the callbacks that the UI wires to its buttons: document OCR
    (``process_document``), document search (``search_documents``) and a
    textual health report (``get_system_status``).

    Attributes:
        ocr_service: The module-level ``ocr_service`` object when
            ``OCR_AVAILABLE`` is true, otherwise ``None``.
        db_service: A ``DatabaseService`` instance, or ``None`` when it could
            not be constructed.
    """

    # NOTE(review): the leading "β" / "π" glyphs in the user-facing strings
    # below look like emoji that were mis-decoded somewhere upstream. They are
    # kept exactly as found -- confirm against the original file.

    # Extensions routed to the image OCR path; ".pdf" is handled separately.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')

    def __init__(self):
        self.ocr_service = ocr_service if OCR_AVAILABLE else None
        self.db_service = None
        # Initialize database if available. This is best-effort: any failure
        # is logged and the dashboard keeps working without a database.
        try:
            self.db_service = DatabaseService()
        except Exception as e:
            logger.warning(f"Database service not available: {e}")

    @staticmethod
    def _resolve_upload_path(file) -> str:
        """Return the filesystem path for an uploaded file value.

        Accepts either a plain path (``str`` / ``os.PathLike``) or an object
        exposing the path as ``.name`` (e.g. a temp-file wrapper).
        """
        if isinstance(file, (str, os.PathLike)):
            return os.fspath(file)
        return file.name

    @staticmethod
    def _format_metadata(result, processed) -> str:
        """Build the "Processing Details" text from OCR and NLP results."""
        lines = [
            "",
            "**Processing Details:**",
            f"- Method: {result['method']}",
            f"- Character Count: {len(result['text'])}",
            f"- Pages: {len(result.get('pages', []))}",
            "",
        ]
        metadata = "\n".join(lines)

        # The NLP step is optional: tolerate a missing/None result so that a
        # successful OCR extraction is still reported.
        entities = (processed or {}).get('entities')
        if entities:
            shown = entities[:10]  # Show first 10 entities
            metadata += "\n**Named Entities Found:**\n" + "".join(
                f"- {ent['text']} ({ent['label']})\n" for ent in shown
            )
        return metadata

    async def process_document(self, file) -> Tuple[str, str, str]:
        """
        Process uploaded document and extract text

        Args:
            file: The uploaded file, either as a path string or as an object
                whose ``.name`` is the path. Falsy values mean "no upload".

        Returns:
            ``(status, extracted_text, metadata)``. On any failure the status
            carries the reason and the other two elements are empty strings;
            this method does not raise.
        """
        if not file:
            return "β No file uploaded", "", ""
        if not self.ocr_service:
            return "β OCR service not available", "", ""

        try:
            file_path = self._resolve_upload_path(file)
            file_extension = Path(file_path).suffix.lower()

            # Process based on file type
            if file_extension == '.pdf':
                result = await self.ocr_service.extract_text_from_pdf(file_path)
            elif file_extension in self.IMAGE_EXTENSIONS:
                result = await self.ocr_service.extract_text_from_image(file_path)
            else:
                return f"β Unsupported file type: {file_extension}", "", ""

            if not result["success"]:
                error_msg = result.get("metadata", {}).get("error", "Unknown error")
                return f"β Processing failed: {error_msg}", "", ""

            # Process text with NLP if available
            processed = await self.ocr_service.process_text(result["text"])
            status = f"β Successfully processed using {result['method']}"
            metadata = self._format_metadata(result, processed)
            return status, result["text"], metadata
        except Exception as e:
            # UI boundary: report the failure in the status box instead of
            # letting the exception escape into the Gradio event handler.
            logger.exception("Document processing error: %s", e)
            return f"β Error: {str(e)}", "", ""

    def search_documents(self, query: str) -> str:
        """
        Search in processed documents

        Currently a placeholder: no database query is issued yet.

        Args:
            query: Search text; ``None``, empty or whitespace-only input is
                rejected with a prompt.

        Returns:
            A human-readable message for the results box.
        """
        if not query or not query.strip():
            return "Please enter a search query"
        if not self.db_service:
            return "Database service not available"
        try:
            # This would search in the database
            # For now, return a placeholder
            return f"Search results for '{query}' would appear here.\n\nDatabase integration coming soon..."
        except Exception as e:
            return f"Search error: {str(e)}"

    def get_system_status(self) -> str:
        """
        Get system status information

        Returns:
            A multi-line report covering the OCR service, the database
            service and Gradio availability. If building the report fails,
            a single-line error message is returned instead.
        """
        try:
            status = []

            # OCR Service Status
            if self.ocr_service:
                ocr_status = self.ocr_service.get_service_status()
                models = ocr_status['models_loaded']
                status.append("π **OCR Service:**")
                status.append(f" - Status: {'β Ready' if ocr_status['fallback_ready'] else 'β Not Ready'}")
                status.append(f" - Transformers: {'β Available' if ocr_status['transformers_ready'] else 'β Not Available'}")
                status.append(f" - spaCy: {'β Available' if ocr_status['spacy_ready'] else 'β Not Available'}")
                status.append(f" - Models: {', '.join(models) if models else 'None'}")
            else:
                status.append("π **OCR Service:** β Not Available")

            # Database Service Status
            if self.db_service:
                status.append("\nπΎ **Database Service:** β Available")
            else:
                status.append("\nπΎ **Database Service:** β Not Available")

            # System Info
            status.append("\nπ₯οΈ **System Info:**")
            status.append(" - Python: Available")
            status.append(f" - Gradio: {'β Available' if GRADIO_AVAILABLE else 'β Not Available'}")
            return "\n".join(status)
        except Exception as e:
            return f"Error getting system status: {str(e)}"
def create_gradio_interface():
    """
    Create and return the Gradio interface

    Builds a ``gr.Blocks`` app with four tabs (document processing, search,
    system status, about) and wires their buttons to a fresh
    ``LegalDashboardGradio`` instance.

    Returns:
        The ``gr.Blocks`` object, or ``None`` when ``GRADIO_AVAILABLE`` is
        false.
    """
    if not GRADIO_AVAILABLE:
        return None

    dashboard = LegalDashboardGradio()

    # Custom CSS
    css = """
    .gradio-container {
        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    }
    .main-header {
        text-align: center;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        padding: 20px;
        border-radius: 10px;
        margin-bottom: 20px;
    }
    .status-box {
        background: #f8f9fa;
        border-left: 4px solid #28a745;
        padding: 15px;
        border-radius: 5px;
    }
    """

    # NOTE(review): the nesting of the layout contexts below was rebuilt from
    # source whose indentation had been lost -- confirm against the original.
    with gr.Blocks(css=css, title="Legal Dashboard", theme=gr.themes.Soft()) as iface:
        # Header
        gr.HTML("""
        <div class="main-header">
            <h1>ποΈ Legal Dashboard</h1>
            <p>Advanced Legal Document Management System with AI-Powered OCR</p>
        </div>
        """)

        with gr.Tab("π Document Processing"):
            gr.Markdown("## Upload and Process Documents")
            gr.Markdown("Upload PDF files or images to extract text using advanced OCR technology.")

            with gr.Row():
                with gr.Column(scale=1):
                    # No explicit `type=`: the accepted values differ between
                    # Gradio major versions ("file" is rejected by 4.x), so
                    # rely on the installed version's default.
                    file_input = gr.File(
                        label="Upload Document",
                        file_types=[".pdf", ".jpg", ".jpeg", ".png", ".bmp", ".tiff"],
                    )
                    process_btn = gr.Button("π Process Document", variant="primary", size="lg")
                with gr.Column(scale=2):
                    status_output = gr.Textbox(
                        label="Processing Status",
                        placeholder="Upload a document and click 'Process Document' to begin...",
                        interactive=False
                    )

            with gr.Row():
                with gr.Column():
                    extracted_text = gr.Textbox(
                        label="Extracted Text",
                        placeholder="Processed text will appear here...",
                        lines=15,
                        max_lines=30,
                        interactive=False
                    )
                with gr.Column():
                    metadata_output = gr.Textbox(
                        label="Processing Details",
                        placeholder="Processing metadata and analysis will appear here...",
                        lines=15,
                        max_lines=30,
                        interactive=False
                    )

            # Connect the processing function
            process_btn.click(
                fn=dashboard.process_document,
                inputs=[file_input],
                outputs=[status_output, extracted_text, metadata_output]
            )

        with gr.Tab("π Search Documents"):
            gr.Markdown("## Search Processed Documents")
            gr.Markdown("Search through previously processed documents using keywords and phrases.")

            with gr.Row():
                search_input = gr.Textbox(
                    label="Search Query",
                    placeholder="Enter keywords to search...",
                    scale=3
                )
                search_btn = gr.Button("π Search", variant="primary", scale=1)

            search_results = gr.Textbox(
                label="Search Results",
                placeholder="Search results will appear here...",
                lines=10,
                interactive=False
            )

            # Connect search function
            search_btn.click(
                fn=dashboard.search_documents,
                inputs=[search_input],
                outputs=[search_results]
            )

        with gr.Tab("βοΈ System Status"):
            gr.Markdown("## System Status and Information")
            status_btn = gr.Button("π Refresh Status", variant="secondary")
            system_status = gr.Textbox(
                label="System Status",
                placeholder="Click 'Refresh Status' to check system health...",
                lines=15,
                interactive=False
            )

            # Connect status function
            status_btn.click(
                fn=dashboard.get_system_status,
                outputs=[system_status]
            )

            # Auto-load status on interface start
            iface.load(
                fn=dashboard.get_system_status,
                outputs=[system_status]
            )

        with gr.Tab("π About"):
            gr.Markdown("""
            ## Legal Dashboard
            **Advanced Legal Document Management System**
            ### Features:
            - π **PDF Processing**: Extract text from PDF documents with high accuracy
            - πΌοΈ **Image OCR**: Process scanned documents and images
            - π§ **AI-Powered**: Uses advanced transformer models for text recognition
            - π **Smart Search**: Intelligent search capabilities across documents
            - π **Analytics**: Document analysis and metadata extraction
            - π **Secure**: Privacy-focused document processing
            ### Supported Formats:
            - **Documents**: PDF
            - **Images**: JPG, JPEG, PNG, BMP, TIFF
            ### Technology Stack:
            - **OCR**: PyMuPDF, OpenCV, Transformers (TrOCR)
            - **NLP**: spaCy for named entity recognition
            - **ML**: PyTorch, Hugging Face Transformers
            - **Interface**: Gradio for web interface
            ### Usage:
            1. Upload your document using the **Document Processing** tab
            2. Click **Process Document** to extract text
            3. Use the **Search** tab to find specific content
            4. Check **System Status** for service health
            ---
            *This system is designed for legal professionals to efficiently process and manage legal documents with the power of AI.*
            """)

    return iface
def launch_gradio_app(server_name="0.0.0.0", server_port=7860, share=False):
    """
    Launch the Gradio application

    Args:
        server_name: Interface to bind to. The default ``"0.0.0.0"`` listens
            on all interfaces.
        server_port: TCP port to serve on.
        share: Forwarded to ``Blocks.launch(share=...)``.

    Returns:
        The interface object that was launched, or ``None`` when Gradio is
        not installed.
    """
    if not GRADIO_AVAILABLE:
        print("β Gradio not available. Please install gradio: pip install gradio")
        return None

    iface = create_gradio_interface()
    if iface is not None:
        print("π Starting Legal Dashboard Gradio Interface...")
        # Queueing is enabled through .queue(): the `enable_queue` launch
        # argument (and `show_tips`) are not accepted by newer Gradio
        # releases, so they are not passed to launch().
        iface.queue()
        iface.launch(
            server_name=server_name,
            server_port=server_port,
            share=share,
            show_error=True,
        )
    return iface


if __name__ == "__main__":
    # Running this module as a script starts the web UI with the defaults.
    launch_gradio_app()