Spaces:

Really-amin
/

Hoghoghi

Paused

App Files Files Community

Hoghoghi / huggingface_space /app.py

Really-amin

Update huggingface_space/app.py

25da451 verified about 2 months ago

raw

history blame

12.4 kB

	import os
	import tempfile
	import logging
	from pathlib import Path
	from typing import Optional, Tuple

	# Gradio is an optional dependency: GRADIO_AVAILABLE records whether the
	# import succeeded so the rest of the module can degrade gracefully.
	try:
	    import gradio as gr
	except ImportError:
	    GRADIO_AVAILABLE = False
	    logging.warning("Gradio not available")
	else:
	    GRADIO_AVAILABLE = True

	# Import our services. Both imports must succeed for OCR_AVAILABLE to be
	# True; if either fails the service names are bound to None so that later
	# references do not raise NameError.
	try:
	    from app.services.ocr_service import ocr_service
	    from app.services.database_service import DatabaseService
	except ImportError:
	    ocr_service = None
	    DatabaseService = None
	    OCR_AVAILABLE = False
	    logging.warning("OCR service not available")
	else:
	    OCR_AVAILABLE = True

	# Module-level logger used by the dashboard class below.
	logger = logging.getLogger(__name__)

	class LegalDashboardGradio:
	    """Callbacks behind the Legal Dashboard Gradio interface.

	    Attributes:
	        ocr_service: The imported ``ocr_service`` object, or ``None`` when
	            the service modules could not be imported.
	        db_service: A ``DatabaseService`` instance, or ``None`` when it
	            could not be created.
	    """

	    # Extensions routed to image OCR; '.pdf' is dispatched separately.
	    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')

	    def __init__(self):
	        """Bind the OCR service and try to create the database service."""
	        self.ocr_service = ocr_service if OCR_AVAILABLE else None
	        self.db_service = None

	        # DatabaseService is only usable when the service imports succeeded;
	        # skip the call otherwise instead of relying on a swallowed error.
	        if not OCR_AVAILABLE:
	            logger.warning("Database service not available: service modules could not be imported")
	            return

	        # Initialize database if available
	        try:
	            self.db_service = DatabaseService()
	        except Exception as e:
	            logger.warning(f"Database service not available: {e}")

	    async def process_document(self, file) -> Tuple[str, str, str]:
	        """Extract text from an uploaded PDF or image.

	        Args:
	            file: The uploaded document, given either as a path
	                (``str`` / ``os.PathLike``) or as an object whose ``name``
	                attribute holds the path.

	        Returns:
	            A ``(status, text, metadata)`` tuple. On failure ``text`` and
	            ``metadata`` are empty strings and ``status`` describes the
	            problem.
	        """
	        if not file:
	            return "❌ No file uploaded", "", ""

	        if not self.ocr_service:
	            return "❌ OCR service not available", "", ""

	        try:
	            # Accept a plain path as well as an object exposing the path as
	            # `.name`, so the callback works with either form of upload.
	            if isinstance(file, (str, os.PathLike)):
	                file_path = os.fspath(file)
	            else:
	                file_path = file.name
	            file_extension = Path(file_path).suffix.lower()

	            # Process based on file type
	            if file_extension == '.pdf':
	                result = await self.ocr_service.extract_text_from_pdf(file_path)
	            elif file_extension in self.IMAGE_EXTENSIONS:
	                result = await self.ocr_service.extract_text_from_image(file_path)
	            else:
	                return f"❌ Unsupported file type: {file_extension}", "", ""

	            # A result without a truthy "success" flag is reported as a failure.
	            if not result.get("success"):
	                error_msg = (result.get("metadata") or {}).get("error", "Unknown error")
	                return f"❌ Processing failed: {error_msg}", "", ""

	            # NLP enrichment is optional: a failure there must not discard
	            # the text that OCR already extracted.
	            try:
	                processed = await self.ocr_service.process_text(result["text"])
	            except Exception as nlp_error:
	                logger.warning("Text post-processing failed: %s", nlp_error)
	                processed = None
	            processed = processed or {}

	            # Create status message
	            status = f"✅ Successfully processed using {result['method']}"

	            # Create metadata info
	            metadata = f"""
	Processing Details:
	- Method: {result['method']}
	- Character Count: {len(result['text'])}
	- Pages: {len(result.get('pages', []))}
	"""

	            entities = processed.get('entities')
	            if entities:
	                # Show first 10 entities
	                entity_lines = "".join(
	                    f"- {ent['text']} ({ent['label']})\n" for ent in entities[:10]
	                )
	                metadata += "\nNamed Entities Found:\n" + entity_lines

	            return status, result["text"], metadata

	        except Exception as e:
	            # Boundary handler for the UI callback: log with traceback and
	            # surface the message in the status field.
	            logger.exception("Document processing error: %s", e)
	            return f"❌ Error: {str(e)}", "", ""

	    def search_documents(self, query: str) -> str:
	        """Return search results for ``query`` (currently a placeholder).

	        Args:
	            query: The search text; ``None``, empty and whitespace-only
	                values are all treated as "no query".

	        Returns:
	            A message for the results box.
	        """
	        if not query or not query.strip():
	            return "Please enter a search query"

	        if not self.db_service:
	            return "Database service not available"

	        # This would search in the database
	        # For now, return a placeholder
	        return f"Search results for '{query}' would appear here.\n\nDatabase integration coming soon..."

	    def get_system_status(self) -> str:
	        """Build a multi-line report on the OCR, database and Gradio status.

	        Returns:
	            The report text, or an error message if collecting it failed.
	        """
	        try:
	            status = []

	            # OCR Service Status
	            if self.ocr_service:
	                ocr_status = self.ocr_service.get_service_status()
	                status.extend([
	                    "🔍 OCR Service:",
	                    f" - Status: {'✅ Ready' if ocr_status['fallback_ready'] else '❌ Not Ready'}",
	                    f" - Transformers: {'✅ Available' if ocr_status['transformers_ready'] else '❌ Not Available'}",
	                    f" - spaCy: {'✅ Available' if ocr_status['spacy_ready'] else '❌ Not Available'}",
	                    f" - Models: {', '.join(ocr_status['models_loaded']) if ocr_status['models_loaded'] else 'None'}",
	                ])
	            else:
	                status.append("🔍 OCR Service: ❌ Not Available")

	            # Database Service Status
	            if self.db_service:
	                status.append("\n💾 Database Service: ✅ Available")
	            else:
	                status.append("\n💾 Database Service: ❌ Not Available")

	            # System Info
	            status.append("\n🖥️ System Info:")
	            status.append(" - Python: Available")
	            status.append(f" - Gradio: {'✅ Available' if GRADIO_AVAILABLE else '❌ Not Available'}")

	            return "\n".join(status)

	        except Exception as e:
	            return f"Error getting system status: {str(e)}"

	def create_gradio_interface():
	    """Build the Legal Dashboard UI.

	    Creates a ``LegalDashboardGradio`` instance and wires its methods to
	    four tabs: document processing, search, system status and about.

	    Returns:
	        The ``gr.Blocks`` interface, or ``None`` when Gradio is not
	        available.
	    """
	    if not GRADIO_AVAILABLE:
	        return None

	    dashboard = LegalDashboardGradio()

	    # Custom CSS
	    css = """
	.gradio-container {
	font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
	}
	.main-header {
	text-align: center;
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	color: white;
	padding: 20px;
	border-radius: 10px;
	margin-bottom: 20px;
	}
	.status-box {
	background: #f8f9fa;
	border-left: 4px solid #28a745;
	padding: 15px;
	border-radius: 5px;
	}
	"""

	    with gr.Blocks(css=css, title="Legal Dashboard", theme=gr.themes.Soft()) as iface:

	        # Header
	        gr.HTML("""
	<div class="main-header">
	<h1>🏛️ Legal Dashboard</h1>
	<p>Advanced Legal Document Management System with AI-Powered OCR</p>
	</div>
	""")

	        with gr.Tab("📄 Document Processing"):
	            gr.Markdown("## Upload and Process Documents")
	            gr.Markdown("Upload PDF files or images to extract text using advanced OCR technology.")

	            with gr.Row():
	                with gr.Column(scale=1):
	                    # `type` is left at the component default. The value
	                    # "file" that used to be passed here is the Gradio 3
	                    # default and is rejected by Gradio 4 and later
	                    # (see the Gradio File component documentation).
	                    file_input = gr.File(
	                        label="Upload Document",
	                        file_types=[".pdf", ".jpg", ".jpeg", ".png", ".bmp", ".tiff"],
	                    )
	                    process_btn = gr.Button("🔍 Process Document", variant="primary", size="lg")

	                with gr.Column(scale=2):
	                    status_output = gr.Textbox(
	                        label="Processing Status",
	                        placeholder="Upload a document and click 'Process Document' to begin...",
	                        interactive=False
	                    )

	            with gr.Row():
	                with gr.Column():
	                    extracted_text = gr.Textbox(
	                        label="Extracted Text",
	                        placeholder="Processed text will appear here...",
	                        lines=15,
	                        max_lines=30,
	                        interactive=False
	                    )

	                with gr.Column():
	                    metadata_output = gr.Textbox(
	                        label="Processing Details",
	                        placeholder="Processing metadata and analysis will appear here...",
	                        lines=15,
	                        max_lines=30,
	                        interactive=False
	                    )

	            # Connect the processing function; its three return values map
	            # onto the three output boxes in order.
	            process_btn.click(
	                fn=dashboard.process_document,
	                inputs=[file_input],
	                outputs=[status_output, extracted_text, metadata_output]
	            )

	        with gr.Tab("🔍 Search Documents"):
	            gr.Markdown("## Search Processed Documents")
	            gr.Markdown("Search through previously processed documents using keywords and phrases.")

	            with gr.Row():
	                search_input = gr.Textbox(
	                    label="Search Query",
	                    placeholder="Enter keywords to search...",
	                    scale=3
	                )
	                search_btn = gr.Button("🔍 Search", variant="primary", scale=1)

	            search_results = gr.Textbox(
	                label="Search Results",
	                placeholder="Search results will appear here...",
	                lines=10,
	                interactive=False
	            )

	            # Connect search function
	            search_btn.click(
	                fn=dashboard.search_documents,
	                inputs=[search_input],
	                outputs=[search_results]
	            )

	        with gr.Tab("⚙️ System Status"):
	            gr.Markdown("## System Status and Information")

	            status_btn = gr.Button("🔄 Refresh Status", variant="secondary")
	            system_status = gr.Textbox(
	                label="System Status",
	                placeholder="Click 'Refresh Status' to check system health...",
	                lines=15,
	                interactive=False
	            )

	            # Connect status function
	            status_btn.click(
	                fn=dashboard.get_system_status,
	                outputs=[system_status]
	            )

	            # Auto-load status on interface start
	            iface.load(
	                fn=dashboard.get_system_status,
	                outputs=[system_status]
	            )

	        with gr.Tab("📚 About"):
	            gr.Markdown("""
	## Legal Dashboard

	Advanced Legal Document Management System

	### Features:
	- 📄 PDF Processing: Extract text from PDF documents with high accuracy
	- 🖼️ Image OCR: Process scanned documents and images
	- 🧠 AI-Powered: Uses advanced transformer models for text recognition
	- 🔍 Smart Search: Intelligent search capabilities across documents
	- 📊 Analytics: Document analysis and metadata extraction
	- 🔒 Secure: Privacy-focused document processing

	### Supported Formats:
	- Documents: PDF
	- Images: JPG, JPEG, PNG, BMP, TIFF

	### Technology Stack:
	- OCR: PyMuPDF, OpenCV, Transformers (TrOCR)
	- NLP: spaCy for named entity recognition
	- ML: PyTorch, Hugging Face Transformers
	- Interface: Gradio for web interface

	### Usage:
	1. Upload your document using the Document Processing tab
	2. Click Process Document to extract text
	3. Use the Search tab to find specific content
	4. Check System Status for service health

	---

	This system is designed for legal professionals to efficiently process and manage legal documents with the power of AI.
	""")

	    return iface

	def launch_gradio_app():
	    """Build the interface and serve it on 0.0.0.0:7860.

	    Returns:
	        The interface returned by ``create_gradio_interface()``, or ``None``
	        when Gradio is not available.
	    """
	    import inspect

	    if not GRADIO_AVAILABLE:
	        print("❌ Gradio not available. Please install gradio: pip install gradio")
	        return None

	    iface = create_gradio_interface()
	    if iface:
	        print("🚀 Starting Legal Dashboard Gradio Interface...")
	        launch_options = {
	            "server_name": "0.0.0.0",
	            "server_port": 7860,
	            "share": False,
	            "show_error": True,
	            "show_tips": True,
	            "enable_queue": True,
	        }

	        # Some of these options (show_tips, enable_queue) are not accepted by
	        # every Gradio release, and an unknown keyword makes launch() raise
	        # TypeError. Pass only what the installed launch() declares; if the
	        # signature cannot be read or takes **kwargs, pass everything.
	        try:
	            accepted = inspect.signature(iface.launch).parameters
	        except (TypeError, ValueError):
	            accepted = None
	        if accepted is not None and not any(
	            param.kind is inspect.Parameter.VAR_KEYWORD for param in accepted.values()
	        ):
	            launch_options = {
	                name: value for name, value in launch_options.items() if name in accepted
	            }

	        iface.launch(**launch_options)
	    return iface

	# Script entry point: start the Gradio app only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	    launch_gradio_app()