Really-amin committed on
Commit
25da451
·
verified ·
1 Parent(s): 33594b3

Update huggingface_space/app.py

Browse files
Files changed (1) hide show
  1. huggingface_space/app.py +308 -291
huggingface_space/app.py CHANGED
@@ -1,321 +1,338 @@
1
- """
2
- Gradio Interface for Legal Dashboard - Hugging Face Spaces
3
- ==========================================================
4
- This provides a web interface for the Legal Dashboard using Gradio,
5
- optimized for deployment on Hugging Face Spaces.
6
- """
7
-
8
  import os
9
- import sys
10
- import asyncio
11
- import threading
12
- import time
13
- import gradio as gr
14
- import requests
15
- from typing import Optional, Dict, Any
16
 
17
- # Add app directory to Python path
18
- sys.path.insert(0, '/app')
19
- sys.path.insert(0, '.')
 
 
 
20
 
21
- # Set environment variables for the app
22
- os.environ.setdefault('DATABASE_DIR', '/tmp/legal_dashboard')
23
- os.environ.setdefault('PYTHONPATH', '/app')
24
- os.environ.setdefault('LOG_LEVEL', 'INFO')
 
 
 
 
25
 
26
- # Global variables
27
- fastapi_server = None
28
- server_port = 7860
29
 
30
def start_fastapi_server():
    """Run the FastAPI backend under uvicorn (blocking call).

    Meant to be used as the target of a background thread. Any exception
    raised while importing or running the server is printed instead of
    propagated, and the function then returns None.
    """
    global fastapi_server, server_port

    try:
        import uvicorn
        from app.main import app as backend_app

        print(f"🚀 Starting FastAPI server on port {server_port}...")

        # uvicorn.run() only returns once the server has stopped.
        uvicorn_options = {
            "host": "127.0.0.1",
            "port": server_port,
            "log_level": "info",
            "access_log": False,
        }
        uvicorn.run(backend_app, **uvicorn_options)
    except Exception as exc:
        print(f"❌ Failed to start FastAPI server: {exc}")
        return None
51
-
52
def wait_for_server(timeout=30):
    """Poll the backend's /health endpoint until it answers or time runs out.

    Args:
        timeout: Maximum number of seconds to keep polling.

    Returns:
        True as soon as /health responds with HTTP 200, False if the
        deadline passes first.
    """
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout

    while time.monotonic() < deadline:
        try:
            response = requests.get(f"http://127.0.0.1:{server_port}/health", timeout=2)
            if response.status_code == 200:
                print(" FastAPI server is ready!")
                return True
        except requests.exceptions.RequestException:
            # Server is not accepting connections yet; retry after a pause.
            # Only request errors are swallowed so Ctrl-C still interrupts.
            pass
        time.sleep(1)

    print("❌ FastAPI server failed to start within timeout")
    return False
68
-
69
def make_api_request(endpoint: str, method: str = "GET", data: Optional[Dict] = None, token: Optional[str] = None) -> Dict:
    """Make request to FastAPI backend.

    Args:
        endpoint: Path on the backend, e.g. "/health".
        method: HTTP verb; "GET" and "POST" are supported (case-insensitive).
        data: Optional JSON-serialisable payload sent with POST requests.
        token: Optional bearer token added as an Authorization header.

    Returns:
        The decoded JSON body on success (an empty dict when the response
        has no body), otherwise a dict with a single "error" key.
    """
    verb = method.upper() if isinstance(method, str) else method
    if verb not in ("GET", "POST"):
        return {"error": f"Unsupported method: {method}"}

    url = f"http://127.0.0.1:{server_port}{endpoint}"
    headers = {}

    if token:
        headers["Authorization"] = f"Bearer {token}"

    try:
        if verb == "GET":
            response = requests.get(url, headers=headers, timeout=10)
        else:
            # json= serialises the payload and sets the Content-Type header.
            response = requests.post(url, json=data, headers=headers, timeout=10)
    except requests.exceptions.RequestException as e:
        return {"error": f"Request failed: {str(e)}"}

    # Any 2xx status is a success (e.g. 201 Created), not only 200.
    if not response.ok:
        return {"error": f"HTTP {response.status_code}: {response.text}"}

    if not response.content:
        return {}

    try:
        return response.json()
    except ValueError:
        # Body was not valid JSON; report it instead of crashing the caller.
        return {"error": f"Invalid JSON response (HTTP {response.status_code})"}
95
-
96
- # Authentication state
97
- auth_state = {"token": None, "user": None}
98
-
99
def login_user(username: str, password: str) -> tuple:
    """Login user and return status.

    Args:
        username: Account name typed into the login form.
        password: Password typed into the login form.

    Returns:
        A 4-tuple ``(success, message, "", "")``; the two empty strings
        are the values written back to the username and password fields.
    """
    if not username or not password:
        return False, "نام کاربری و رمز عبور الزامی است", "", ""

    credentials = {"username": username, "password": password}
    result = make_api_request("/api/auth/login", "POST", credentials)

    if "error" in result:
        return False, f"خطا در ورود: {result['error']}", "", ""

    token = result.get("access_token")
    if token:
        # Get user info
        user_info = make_api_request("/api/auth/me", "GET", token=token)
        if "error" not in user_info:
            # Commit the session only once the whole login flow succeeded,
            # so a failed login never leaves a token behind in auth_state.
            auth_state["token"] = token
            auth_state["user"] = user_info
            return True, f"خوش آمدید {user_info.get('username', 'کاربر')}!", "", ""

    return False, "ورود ناموفق", "", ""
120
-
121
def register_user(username: str, email: str, password: str) -> tuple:
    """Create a new account through the backend registration endpoint.

    Returns a 5-tuple ``(success, message, "", "", "")``; the trailing
    empty strings are written back to the three registration form fields.
    """
    every_field_filled = bool(username and email and password)
    if not every_field_filled:
        return False, "تمام فیلدها الزامی است", "", "", ""

    # New accounts are always submitted with the plain "user" role.
    payload = dict(username=username, email=email, password=password, role="user")
    response = make_api_request("/api/auth/register", "POST", payload)

    if "error" not in response:
        return True, "ثبت نام موفقیت آمیز بود. اکنون می‌توانید وارد شوید.", "", "", ""

    return False, f"خطا در ثبت نام: {response['error']}", "", "", ""
139
-
140
def logout_user():
    """End the current session and reset the stored authentication state.

    If a token is stored, the backend logout endpoint is called with it
    first. Returns ``(False, message, "", "")``.
    """
    active_token = auth_state["token"]
    if active_token:
        make_api_request("/api/auth/logout", "POST", token=active_token)

    # Forget both the token and the cached user record.
    auth_state.update(token=None, user=None)
    return False, "خروج موفقیت آمیز", "", ""
148
-
149
def get_server_status():
    """Get server status.

    Returns:
        A one-line, human-readable summary of the backend /health check.
    """
    try:
        response = make_api_request("/health")
        if "error" not in response:
            return f" Server Status: {response.get('status', 'Unknown')}"
        return f"❌ Server Error: {response['error']}"
    except Exception:
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt / SystemExit are not swallowed by the poller.
        return "❌ Server not responding"
159
-
160
def process_document(file, document_type: str = "قرارداد"):
    """Process uploaded document.

    Args:
        file: The uploaded file, either a path string (what Gradio passes
            for ``gr.File(type="filepath")``) or an object exposing ``.name``.
        document_type: Document category selected in the UI.

    Returns:
        A status message for the result textbox.
    """
    if not file:
        return "لطفاً فایلی را انتخاب کنید"

    if not auth_state["token"]:
        return "لطفاً ابتدا وارد شوید"

    # A plain path string has no ``.name`` attribute, so handle both shapes.
    file_name = file if isinstance(file, str) else file.name

    # This would integrate with your document processing API
    return f"فایل '{file_name}' از نوع '{document_type}' در حال پردازش است..."
170
-
171
- # Start FastAPI server in background
172
def start_background_server():
    """Launch the FastAPI backend on a daemon thread and wait for it.

    Blocks until wait_for_server() returns, then prints whether the
    backend became ready.
    """
    backend_thread = threading.Thread(target=start_fastapi_server, daemon=True)
    backend_thread.start()

    # Block until the health check passes or its timeout elapses.
    backend_ready = wait_for_server()
    if not backend_ready:
        print("⚠️ System may not be fully functional")
    else:
        print("🎉 System ready!")


# Bring the backend up at import time, before the UI is built.
start_background_server()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
- # Create Gradio interface
187
# Build the Gradio UI: authentication, document processing and system info tabs.
with gr.Blocks(
    title="Legal Dashboard - داشبورد حقوقی",
    theme=gr.themes.Soft(),
    css="""
    .container { max-width: 1200px; margin: auto; }
    .login-box { background: #f8f9fa; padding: 20px; border-radius: 10px; }
    .status-box { background: #e7f3ff; padding: 10px; border-radius: 5px; margin: 10px 0; }
    """,
    rtl=True,
) as app:

    # Page heading and short description.
    gr.Markdown(value="""
    # 📊 داشبورد حقوقی
    ### سیستم مدیریت و تحلیل اسناد حقوقی

    این سیستم امکان آپلود، تحلیل و مدیریت اسناد حقوقی را فراهم می‌کند.
    """)

    # --- Tab 1: login / registration -------------------------------------
    with gr.Tab(label="🔐 احراز هویت"):
        with gr.Row():
            # Left column: login form.
            with gr.Column():
                gr.Markdown(value="### ورود به سیستم")
                login_username = gr.Textbox(placeholder="admin", label="نام کاربری")
                login_password = gr.Textbox(
                    placeholder="admin123",
                    type="password",
                    label="رمز عبور",
                )
                login_btn = gr.Button(value="ورود", variant="primary")
                login_status = gr.Textbox(interactive=False, label="وضعیت")

            # Right column: registration form.
            with gr.Column():
                gr.Markdown(value="### ثبت نام")
                reg_username = gr.Textbox(label="نام کاربری")
                reg_email = gr.Textbox(label="ایمیل")
                reg_password = gr.Textbox(type="password", label="رمز عبور")
                register_btn = gr.Button(value="ثبت نام", variant="secondary")
                reg_status = gr.Textbox(interactive=False, label="وضعیت")

        with gr.Row():
            logout_btn = gr.Button(value="خروج", variant="stop")
            # The callable value is re-evaluated every 30 seconds.
            server_status = gr.Textbox(
                label="وضعیت سرور",
                value=get_server_status,
                every=30,
            )

    # --- Tab 2: document upload and processing ---------------------------
    with gr.Tab(label="📄 پردازش اسناد"):
        gr.Markdown(value="### آپلود و تحلیل اسناد")

        with gr.Row():
            with gr.Column():
                file_input = gr.File(
                    type="filepath",
                    file_types=[".pdf", ".docx", ".doc", ".txt"],
                    label="انتخاب فایل",
                )
                doc_type = gr.Dropdown(
                    choices=["قرارداد", "دادخواست", "رأی دادگاه", "سند اداری", "سایر"],
                    value="قرارداد",
                    label="نوع سند",
                )
                process_btn = gr.Button(value="پردازش سند", variant="primary")

            with gr.Column():
                process_result = gr.Textbox(
                    interactive=False,
                    lines=10,
                    label="نتیجه پردازش",
                )

    # --- Tab 3: usage guide and API health --------------------------------
    with gr.Tab(label="ℹ️ اطلاعات سیستم"):
        gr.Markdown(value="""
        ### راهنمای استفاده

        **احراز هویت:**
        - کاربر پیش‌فرض: `admin` / `admin123`
        - برای دسترسی کامل ابتدا وارد شوید

        **پردازش اسناد:**
        - فرمت‌های پشتیبانی شده: PDF, DOCX, DOC, TXT
        - حداکثر حجم فایل: 50MB

        **ویژگی‌ها:**
        - تحلیل متن با هوش مصنوعی
        - استخراج اطلاعات کلیدی
        - تشخیص نوع سند
        - آرشیو و مدیریت اسناد
        """)

        api_status = gr.JSON(
            value=lambda: make_api_request("/health"),
            label="وضعیت API",
        )

    # --- Event wiring ------------------------------------------------------
    # Each auth handler returns a success flag first; it is routed into a
    # fresh gr.State component that nothing else in this block reads.
    login_btn.click(
        fn=login_user,
        inputs=[login_username, login_password],
        outputs=[gr.State(), login_status, login_username, login_password],
    )

    register_btn.click(
        fn=register_user,
        inputs=[reg_username, reg_email, reg_password],
        outputs=[gr.State(), reg_status, reg_username, reg_email, reg_password],
    )

    logout_btn.click(
        fn=logout_user,
        outputs=[gr.State(), login_status, login_username, login_password],
    )

    process_btn.click(
        fn=process_document,
        inputs=[file_input, doc_type],
        outputs=[process_result],
    )
300
 
301
- # Launch configuration for Hugging Face Spaces
302
if __name__ == "__main__":
    # SPACE_ID being set (and non-empty) selects the Spaces configuration.
    on_hf_spaces = bool(os.getenv("SPACE_ID"))

    if on_hf_spaces:
        print("🤗 Running in Hugging Face Spaces")
        bind_host = "0.0.0.0"
    else:
        print("🖥️ Running locally")
        bind_host = "127.0.0.1"

    # On Spaces: no share link, no debug mode. Locally: both enabled.
    app.launch(
        server_name=bind_host,
        server_port=7860,
        share=not on_hf_spaces,
        show_error=True,
        debug=not on_hf_spaces,
    )
 
 
 
 
 
 
 
 
1
  import os
2
+ import tempfile
3
+ import logging
4
+ from pathlib import Path
5
+ from typing import Optional, Tuple
 
 
 
6
 
7
+ try:
8
+ import gradio as gr
9
+ GRADIO_AVAILABLE = True
10
+ except ImportError:
11
+ GRADIO_AVAILABLE = False
12
+ logging.warning("Gradio not available")
13
 
14
+ # Import our services
15
+ try:
16
+ from app.services.ocr_service import ocr_service
17
+ from app.services.database_service import DatabaseService
18
+ OCR_AVAILABLE = True
19
+ except ImportError:
20
+ OCR_AVAILABLE = False
21
+ logging.warning("OCR service not available")
22
 
23
+ logger = logging.getLogger(__name__)
 
 
24
 
25
class LegalDashboardGradio:
    """
    Gradio interface for Legal Dashboard.

    Holds the callbacks wired to the Gradio UI: OCR-based document
    processing, document search (currently a placeholder) and a
    human-readable system status report.
    """

    # File extensions routed to the image OCR path.
    _IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')

    def __init__(self):
        """Bind the OCR service (if imported) and try to create the database service."""
        self.ocr_service = ocr_service if OCR_AVAILABLE else None
        self.db_service = None

        # Initialize database if available. The broad except is deliberate:
        # it also covers the NameError raised when the service import failed
        # and DatabaseService was never defined.
        try:
            self.db_service = DatabaseService()
        except Exception as e:
            logger.warning(f"Database service not available: {e}")

    async def process_document(self, file) -> Tuple[str, str, str]:
        """
        Process uploaded document and extract text.

        Args:
            file: The uploaded file, either a path (``str`` / ``Path``) or an
                object exposing the path as ``.name``.

        Returns:
            A ``(status, extracted_text, metadata)`` tuple of strings; the
            last two are empty whenever processing did not succeed.
        """
        if not file:
            return "❌ No file uploaded", "", ""

        if not self.ocr_service:
            return "❌ OCR service not available", "", ""

        try:
            # Accept a plain path as well as a file object. Path is checked
            # first because Path.name is only the basename, not the full path.
            if isinstance(file, (str, Path)):
                file_path = str(file)
            else:
                file_path = file.name
            file_extension = Path(file_path).suffix.lower()

            # Process based on file type
            if file_extension == '.pdf':
                result = await self.ocr_service.extract_text_from_pdf(file_path)
            elif file_extension in self._IMAGE_EXTENSIONS:
                result = await self.ocr_service.extract_text_from_image(file_path)
            else:
                return f" Unsupported file type: {file_extension}", "", ""

            if not result.get("success"):
                error_msg = result.get("metadata", {}).get("error", "Unknown error")
                return f"❌ Processing failed: {error_msg}", "", ""

            # Process text with NLP if available
            processed = await self.ocr_service.process_text(result["text"])

            status = f"✅ Successfully processed using {result['method']}"

            metadata = (
                "\n**Processing Details:**\n"
                f"- Method: {result['method']}\n"
                f"- Character Count: {len(result['text'])}\n"
                f"- Pages: {len(result.get('pages', []))}\n"
            )

            # A missing NLP result must not discard the extracted text.
            entities = (processed or {}).get('entities')
            if entities:
                metadata += "\n**Named Entities Found:**\n"
                # Show first 10 entities
                metadata += "".join(
                    f"- {ent['text']} ({ent['label']})\n" for ent in entities[:10]
                )

            return status, result["text"], metadata

        except Exception as e:
            # UI boundary: log the traceback and show the error to the user.
            logger.exception("Document processing error: %s", e)
            return f" Error: {str(e)}", "", ""

    def search_documents(self, query: str) -> str:
        """
        Search in processed documents.

        Args:
            query: Search text; ``None`` or whitespace-only counts as empty.

        Returns:
            A message for the results textbox. Real database search is not
            implemented yet, so a placeholder is returned for valid queries.
        """
        # An empty textbox may arrive as None; guard before calling strip().
        if not query or not query.strip():
            return "Please enter a search query"

        if not self.db_service:
            return "Database service not available"

        # This would search in the database
        # For now, return a placeholder
        return f"Search results for '{query}' would appear here.\n\nDatabase integration coming soon..."

    def get_system_status(self) -> str:
        """
        Get system status information.

        Returns:
            A multi-line report covering the OCR service, the database
            service and Gradio availability, or an error message if the
            report could not be built.
        """
        try:
            status = []

            # OCR Service Status
            if self.ocr_service:
                ocr_status = self.ocr_service.get_service_status()
                status.append("🔍 **OCR Service:**")
                status.append(f" - Status: {'✅ Ready' if ocr_status['fallback_ready'] else '❌ Not Ready'}")
                status.append(f" - Transformers: {'✅ Available' if ocr_status['transformers_ready'] else '❌ Not Available'}")
                status.append(f" - spaCy: {'✅ Available' if ocr_status['spacy_ready'] else '❌ Not Available'}")
                status.append(f" - Models: {', '.join(ocr_status['models_loaded']) if ocr_status['models_loaded'] else 'None'}")
            else:
                status.append("🔍 **OCR Service:** ❌ Not Available")

            # Database Service Status
            if self.db_service:
                status.append("\n💾 **Database Service:** ✅ Available")
            else:
                status.append("\n💾 **Database Service:** ❌ Not Available")

            # System Info
            status.append("\n🖥️ **System Info:**")
            status.append(" - Python: Available")
            status.append(f" - Gradio: {'✅ Available' if GRADIO_AVAILABLE else '❌ Not Available'}")

            return "\n".join(status)

        except Exception as e:
            return f"Error getting system status: {str(e)}"
143
 
144
def create_gradio_interface():
    """
    Build the Legal Dashboard Blocks UI and return it without launching.

    Returns None when Gradio is not available. Otherwise the interface has
    four tabs (document processing, search, system status, about) whose
    buttons are wired to a fresh LegalDashboardGradio instance.
    """
    if not GRADIO_AVAILABLE:
        return None

    backend = LegalDashboardGradio()

    # Custom CSS
    stylesheet = """
    .gradio-container {
        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    }
    .main-header {
        text-align: center;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        padding: 20px;
        border-radius: 10px;
        margin-bottom: 20px;
    }
    .status-box {
        background: #f8f9fa;
        border-left: 4px solid #28a745;
        padding: 15px;
        border-radius: 5px;
    }
    """

    accepted_file_types = [".pdf", ".jpg", ".jpeg", ".png", ".bmp", ".tiff"]

    with gr.Blocks(css=stylesheet, title="Legal Dashboard", theme=gr.themes.Soft()) as iface:

        # Banner shown above all tabs.
        gr.HTML(value="""
        <div class="main-header">
            <h1>🏛️ Legal Dashboard</h1>
            <p>Advanced Legal Document Management System with AI-Powered OCR</p>
        </div>
        """)

        # ---- Tab: upload a file and run OCR on it ----
        with gr.Tab(label="📄 Document Processing"):
            gr.Markdown(value="## Upload and Process Documents")
            gr.Markdown(value="Upload PDF files or images to extract text using advanced OCR technology.")

            with gr.Row():
                with gr.Column(scale=1):
                    file_input = gr.File(
                        type="file",
                        file_types=accepted_file_types,
                        label="Upload Document",
                    )
                    process_btn = gr.Button(value="🔍 Process Document", variant="primary", size="lg")

                with gr.Column(scale=2):
                    status_output = gr.Textbox(
                        interactive=False,
                        placeholder="Upload a document and click 'Process Document' to begin...",
                        label="Processing Status",
                    )

            with gr.Row():
                with gr.Column():
                    extracted_text = gr.Textbox(
                        interactive=False,
                        lines=15,
                        max_lines=30,
                        placeholder="Processed text will appear here...",
                        label="Extracted Text",
                    )

                with gr.Column():
                    metadata_output = gr.Textbox(
                        interactive=False,
                        lines=15,
                        max_lines=30,
                        placeholder="Processing metadata and analysis will appear here...",
                        label="Processing Details",
                    )

            # The handler's (status, text, metadata) tuple fills the three boxes.
            process_btn.click(
                fn=backend.process_document,
                inputs=[file_input],
                outputs=[status_output, extracted_text, metadata_output],
            )

        # ---- Tab: keyword search ----
        with gr.Tab(label="🔍 Search Documents"):
            gr.Markdown(value="## Search Processed Documents")
            gr.Markdown(value="Search through previously processed documents using keywords and phrases.")

            with gr.Row():
                search_input = gr.Textbox(
                    scale=3,
                    placeholder="Enter keywords to search...",
                    label="Search Query",
                )
                search_btn = gr.Button(value="🔍 Search", variant="primary", scale=1)

            search_results = gr.Textbox(
                interactive=False,
                lines=10,
                placeholder="Search results will appear here...",
                label="Search Results",
            )

            search_btn.click(
                fn=backend.search_documents,
                inputs=[search_input],
                outputs=[search_results],
            )

        # ---- Tab: service health ----
        with gr.Tab(label="⚙️ System Status"):
            gr.Markdown(value="## System Status and Information")

            status_btn = gr.Button(value="🔄 Refresh Status", variant="secondary")
            system_status = gr.Textbox(
                interactive=False,
                lines=15,
                placeholder="Click 'Refresh Status' to check system health...",
                label="System Status",
            )

            # Manual refresh via the button...
            status_btn.click(
                fn=backend.get_system_status,
                outputs=[system_status],
            )

            # ...plus one automatic fill when the page loads.
            iface.load(
                fn=backend.get_system_status,
                outputs=[system_status],
            )

        # ---- Tab: static project description ----
        with gr.Tab(label="📚 About"):
            gr.Markdown(value="""
            ## Legal Dashboard

            **Advanced Legal Document Management System**

            ### Features:
            - 📄 **PDF Processing**: Extract text from PDF documents with high accuracy
            - 🖼️ **Image OCR**: Process scanned documents and images
            - 🧠 **AI-Powered**: Uses advanced transformer models for text recognition
            - 🔍 **Smart Search**: Intelligent search capabilities across documents
            - 📊 **Analytics**: Document analysis and metadata extraction
            - 🔒 **Secure**: Privacy-focused document processing

            ### Supported Formats:
            - **Documents**: PDF
            - **Images**: JPG, JPEG, PNG, BMP, TIFF

            ### Technology Stack:
            - **OCR**: PyMuPDF, OpenCV, Transformers (TrOCR)
            - **NLP**: spaCy for named entity recognition
            - **ML**: PyTorch, Hugging Face Transformers
            - **Interface**: Gradio for web interface

            ### Usage:
            1. Upload your document using the **Document Processing** tab
            2. Click **Process Document** to extract text
            3. Use the **Search** tab to find specific content
            4. Check **System Status** for service health

            ---

            *This system is designed for legal professionals to efficiently process and manage legal documents with the power of AI.*
            """)

    return iface
 
 
 
 
315
 
316
def launch_gradio_app():
    """
    Launch the Gradio application.

    Returns:
        The interface object that was launched, or None when Gradio is not
        available or the interface could not be created.
    """
    if not GRADIO_AVAILABLE:
        print("❌ Gradio not available. Please install gradio: pip install gradio")
        return None

    iface = create_gradio_interface()
    if iface:
        import inspect

        print("🚀 Starting Legal Dashboard Gradio Interface...")

        launch_kwargs = {
            "server_name": "0.0.0.0",
            "server_port": 7860,
            "share": False,
            "show_error": True,
            "show_tips": True,
            "enable_queue": True,
        }

        # Some options (show_tips, enable_queue) are not accepted by every
        # Gradio release; passing an unknown keyword raises TypeError. Keep
        # only the keywords this installed launch() signature declares.
        accepted = inspect.signature(iface.launch).parameters
        takes_var_kwargs = any(
            param.kind is inspect.Parameter.VAR_KEYWORD for param in accepted.values()
        )
        if not takes_var_kwargs:
            launch_kwargs = {
                key: value for key, value in launch_kwargs.items() if key in accepted
            }

        iface.launch(**launch_kwargs)
    return iface


if __name__ == "__main__":
    launch_gradio_app()