gourav246 committed on
Commit f985823 · verified · 1 Parent(s): 95745c4

add app.py / main file

Files changed (1)
  1. app.py +240 -0
app.py CHANGED
@@ -0,0 +1,240 @@
+ # app.py - For Hugging Face Spaces (without Modal)
+ import gradio as gr
+ from transformers import pipeline
+ import torch
+ from functools import lru_cache
+ import logging
+
+ # Set up logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ class TextAnalyzer:
+     def __init__(self):
+         """Initialize models"""
+         self.device = 0 if torch.cuda.is_available() else -1
+         logger.info(f"Using device: {'GPU' if self.device == 0 else 'CPU'}")
+
+         # Load models
+         logger.info("Loading models...")
+         self.load_models()
+         logger.info("✅ All models loaded successfully!")
+
+     def load_models(self):
+         """Load all required models"""
+         try:
+             # Use smaller, faster models for Hugging Face Spaces
+             self.sentiment_analyzer = pipeline(
+                 "sentiment-analysis",
+                 model="distilbert-base-uncased-finetuned-sst-2-english",
+                 device=self.device
+             )
+
+             # Use a smaller summarization model
+             self.summarizer = pipeline(
+                 "summarization",
+                 model="sshleifer/distilbart-cnn-12-6",
+                 device=self.device
+             )
+
+             # Simple language detection (optional; skipped if it fails to load)
+             try:
+                 self.language_detector = pipeline(
+                     "text-classification",
+                     model="papluca/xlm-roberta-base-language-detection",
+                     device=self.device
+                 )
+                 self.has_language_detection = True
+             except Exception:
+                 self.has_language_detection = False
+                 logger.warning("Language detection model not loaded")
+
+         except Exception as e:
+             logger.error(f"Error loading models: {e}")
+             raise
+
+     @lru_cache(maxsize=64)
+     def cached_analyze(self, text_hash: str, text: str):
+         """Cache results for identical inputs"""
+         return self._analyze_text(text)
+
+     def _analyze_text(self, text: str):
+         """Core analysis logic"""
+         # Basic statistics
+         words = text.split()
+         word_count = len(words)
+         char_count = len(text)
+
+         # Limit text length for the classification models
+         text_limited = text[:512]
+
+         try:
+             # Sentiment analysis
+             sentiment_result = self.sentiment_analyzer(text_limited)[0]
+
+             # Language detection (if available)
+             language_result = None
+             if self.has_language_detection:
+                 try:
+                     language_result = self.language_detector(text_limited)[0]
+                 except Exception:
+                     language_result = None
+
+             # Summarization (only for longer texts)
+             summary = ""
+             if word_count > 50:
+                 try:
+                     summary_result = self.summarizer(
+                         text, truncation=True,  # truncate inputs longer than the model limit
+                         max_length=max(25, min(100, word_count // 3)),  # keep max_length above min_length
+                         min_length=20,
+                         do_sample=False
+                     )
+                     summary = summary_result[0]["summary_text"]
+                 except Exception as e:
+                     summary = f"Unable to generate summary: {str(e)}"
+             else:
+                 summary = "Text too short for summarization (minimum 50 words)"
+
+             return {
+                 "sentiment": {
+                     "label": sentiment_result["label"],
+                     "confidence": round(sentiment_result["score"], 3)
+                 },
+                 "language": {
+                     "language": language_result["label"] if language_result else "Unknown",
+                     "confidence": round(language_result["score"], 3) if language_result else 0
+                 } if self.has_language_detection else {"language": "Detection disabled", "confidence": 0},
+                 "summary": summary,
+                 "stats": {
+                     "word_count": word_count,
+                     "char_count": char_count,
+                     "sentence_count": len([s for s in text.split('.') if s.strip()])
+                 }
+             }
+
+         except Exception as e:
+             logger.error(f"Analysis error: {e}")
+             return {
+                 "error": f"Analysis failed: {str(e)}",
+                 "stats": {"word_count": word_count, "char_count": char_count}
+             }
+
+     def analyze(self, text: str):
+         """Public analyze method with caching"""
+         if not text or not text.strip():
+             return None
+
+         # Create hash for caching
+         text_hash = str(hash(text.strip()))
+         return self.cached_analyze(text_hash, text.strip())
+
+ # Initialize analyzer
+ logger.info("Initializing Text Analyzer...")
+ try:
+     analyzer = TextAnalyzer()
+     analyzer_loaded = True
+ except Exception as e:
+     logger.error(f"Failed to load analyzer: {e}")
+     analyzer_loaded = False
+
+ def gradio_interface(text):
+     """Gradio interface function"""
+     if not analyzer_loaded:
+         return (
+             "❌ Models failed to load. Please try again later.",
+             "❌ Error",
+             "❌ Error",
+             "❌ Error",
+             "❌ Error"
+         )
+
+     if not text or not text.strip():
+         return (
+             "Please enter some text to analyze.",
+             "No text provided",
+             "No text provided",
+             "No text provided",
+             "No text provided"
+         )
+
+     # Analyze text
+     results = analyzer.analyze(text)
+
+     if not results or "error" in results:
+         error_msg = results.get("error", "Unknown error occurred") if results else "Analysis failed"
+         return error_msg, "Error", "Error", "Error", "Error"
+
+     # Format results
+     sentiment_text = f"**{results['sentiment']['label']}** (confidence: {results['sentiment']['confidence']})"
+
+     language_text = f"**{results['language']['language']}**"
+     if results['language']['confidence'] > 0:
+         language_text += f" (confidence: {results['language']['confidence']})"
+
+     summary_text = results['summary']
+
+     stats_text = f"Words: {results['stats']['word_count']} | Characters: {results['stats']['char_count']} | Sentences: {results['stats'].get('sentence_count', 'N/A')}"
+
+     return sentiment_text, language_text, summary_text, stats_text, "✅ Analysis complete!"
+
+ # Create Gradio interface
+ def create_app():
+     """Create the Gradio application"""
+     with gr.Blocks(
+         title="Smart Text Analyzer",
+         theme=gr.themes.Soft()
+     ) as demo:
+
+         gr.Markdown("""
+         # 🧠 Smart Text Analyzer
+         **Analyze text for sentiment, language, and generate summaries**
+
+         *Powered by Hugging Face Transformers*
+         """)
+
+         with gr.Row():
+             with gr.Column():
+                 text_input = gr.Textbox(
+                     label="📝 Enter your text",
+                     placeholder="Type or paste your text here for analysis...",
+                     lines=6
+                 )
+                 analyze_btn = gr.Button("🔍 Analyze Text", variant="primary")
+
+         with gr.Row():
+             with gr.Column():
+                 sentiment_output = gr.Markdown(label="😊 Sentiment")
+                 language_output = gr.Markdown(label="🌍 Language")
+             with gr.Column():
+                 stats_output = gr.Markdown(label="📊 Statistics")
+                 status_output = gr.Textbox(label="Status", interactive=False)
+
+         summary_output = gr.Textbox(
+             label="📝 Summary",
+             lines=3,
+             interactive=False
+         )
+
+         # Examples
+         gr.Examples(
+             examples=[
+                 ["I absolutely love this new restaurant! The food was incredible and the service was outstanding."],
+                 ["Climate change represents one of the most significant challenges of our time. Rising global temperatures are causing widespread environmental disruption."],
+                 ["This movie was disappointing. The plot was confusing and the acting was poor."]
+             ],
+             inputs=text_input
+         )
+
+         analyze_btn.click(
+             fn=gradio_interface,
+             inputs=text_input,
+             outputs=[sentiment_output, language_output, summary_output, stats_output, status_output]
+         )
+
+     return demo
+
+ if __name__ == "__main__":
+     # Create and launch the app
+     app = create_app()
+     app.launch()
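
As a quick sanity check outside the Gradio UI, the interface function can also be called directly from Python. This is a hypothetical smoke test, not part of this commit; it assumes gradio, transformers, and torch are installed, and importing app will download and load the models.

    # smoke_test.py - hypothetical local check, not included in this commit
    from app import gradio_interface

    # gradio_interface always returns a 5-tuple of display strings
    sentiment, language, summary, stats, status = gradio_interface(
        "I absolutely love this new restaurant! The food was incredible."
    )
    print(status)
    print(sentiment, "|", language)
    print(stats)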