add app.py / main file
Browse files
app.py
CHANGED
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# app.py - For Hugging Face Spaces (without Modal)
import gradio as gr
from transformers import pipeline
import torch
from functools import lru_cache
import logging

# Setup logging
# Module-level logger; INFO level so model-loading progress is visible in
# the Spaces container logs.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class TextAnalyzer:
    """Wraps Hugging Face pipelines for sentiment, language detection, and
    summarization, with a small bounded result cache.

    Models are loaded eagerly in ``__init__``; loading failures of the two
    core pipelines (sentiment, summarization) propagate, while the language
    detector is optional and the app degrades gracefully without it.
    """

    def __init__(self):
        """Initialize device selection, the result cache, and load models."""
        self.device = 0 if torch.cuda.is_available() else -1
        logger.info(f"Using device: {'GPU' if self.device == 0 else 'CPU'}")

        # Per-instance bounded result cache keyed by text hash. This replaces
        # the previous @lru_cache-decorated method: lru_cache on an instance
        # method keys on `self` and keeps the instance alive for the cache's
        # lifetime (ruff B019).
        self._cache = {}

        # Load models
        logger.info("Loading models...")
        self.load_models()
        logger.info("✅ All models loaded successfully!")

    def load_models(self):
        """Load all required pipelines.

        Raises:
            Exception: re-raised when the sentiment or summarization pipeline
                cannot be loaded; language detection failure is non-fatal.
        """
        try:
            # Smaller distilled models keep memory and startup time low on
            # Hugging Face Spaces hardware.
            self.sentiment_analyzer = pipeline(
                "sentiment-analysis",
                model="distilbert-base-uncased-finetuned-sst-2-english",
                device=self.device,
            )

            # Use a smaller summarization model
            self.summarizer = pipeline(
                "summarization",
                model="sshleifer/distilbart-cnn-12-6",
                device=self.device,
            )

            # Language detection is best-effort: if this model cannot be
            # fetched the rest of the analyzer still works.
            try:
                self.language_detector = pipeline(
                    "text-classification",
                    model="papluca/xlm-roberta-base-language-detection",
                    device=self.device,
                )
                self.has_language_detection = True
            except Exception:  # narrowed from a bare except:
                self.has_language_detection = False
                logger.warning("Language detection model not loaded")

        except Exception as e:
            logger.error(f"Error loading models: {e}")
            raise

    def cached_analyze(self, text_hash: str, text: str):
        """Return the cached analysis for *text*, computing it on a miss.

        ``text_hash`` is kept as the cache key for backward compatibility
        with the original lru_cache-based signature.
        """
        if text_hash not in self._cache:
            # Evict the oldest entry once the bound is reached (dicts keep
            # insertion order), mirroring the old lru_cache(maxsize=64).
            if len(self._cache) >= 64:
                self._cache.pop(next(iter(self._cache)))
            self._cache[text_hash] = self._analyze_text(text)
        return self._cache[text_hash]

    def _analyze_text(self, text: str):
        """Core analysis logic.

        Returns:
            dict with keys ``sentiment``, ``language``, ``summary``,
            ``stats`` on success, or ``{"error": ..., "stats": ...}`` when a
            model call fails.
        """
        # Basic statistics
        words = text.split()
        word_count = len(words)
        char_count = len(text)

        # The classifiers only need a prefix; slicing keeps tokenization cheap.
        text_limited = text[:512]

        try:
            # Sentiment analysis
            sentiment_result = self.sentiment_analyzer(text_limited)[0]

            # Language detection (if available)
            language_result = None
            if self.has_language_detection:
                try:
                    language_result = self.language_detector(text_limited)[0]
                except Exception:  # best-effort: fall back to "Unknown"
                    language_result = None

            # Summarization (only for longer texts)
            summary = ""
            if word_count > 50:
                try:
                    # Cap the input so very long texts stay within the
                    # summarizer's positional limit, and keep max_length
                    # strictly above min_length: the original
                    # min(100, word_count // 3) could drop below 20 for
                    # texts of 51-62 words, which makes generation fail.
                    summary_result = self.summarizer(
                        text[:4000],
                        max_length=max(30, min(100, word_count // 3)),
                        min_length=20,
                        do_sample=False,
                    )
                    summary = summary_result[0]["summary_text"]
                except Exception as e:
                    summary = f"Unable to generate summary: {str(e)}"
            else:
                summary = "Text too short for summarization (minimum 50 words)"

            return {
                "sentiment": {
                    "label": sentiment_result["label"],
                    "confidence": round(sentiment_result["score"], 3)
                },
                "language": {
                    "language": language_result["label"] if language_result else "Unknown",
                    "confidence": round(language_result["score"], 3) if language_result else 0
                } if self.has_language_detection else {"language": "Detection disabled", "confidence": 0},
                "summary": summary,
                "stats": {
                    "word_count": word_count,
                    "char_count": char_count,
                    # naive sentence split on '.'; good enough for a stats line
                    "sentence_count": len([s for s in text.split('.') if s.strip()])
                }
            }

        except Exception as e:
            logger.error(f"Analysis error: {e}")
            return {
                "error": f"Analysis failed: {str(e)}",
                "stats": {"word_count": word_count, "char_count": char_count}
            }

    def analyze(self, text: str):
        """Public analyze method with caching.

        Returns None for empty or whitespace-only input.
        """
        if not text or not text.strip():
            return None

        cleaned = text.strip()
        # hash() is stable within a single process, which is all the
        # in-memory cache needs.
        return self.cached_analyze(str(hash(cleaned)), cleaned)
# Build the shared TextAnalyzer at import time; the UI callback checks the
# flag and degrades gracefully instead of crashing when model loading fails.
logger.info("Initializing Text Analyzer...")
try:
    analyzer = TextAnalyzer()
except Exception as exc:
    logger.error(f"Failed to load analyzer: {exc}")
    analyzer_loaded = False
else:
    analyzer_loaded = True
def gradio_interface(text):
    """Gradio callback: analyze *text* and format the five output widgets.

    Returns:
        A 5-tuple of strings in the order expected by the ``outputs`` list
        wired up in ``create_app``: (sentiment, language, summary, stats,
        status).

    Note: the original status strings were mojibake-garbled ("β ...");
    restored here to the evident ❌/✅ emoji.
    """
    # Model loading failed at startup: report instead of raising.
    if not analyzer_loaded:
        return (
            "❌ Models failed to load. Please try again later.",
            "❌ Error",
            "❌ Error",
            "❌ Error",
            "❌ Error",
        )

    # Empty / whitespace-only input gets a friendly prompt, not an error.
    if not text or not text.strip():
        return (
            "Please enter some text to analyze.",
            "No text provided",
            "No text provided",
            "No text provided",
            "No text provided",
        )

    # Analyze text
    results = analyzer.analyze(text)

    if not results or "error" in results:
        error_msg = results.get("error", "Unknown error occurred") if results else "Analysis failed"
        return error_msg, "Error", "Error", "Error", "Error"

    # Format results
    sentiment_text = f"**{results['sentiment']['label']}** (confidence: {results['sentiment']['confidence']})"

    language_text = f"**{results['language']['language']}**"
    # confidence == 0 means detection was disabled or failed; omit it then.
    if results['language']['confidence'] > 0:
        language_text += f" (confidence: {results['language']['confidence']})"

    summary_text = results['summary']

    stats = results['stats']
    stats_text = (
        f"Words: {stats['word_count']} | "
        f"Characters: {stats['char_count']} | "
        f"Sentences: {stats.get('sentence_count', 'N/A')}"
    )

    return sentiment_text, language_text, summary_text, stats_text, "✅ Analysis complete!"
# Create Gradio interface
def create_app():
    """Create the Gradio application.

    Builds the Blocks layout (input box, analyze button, four result panes)
    and wires the button to ``gradio_interface``. Returns the Blocks object
    so the caller decides when to launch.

    NOTE(review): several label/markdown strings below appear mojibake-garbled
    ("π§", "π", ...) — presumably emoji in the original file; confirm against
    the deployed UI before changing them.
    """
    with gr.Blocks(
        title="Smart Text Analyzer",
        theme=gr.themes.Soft()
    ) as demo:

        # Page header / description.
        gr.Markdown("""
        # π§ Smart Text Analyzer
        **Analyze text for sentiment, language, and generate summaries**

        *Powered by Hugging Face Transformers*
        """)

        # Input area: free-form text plus the trigger button.
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    label="π Enter your text",
                    placeholder="Type or paste your text here for analysis...",
                    lines=6
                )
                analyze_btn = gr.Button("π Analyze Text", variant="primary")

        # Results, split across two columns.
        with gr.Row():
            with gr.Column():
                sentiment_output = gr.Markdown(label="π Sentiment")
                language_output = gr.Markdown(label="π Language")
            with gr.Column():
                stats_output = gr.Markdown(label="π Statistics")
                status_output = gr.Textbox(label="Status", interactive=False)

        # Summary gets its own full-width box below the columns.
        summary_output = gr.Textbox(
            label="π Summary",
            lines=3,
            interactive=False
        )

        # Examples
        # Clickable sample inputs shown under the form.
        gr.Examples(
            examples=[
                ["I absolutely love this new restaurant! The food was incredible and the service was outstanding."],
                ["Climate change represents one of the most significant challenges of our time. Rising global temperatures are causing widespread environmental disruption."],
                ["This movie was disappointing. The plot was confusing and the acting was poor."]
            ],
            inputs=text_input
        )

        # The outputs order here must match the 5-tuple returned by
        # gradio_interface: sentiment, language, summary, stats, status.
        analyze_btn.click(
            fn=gradio_interface,
            inputs=text_input,
            outputs=[sentiment_output, language_output, summary_output, stats_output, status_output]
        )

    return demo
if __name__ == "__main__":
    # Entry point: construct the Gradio UI and serve it.
    create_app().launch()