Deploy T5 Email Summarizer demo with Gradio
Browse files- README.md +34 -4
- app.py +303 -0
- requirements.txt +5 -0
README.md
CHANGED
@@ -1,12 +1,42 @@
|
|
1 |
---
|
2 |
title: T5 Email Summarizer Demo
|
3 |
-
emoji:
|
4 |
colorFrom: blue
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
-
sdk_version:
|
8 |
app_file: app.py
|
9 |
pinned: false
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
---
|
11 |
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
title: T5 Email Summarizer Demo
|
3 |
+
emoji: 📧
|
4 |
colorFrom: blue
|
5 |
+
colorTo: green
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 4.44.1
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
+
license: apache-2.0
|
11 |
+
models:
|
12 |
+
- wordcab/t5-small-email-summarizer
|
13 |
+
datasets:
|
14 |
+
- argilla/FinePersonas-Conversations-Email-Summaries
|
15 |
+
space_hardware: "t4-small"
|
16 |
---
|
17 |
|
18 |
+
# T5 Email Summarizer - Interactive Demo
|
19 |
+
|
20 |
+
This Space provides an interactive demo of the [wordcab/t5-small-email-summarizer](https://huggingface.co/wordcab/t5-small-email-summarizer) model.
|
21 |
+
|
22 |
+
## Features
|
23 |
+
|
24 |
+
- 🎯 **Dual-mode summarization**: Brief (1-2 sentences) or Full (detailed)
|
25 |
+
- 🚀 **Fast inference**: Under 1 second on T4 GPU
|
26 |
+
- 💪 **Robust**: Handles typos, abbreviations, and informal language
|
27 |
+
- 🔄 **Auto-detect**: Automatically chooses brief or full based on email length
|
28 |
+
|
29 |
+
## Model Details
|
30 |
+
|
31 |
+
- **Architecture**: T5-small (60M parameters)
|
32 |
+
- **Training Data**: [argilla/FinePersonas-Conversations-Email-Summaries](https://huggingface.co/datasets/argilla/FinePersonas-Conversations-Email-Summaries) (364K examples)
|
33 |
+
- **Max Input**: 512 tokens (~2500 characters)
|
34 |
+
- **License**: Apache 2.0
|
35 |
+
|
36 |
+
## Try It Out
|
37 |
+
|
38 |
+
1. Paste any email (formal or informal)
|
39 |
+
2. Select summary type or use auto-detect
|
40 |
+
3. Click "Generate Summary"
|
41 |
+
|
42 |
+
The model will produce a concise, accurate summary tailored to your needs!
|
app.py
ADDED
@@ -0,0 +1,303 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/usr/bin/env python3
"""
Gradio demo for T5 Email Summarizer
Deployed on HuggingFace Spaces with T4 GPU
"""
import gradio as gr
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer
import time

# Load model and tokenizer once at module import, so every Gradio request
# reuses the same in-memory model (Spaces keeps the process alive).
print("Loading T5 Email Summarizer model...")
model_name = "wordcab/t5-small-email-summarizer"

tokenizer = T5Tokenizer.from_pretrained(model_name)
# fp16 halves memory and speeds inference on GPU; fall back to fp32 on CPU,
# where half precision is typically unsupported or slower.
model = T5ForConditionalGeneration.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
)

# Move to GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
model.eval()  # inference mode: disables dropout etc.

print(f"Model loaded successfully on {device}!")
28 |
+
def summarize_email(email_text, summary_type, temperature=0.7, max_length=150):
    """Generate an email summary in the selected mode.

    Args:
        email_text: Raw email text; may or may not already contain
            "Subject:" / "Body:" markers.
        summary_type: "Brief (1-2 sentences)", "Full (detailed)", or any
            other value (e.g. "Auto-detect") to choose brief vs full from
            the email's word count.
        temperature: Sampling temperature; 0 switches to deterministic
            beam search.
        max_length: Generation token cap for full summaries (brief is
            always capped at 50).

    Returns:
        Tuple of (summary text with appended metrics, processing time in
        seconds).
    """
    # Guard: nothing to summarize.
    if not email_text.strip():
        return "Please enter an email to summarize.", 0

    start_time = time.time()

    # Pick the task prefix the model was fine-tuned on, plus a length cap.
    if summary_type == "Brief (1-2 sentences)":
        prefix = "summarize_brief:"
        max_gen_length = 50
    elif summary_type == "Full (detailed)":
        prefix = "summarize_full:"
        max_gen_length = max_length
    else:  # Auto: brief for short emails, full for longer ones
        if len(email_text.split()) < 100:
            prefix = "summarize_brief:"
            max_gen_length = 50
        else:
            prefix = "summarize_full:"
            max_gen_length = max_length

    # Format the input as "Subject: ... Body: ..." the way the model expects.
    if "Subject:" in email_text and "Body:" in email_text:
        # Already formatted by the user.
        input_text = f"{prefix} {email_text}"
    else:
        lines = email_text.strip().split('\n')
        if len(lines) > 1 and len(lines[0]) < 100:
            # A short first line is most likely the subject.
            input_text = f"{prefix} Subject: {lines[0]}. Body: {' '.join(lines[1:])}"
        else:
            input_text = f"{prefix} Subject: Email. Body: {email_text}"

    # Tokenize (model context is 512 tokens; longer emails are truncated).
    inputs = tokenizer(
        input_text,
        max_length=512,
        truncation=True,
        return_tensors="pt"
    ).to(device)

    # Build generation kwargs per decoding mode. Fix: the previous version
    # always passed temperature/top_p (invalid or warned by transformers when
    # do_sample=False) and set early_stopping=True with num_beams=1 (another
    # warned no-op). Each knob is now supplied only in the mode it applies to.
    gen_kwargs = {
        "max_length": max_gen_length,
        "min_length": 10,
        "no_repeat_ngram_size": 3,
    }
    if temperature > 0:
        # Stochastic nucleus sampling.
        gen_kwargs.update(do_sample=True, temperature=temperature, top_p=0.9)
    else:
        # Temperature slider at 0: deterministic beam search.
        gen_kwargs.update(do_sample=False, num_beams=2, early_stopping=True)

    with torch.no_grad():
        outputs = model.generate(**inputs, **gen_kwargs)

    # Decode
    summary = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Calculate metrics for the UI panel.
    processing_time = time.time() - start_time
    input_tokens = len(inputs['input_ids'][0])
    output_tokens = len(outputs[0])

    # Append metadata to the summary shown in the output textbox.
    metadata = f"\n\n---\nπ **Metrics:**\n"
    metadata += f"- Processing time: {processing_time:.2f}s\n"
    metadata += f"- Input tokens: {input_tokens}/512\n"
    metadata += f"- Output tokens: {output_tokens}\n"
    metadata += f"- Summary type: {prefix.replace(':', '').replace('_', ' ').title()}\n"

    return summary + metadata, processing_time
104 |
+
|
105 |
+
# Example emails for the gr.Examples panel.
# Each entry is [email_text, summary_type, temperature, max_length],
# matching the `inputs` list wired to summarize_email in the UI.
examples = [
    # Formal meeting reminder — exercises Auto-detect on a mid-length email.
    [
        """Subject: Quarterly Budget Review Meeting

Dear Team,

I hope this email finds you well. I wanted to remind everyone about our quarterly budget review meeting scheduled for next Tuesday, March 15th at 2:00 PM EST in Conference Room A.

Please come prepared with:
- Q1 expense reports
- Updated project timelines
- Resource allocation requests for Q2

We'll be discussing the 15% budget increase for digital marketing initiatives and the proposed headcount expansion for the engineering team.

If you cannot attend in person, please join via Zoom using the link in the calendar invite.

Best regards,
Sarah Johnson
Finance Director""",
        "Auto-detect",
        0.7,
        150
    ],
    # Informal email with typos/abbreviations — shows robustness in brief mode.
    [
        """hey team,

quick update - cant make the meeting tmrw bc im stuck at the airport (flight delayed AGAIN ugh).

jim said we need to finalize teh proposal by friday or we'll miss the deadline... can someone take over? also dont forget to include the budget numbers from last months report.

btw has anyone seen my laptop charger? left it somewhere in the office yesterday lol

thx
mike""",
        "Brief (1-2 sentences)",
        0.7,
        150
    ],
    # Dense incident report — detailed (full) summary mode.
    [
        """Subject: Production Deployment Issue - URGENT

Team,

We've identified a critical bug in the v2.3.1 release that's causing intermittent 500 errors on the checkout page.

Error rate: ~12% of transactions
Affected users: ~3,000 since deployment at 11 AM
Root cause: Memory leak in payment processing module

Immediate actions taken:
1. Rolled back to v2.3.0 on servers 1-4
2. Keeping servers 5-6 on v2.3.1 for debugging
3. Hotfix in development (ETA: 2 hours)

Please hold off on any non-critical deployments until we resolve this.

- DevOps Team""",
        "Full (detailed)",
        0.7,
        150
    ],
    # Onboarding welcome email — brief mode on a longer formal message.
    [
        """Subject: Welcome to the Team!

Hi Jessica,

Welcome to Wordcab! We're excited to have you join our engineering team as a Senior Software Engineer.

Your first day is Monday, January 15th. Please arrive at the office by 9:00 AM. You'll meet with HR first to complete paperwork, then I'll give you a tour of the office and introduce you to the team.

Your laptop and equipment will be ready at your desk. We've scheduled orientation sessions throughout your first week to help you get up to speed with our codebase and development processes.

Looking forward to working with you!

Best,
Tom""",
        "Brief (1-2 sentences)",
        0.7,
        150
    ]
]
|
188 |
+
|
189 |
+
# Create Gradio interface. Layout: two columns — input/controls on the left,
# summary + timing on the right — followed by cached examples and help text.
# NOTE(review): several emoji in the UI strings appear mojibake-mangled
# (e.g. "π§"); reproduced verbatim here — confirm intended characters upstream.
with gr.Blocks(title="T5 Email Summarizer", theme=gr.themes.Soft()) as demo:
    # Header / model attribution.
    gr.Markdown("""
# π§ T5 Email Summarizer - Brief & Full

This model can generate both **brief** (1-2 sentences) and **full** (detailed) summaries of emails.
It's robust to messy, informal text with typos and abbreviations.

π€ **Model:** [wordcab/t5-small-email-summarizer](https://huggingface.co/wordcab/t5-small-email-summarizer)
| π **Dataset:** [argilla/FinePersonas-Conversations-Email-Summaries](https://huggingface.co/datasets/argilla/FinePersonas-Conversations-Email-Summaries)
| π **Powered by:** T4 GPU
""")

    with gr.Row():
        # Left column: email input and generation controls.
        with gr.Column(scale=1):
            email_input = gr.Textbox(
                label="π Email Content",
                placeholder="Paste your email here...\n\nYou can include Subject: and Body: or just paste the content directly.",
                lines=12
            )

            with gr.Row():
                summary_type = gr.Radio(
                    choices=["Auto-detect", "Brief (1-2 sentences)", "Full (detailed)"],
                    value="Auto-detect",
                    label="π Summary Type"
                )

            # Collapsed by default; values feed summarize_email's
            # temperature/max_length parameters.
            with gr.Accordion("βοΈ Advanced Settings", open=False):
                temperature = gr.Slider(
                    minimum=0,
                    maximum=1,
                    value=0.7,
                    step=0.1,
                    label="Temperature (0 = deterministic, 1 = creative)"
                )
                max_length = gr.Slider(
                    minimum=50,
                    maximum=200,
                    value=150,
                    step=10,
                    label="Max Length (for full summaries)"
                )

            summarize_btn = gr.Button("β¨ Generate Summary", variant="primary")

        # Right column: generated summary and latency readout.
        with gr.Column(scale=1):
            output = gr.Textbox(
                label="π Summary",
                lines=12,
                interactive=False
            )

            processing_time = gr.Number(
                label="β±οΈ Processing Time (seconds)",
                precision=2,
                interactive=False
            )

    gr.Markdown("### π‘ Try these examples:")

    # cache_examples=True runs summarize_email on each example at build time
    # so clicking an example returns instantly.
    gr.Examples(
        examples=examples,
        inputs=[email_input, summary_type, temperature, max_length],
        outputs=[output, processing_time],
        fn=summarize_email,
        cache_examples=True
    )

    # Wire the button to the summarization function.
    summarize_btn.click(
        fn=summarize_email,
        inputs=[email_input, summary_type, temperature, max_length],
        outputs=[output, processing_time]
    )

    # Footer: usage instructions, API snippet, and citation.
    gr.Markdown("""
---
### π How to use:
1. **Paste your email** in the input box (formal or informal, with or without typos)
2. **Select summary type** (or use Auto-detect)
3. **Click Generate Summary** to get your summary

### π― Features:
- **Dual-mode**: Get brief or detailed summaries on demand
- **Robust**: Handles typos, abbreviations, and informal language
- **Fast**: Processes emails in under 1 second on T4 GPU
- **Smart**: Auto-detects when to use brief vs full summaries

### π§ API Usage:
```python
from transformers import pipeline

summarizer = pipeline("summarization", model="wordcab/t5-small-email-summarizer")

# Brief summary
result = summarizer("summarize_brief: Subject: Meeting. Body: Let's meet at 3pm tomorrow.")

# Full summary
result = summarizer("summarize_full: Subject: Project Update. Body: [your long email]")
```

### π Citation:
If you use this model, please cite:
```bibtex
@misc{wordcab2025t5email,
title={T5 Email Summarizer - Brief & Full},
author={Wordcab Team},
year={2025},
publisher={HuggingFace}
}
```
""")
302 |
+
# Script entry point: launch the Gradio app (Spaces also invokes this file
# directly, so demo.launch() runs with default host/port settings).
if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio==4.44.1
|
2 |
+
transformers==4.36.0
|
3 |
+
torch==2.1.0
|
4 |
+
sentencepiece==0.1.99
|
5 |
+
protobuf==3.20.3
|