abhilash88 commited on
Commit
2cd4aa9
·
verified ·
1 Parent(s): 46621be

Upload alt_tag_generator.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. alt_tag_generator.py +300 -0
alt_tag_generator.py ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ AI-Powered Alt Tag Generator
3
+ Complete accessibility tool for generating automatic alt tags from images
4
+ """
5
+
6
+ import torch
7
+ import time
8
+ import warnings
9
+ from io import BytesIO
10
+ import requests
11
+ from PIL import Image
12
+ from transformers import BlipProcessor, BlipForConditionalGeneration
13
+ from config import CONFIG, SUPPORTED_LANGUAGES
14
+
15
+ warnings.filterwarnings("ignore")
16
+
17
class AltTagGenerator:
    """AI-powered alt tag generator using the Salesforce BLIP captioning model.

    Loads the BLIP image-captioning model once at construction time, then
    produces several alt-text variations (short / medium / long /
    accessibility / SEO) for any image supplied as a URL, a file path, or a
    PIL.Image instance.
    """

    def __init__(self, device='auto', verbose=True, max_image_size=800):
        """Initialize the generator and load the BLIP model.

        Args:
            device: 'auto' selects CUDA when available; 'cpu' forces CPU.
            verbose: Print progress messages when True.
            max_image_size: Longest edge, in pixels, that input images are
                downscaled to before captioning. Defaults to 800, which was
                previously hard-coded inside load_image().
        """
        self.verbose = verbose
        # Any value other than 'cpu' means "use CUDA if it exists".
        self.device = 'cuda' if torch.cuda.is_available() and device != 'cpu' else 'cpu'
        self.max_image_size = max_image_size

        if self.verbose:
            print(f"Initializing Alt Tag Generator on {self.device}")

        self.model_loaded = False
        self._load_models()

    def _load_models(self):
        """Load the BLIP processor and captioning model.

        Sets ``self.model_loaded`` rather than raising, so a failed download
        degrades to the "Model not loaded" path in generate_caption().
        """
        try:
            if self.verbose:
                print("📥 Loading BLIP model...")

            self.processor = BlipProcessor.from_pretrained(
                "Salesforce/blip-image-captioning-base",
                cache_dir="./models"
            )

            self.model = BlipForConditionalGeneration.from_pretrained(
                "Salesforce/blip-image-captioning-base",
                # Half precision on GPU roughly halves VRAM; CPU needs fp32.
                torch_dtype=torch.float16 if self.device == 'cuda' else torch.float32,
                low_cpu_mem_usage=True,
                cache_dir="./models"
            )

            self.model = self.model.to(self.device)
            self.model.eval()
            self.model_loaded = True

            if self.verbose:
                print("✅ BLIP model loaded successfully")

        except Exception as e:
            print(f"❌ Error loading model: {e}")
            self.model_loaded = False

    def load_image(self, image_source):
        """Load an image from a URL, a file path, or a PIL Image.

        Args:
            image_source: http(s) URL string, local path string, or an
                already-open PIL.Image object.

        Returns:
            An RGB PIL.Image, downscaled so its longest edge does not exceed
            ``self.max_image_size``.

        Raises:
            Exception: If the image cannot be fetched or decoded; the
                original error is chained as the cause.
        """
        try:
            if isinstance(image_source, str):
                if image_source.startswith(('http://', 'https://')):
                    response = requests.get(image_source, timeout=15)
                    response.raise_for_status()
                    image = Image.open(BytesIO(response.content))
                else:
                    image = Image.open(image_source)
            else:
                # Assumed to already be a PIL.Image (or compatible) object.
                image = image_source

            # BLIP expects 3-channel input; also normalizes palettized/CMYK.
            image = image.convert('RGB')

            # Downscale oversized images to bound preprocessing cost.
            max_size = self.max_image_size
            if max(image.size) > max_size:
                ratio = max_size / max(image.size)
                new_size = tuple(int(dim * ratio) for dim in image.size)
                image = image.resize(new_size, Image.Resampling.LANCZOS)

            return image

        except Exception as e:
            # Chain the original error so the real cause stays in the traceback.
            raise Exception(f"Failed to load image: {e}") from e

    def generate_caption(self, image):
        """Generate a caption for *image* using the BLIP model.

        Args:
            image: An RGB PIL.Image (as produced by load_image()).

        Returns:
            Dict with 'caption', 'processing_time' (seconds), 'confidence'
            (a fixed placeholder, not a real model score), and 'error'
            (None on success). Never raises; failures are reported in the
            returned dict.
        """
        if not self.model_loaded:
            return {
                'caption': 'Model not loaded',
                'processing_time': 0.0,
                'confidence': 0.0,
                'error': 'Model failed to load'
            }

        start_time = time.time()

        try:
            if self.device == 'cuda':
                torch.cuda.empty_cache()

            inputs = self.processor(image, return_tensors="pt")
            inputs = {k: v.to(self.device) for k, v in inputs.items()}

            # Deterministic beam search; no sampling so results are repeatable.
            with torch.no_grad():
                output = self.model.generate(
                    **inputs,
                    max_length=50,
                    num_beams=4,
                    early_stopping=True,
                    do_sample=False,
                    repetition_penalty=1.1,
                    length_penalty=1.0
                )

            caption = self.processor.decode(output[0], skip_special_tokens=True)

            # Free tensors eagerly to keep GPU memory pressure low.
            del inputs, output
            if self.device == 'cuda':
                torch.cuda.empty_cache()

            processing_time = time.time() - start_time

            return {
                'caption': caption,
                # NOTE: fixed placeholder — BLIP generate() does not expose a
                # calibrated confidence score here.
                'confidence': 0.85,
                'processing_time': processing_time,
                'error': None
            }

        except Exception as e:
            return {
                'caption': 'Processing failed',
                'processing_time': time.time() - start_time,
                'confidence': 0.0,
                'error': f'Error: {e}'
            }

    def create_alt_variations(self, caption):
        """Derive alt-tag variations from a raw caption string.

        Args:
            caption: Caption text, typically starting with "a"/"an".

        Returns:
            Dict with keys 'short', 'medium', 'long', 'accessibility' and
            'seo'; every value is non-empty (falls back to 'Image').
        """
        caption = caption.strip()

        # Drop a leading article so variations start with the subject.
        if caption.lower().startswith('a '):
            clean_caption = caption[2:]
        elif caption.lower().startswith('an '):
            clean_caption = caption[3:]
        else:
            clean_caption = caption

        words = clean_caption.split()
        variations = {}

        # SHORT: up to 3 content words drawn from the first 5 tokens.
        if len(words) >= 3:
            key_words = []
            for word in words[:5]:
                if word.lower() not in ['with', 'a', 'lot', 'of', 'the', 'and', 'or', 'in', 'on', 'at']:
                    key_words.append(word)
            variations['short'] = ' '.join(key_words[:3])
        else:
            variations['short'] = clean_caption

        # MEDIUM: at most 8 words, preferring to cut before a connective.
        if len(words) <= 8:
            variations['medium'] = clean_caption
        else:
            medium_words = words[:8]
            for i in range(6, 8):
                if i < len(words) and words[i].lower() in ['and', 'or', 'with', 'in', 'on', 'at']:
                    medium_words = words[:i]
                    break
            variations['medium'] = ' '.join(medium_words)

        # LONG: full caption without the leading article.
        variations['long'] = clean_caption

        # ACCESSIBILITY: phrased for screen readers.
        variations['accessibility'] = f"Image shows {clean_caption.lower()}"

        # SEO: lowercase keywords with stop words removed, capped at 6.
        stop_words = {'a', 'an', 'the', 'of', 'with', 'in', 'on', 'at', 'and', 'or', 'but', 'is', 'are'}
        seo_words = [word for word in words if word.lower() not in stop_words]
        variations['seo'] = ' '.join(seo_words[:6]).lower()

        # Guarantee every variation is a non-empty string.
        for key in variations:
            variations[key] = variations[key].strip()
            if not variations[key]:
                variations[key] = 'Image'

        return variations

    def generate_alt_tags(self, image_source):
        """Generate the full alt-tag result set for one image.

        Args:
            image_source: URL, file path, or PIL.Image (see load_image()).

        Returns:
            On success: dict with 'source', 'image_size', 'caption',
            'alt_tags', 'confidence', 'processing_time', 'device_used' and
            'model_info'. On failure: dict with 'error' plus generic
            fallback 'alt_tags' so callers always have usable text.
        """
        total_start = time.time()

        if self.verbose:
            print(f"🎯 Generating alt tags...")

        try:
            image = self.load_image(image_source)
            caption_result = self.generate_caption(image)

            if caption_result['error']:
                raise Exception(caption_result['error'])

            alt_variations = self.create_alt_variations(caption_result['caption'])
            total_time = time.time() - total_start

            results = {
                'source': str(image_source),
                'image_size': image.size,
                'caption': caption_result['caption'],
                'alt_tags': alt_variations,
                'confidence': caption_result['confidence'],
                'processing_time': {
                    'caption': caption_result['processing_time'],
                    'total': total_time
                },
                'device_used': self.device,
                'model_info': {
                    'name': 'BLIP Image Captioning',
                    'version': 'base',
                    'provider': 'Salesforce'
                }
            }

            if self.verbose:
                print(f"✅ Alt tags generated in {total_time:.2f}s")

            return results

        except Exception as e:
            # Best-effort fallback: still return generic alt text so pages
            # never ship images with no alt attribute at all.
            return {
                'error': str(e),
                'alt_tags': {
                    'short': 'Image',
                    'medium': 'Image content unavailable',
                    'long': 'Image content unavailable',
                    'accessibility': 'Image: content unavailable',
                    'seo': 'image'
                },
                'processing_time': {'total': time.time() - total_start},
                'device_used': self.device
            }

    def display_results(self, results):
        """Pretty-print a result dict from generate_alt_tags() to stdout."""
        if 'error' in results:
            print(f"❌ Error: {results['error']}")
            return

        print(f"\nALT TAG RESULTS")
        print("=" * 50)
        print(f"Source: {results['source']}")
        print(f"Size: {results['image_size']}")
        print(f"Caption: {results['caption']}")
        print(f"Device: {results['device_used']}")
        print(f"Time: {results['processing_time']['total']:.2f}s")
        print(f"Confidence: {results['confidence']:.2f}")

        print(f"\nALT TAG VARIATIONS:")
        print("-" * 30)
        for tag_type, alt_text in results['alt_tags'].items():
            print(f"{tag_type.upper():>13}: {alt_text}")
        print("=" * 50)
267
+
268
def quick_demo():
    """Quick demonstration of the alt tag generator"""
    print("🎯 Quick Demo - AI Alt Tag Generator")
    print("=" * 40)

    # A couple of small public sample images for the demo run.
    sample_urls = [
        "https://images.unsplash.com/photo-1514888286974-6c03e2ca1dba?w=400",
        "https://images.unsplash.com/photo-1546069901-ba9599a7e63c?w=400",
    ]

    generator = AltTagGenerator()

    for idx, url in enumerate(sample_urls, 1):
        print(f"\n🖼️ Test Image {idx}:")
        print(f"URL: {url}")

        try:
            outcome = generator.generate_alt_tags(url)
        except Exception as exc:
            print(f"❌ Exception: {exc}")
            continue

        # Failures are reported inside the result dict, not raised.
        if 'error' in outcome:
            print(f"❌ Error: {outcome['error']}")
            continue

        print(f"✅ Caption: {outcome['caption']}")
        print(f"⏱️ Time: {outcome['processing_time']['total']:.2f}s")
        print(f"📝 Alt tags:")
        for kind, text in outcome['alt_tags'].items():
            print(f" {kind}: {text}")
299
# Entry-point guard: run the demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    quick_demo()