"""
Hugging Face compatible image processor for Trendyol DinoV2
"""
from transformers import ImageProcessingMixin, BatchFeature
from transformers.utils import TensorType
from PIL import Image
import torch
import numpy as np
from torchvision import transforms
import torchvision.transforms.functional as TF
from io import BytesIO
from typing import Union, List, Optional


def downscale_image(image: Image.Image, max_dimension: int) -> Image.Image:
    """Downscale image while maintaining aspect ratio."""
    original_width, original_height = image.size
    if max(original_width, original_height) <= max_dimension:
        return image
    aspect_ratio = original_width / original_height
    if original_width > original_height:
        new_width = max_dimension
        new_height = int(max_dimension / aspect_ratio)
    else:
        new_height = max_dimension
        new_width = int(max_dimension * aspect_ratio)
    return image.resize((new_width, new_height), Image.LANCZOS)


class DownScaleLanczos:
    """Downscale with Lanczos resampling so the longest side is at most ``target_size``."""

    def __init__(self, target_size=384):
        self.target_size = target_size

    def __call__(self, img):
        return downscale_image(img, self.target_size)


class JPEGCompression:
    """Re-encode the image as JPEG at the given quality."""

    def __init__(self, quality=75):
        self.quality = quality

    def __call__(self, img):
        buffer = BytesIO()
        img.save(buffer, format='JPEG', quality=self.quality)
        buffer.seek(0)
        return Image.open(buffer)


class ScaleImage:
    """Resize so the longest side equals ``target_size``, preserving aspect ratio."""

    def __init__(self, target_size):
        self.target_size = target_size

    def __call__(self, img):
        w, h = img.size
        max_size = max(h, w)
        scale = self.target_size / max_size
        new_size = int(w * scale), int(h * scale)
        return img.resize(new_size, Image.BILINEAR)


class PadToSquare:
    """Pad the shorter side with ``color`` so the image becomes square (no-op when ``color`` is -1)."""

    def __init__(self, color=255):
        self.color = color

    def __call__(self, img):
        if isinstance(img, np.ndarray):
            img = Image.fromarray(img)
        width, height = img.size
        if self.color != -1:
            padding = abs(width - height) // 2
            if width < height:
                return TF.pad(img, (padding, 0, padding + (height - width) % 2, 0), fill=self.color, padding_mode='constant')
            elif width > height:
                return TF.pad(img, (0, padding, 0, padding + (width - height) % 2), fill=self.color, padding_mode='constant')
        return img


class TrendyolDinoV2ImageProcessor(ImageProcessingMixin):
    """
    Hugging Face compatible image processor for the TrendyolDinoV2 model.
    """

    model_input_names = ["pixel_values"]

    def __init__(
        self,
        input_size=224,
        downscale_size=332,
        pad_color=255,
        jpeg_quality=75,
        do_normalize=True,
        image_mean=(0.485, 0.456, 0.406),
        image_std=(0.229, 0.224, 0.225),
        **kwargs
    ):
        super().__init__(**kwargs)
        self.input_size = input_size
        self.downscale_size = downscale_size
        self.pad_color = pad_color
        self.jpeg_quality = jpeg_quality
        self.do_normalize = do_normalize
        self.image_mean = image_mean
        self.image_std = image_std

    def _get_preprocess_fn(self):
        """Create the preprocessing pipeline (not stored as an attribute to avoid JSON serialization issues)."""
        steps = [
            DownScaleLanczos(self.downscale_size),
            JPEGCompression(self.jpeg_quality),
            ScaleImage(self.downscale_size),
            PadToSquare(self.pad_color),
            transforms.Resize((self.input_size, self.input_size)),
            transforms.ToTensor(),
        ]
        # Honor the do_normalize flag when building the pipeline.
        if self.do_normalize:
            steps.append(transforms.Normalize(self.image_mean, self.image_std))
        return transforms.Compose(steps)

    def __call__(
        self,
        images: Union[str, Image.Image, np.ndarray, torch.Tensor, List[str], List[Image.Image], List[np.ndarray], List[torch.Tensor]],
        return_tensors: Optional[Union[str, TensorType]] = None,
        **kwargs
    ) -> BatchFeature:
        """
        Preprocess one image or a list of images for the model.
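
        Example (illustrative sketch; assumes the default ``input_size=224``):
            >>> processor = TrendyolDinoV2ImageProcessor()
            >>> inputs = processor(Image.new("RGB", (640, 480)), return_tensors="pt")
            >>> inputs["pixel_values"].shape
            torch.Size([1, 3, 224, 224])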
"""
# Handle single image
if not isinstance(images, list):
images = [images]
# Get preprocessing pipeline
preprocess_fn = self._get_preprocess_fn()
# Preprocess all images
processed_images = []
for image in images:
if isinstance(image, str):
image = Image.open(image).convert('RGB')
elif isinstance(image, np.ndarray):
image = Image.fromarray(image).convert('RGB')
elif not isinstance(image, Image.Image):
raise ValueError(f"Unsupported image type: {type(image)}")
# Apply preprocessing
processed_tensor = preprocess_fn(image)
processed_images.append(processed_tensor)
# Stack tensors
pixel_values = torch.stack(processed_images)
# Return BatchFeature
data = {"pixel_values": pixel_values}
return BatchFeature(data=data, tensor_type=return_tensors)


# Register for auto class
TrendyolDinoV2ImageProcessor.register_for_auto_class("AutoImageProcessor")
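

# Illustrative usage sketch: runs the processor on a synthetic in-memory image,
# so no checkpoints or external assets are needed. With the default input_size=224,
# the printed shape should be torch.Size([2, 3, 224, 224]).
if __name__ == "__main__":
    sample = Image.new("RGB", (640, 480), color=(128, 128, 128))
    processor = TrendyolDinoV2ImageProcessor()
    batch = processor([sample, sample], return_tensors="pt")
    print(batch["pixel_values"].shape)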