"""
Hugging Face compatible image processor for Trendyol DinoV2
"""
from transformers import ImageProcessingMixin, BatchFeature
from transformers.utils import TensorType
from PIL import Image
import torch
import numpy as np
import cv2
from torchvision import transforms
import torchvision.transforms.functional as TF
from io import BytesIO
from typing import Union, List, Optional


def downscale_image(image: Image.Image, max_dimension: int) -> Image.Image:
    """Downscale image while maintaining aspect ratio.

    Args:
        image: PIL image to shrink.
        max_dimension: Upper bound for the longer side of the result.

    Returns:
        The input image itself when its longer side already fits within
        ``max_dimension``; otherwise a LANCZOS-resampled copy whose longer
        side equals ``max_dimension``.
    """
    original_width, original_height = image.size

    if max(original_width, original_height) <= max_dimension:
        return image

    aspect_ratio = original_width / original_height

    # The shorter side is truncated (not rounded) as before; it is clamped to
    # at least one pixel so extreme aspect ratios do not yield a zero-sized
    # target, which PIL cannot resize to.
    if original_width > original_height:
        new_width = max_dimension
        new_height = max(1, int(max_dimension / aspect_ratio))
    else:
        new_height = max_dimension
        new_width = max(1, int(max_dimension * aspect_ratio))

    return image.resize((new_width, new_height), Image.LANCZOS)


class DownScaleLanczos:
    """Transform that applies :func:`downscale_image` with a fixed size limit."""

    def __init__(self, target_size=384):
        # Maximum allowed length of the longer image side.
        self.target_size = target_size

    def __call__(self, img):
        """Return ``img`` downscaled so its longer side is <= ``target_size``."""
        return downscale_image(img, self.target_size)


class JPEGCompression:
    """Transform that round-trips an image through in-memory JPEG encoding."""

    # Pillow image modes that the JPEG encoder can write directly.
    _JPEG_MODES = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    def __init__(self, quality=75):
        # JPEG quality setting passed to ``Image.save``.
        self.quality = quality

    def __call__(self, img):
        """Encode ``img`` as JPEG into a buffer and reopen it as a PIL image.

        Images in a mode JPEG cannot store (e.g. ``RGBA`` or ``P``) are
        converted to ``RGB`` first instead of failing inside ``save``.
        """
        if img.mode not in self._JPEG_MODES:
            img = img.convert('RGB')

        buffer = BytesIO()
        img.save(buffer, format='JPEG', quality=self.quality)
        buffer.seek(0)
        return Image.open(buffer)


class ScaleImage:
    """Transform that rescales an image so its longer side equals a target."""

    def __init__(self, target_size):
        # Desired length of the longer image side after scaling.
        self.target_size = target_size

    def __call__(self, img):
        """Resize ``img`` (up or down) with BILINEAR resampling.

        Both sides are multiplied by ``target_size / max(width, height)`` and
        truncated to integers, with a floor of one pixel per side.
        """
        w, h = img.size
        scale = self.target_size / max(h, w)
        # Clamp to 1 so a very thin image never produces a zero-length side.
        new_size = max(1, int(w * scale)), max(1, int(h * scale))
        return img.resize(new_size, Image.BILINEAR)


class PadToSquare:
    """Transform that pads the shorter side of an image to make it square."""

    def __init__(self, color=255):
        # Fill value for the padded area; -1 disables padding entirely.
        self.color = color

    def __call__(self, img):
        """Pad ``img`` with a constant colour until width equals height.

        NumPy arrays are converted to PIL images first. The padding is split
        across both sides of the shorter axis; when the difference is odd the
        extra pixel goes to the right/bottom side. The image is returned
        unpadded when it is already square or when ``color`` is ``-1``.
        """
        if isinstance(img, np.ndarray):
            img = Image.fromarray(img)

        width, height = img.size

        if self.color == -1 or width == height:
            return img

        difference = abs(width - height)
        before = difference // 2
        after = before + difference % 2

        # torchvision's pad takes (left, top, right, bottom).
        if width < height:
            borders = (before, 0, after, 0)
        else:
            borders = (0, before, 0, after)

        return TF.pad(img, borders, fill=self.color, padding_mode='constant')


class TrendyolDinoV2ImageProcessor(ImageProcessingMixin):
    """
    Hugging Face compatible image processor for TrendyolDinoV2 model.

    Args:
        input_size: Side length of the square image fed to the model.
        downscale_size: Longer-side length used by the downscale and rescale
            steps that run before padding.
        pad_color: Fill value used when padding to a square (-1 disables
            padding).
        jpeg_quality: Quality used for the JPEG round-trip step.
        do_normalize: Whether to apply mean/std normalization to the tensor.
        image_mean: Per-channel mean used for normalization.
        image_std: Per-channel standard deviation used for normalization.
        **kwargs: Forwarded to ``ImageProcessingMixin.__init__``.
    """

    model_input_names = ["pixel_values"]

    def __init__(
        self,
        input_size=224,
        downscale_size=332,
        pad_color=255,
        jpeg_quality=75,
        do_normalize=True,
        image_mean=(0.485, 0.456, 0.406),
        image_std=(0.229, 0.224, 0.225),
        **kwargs
    ):
        super().__init__(**kwargs)

        self.input_size = input_size
        self.downscale_size = downscale_size
        self.pad_color = pad_color
        self.jpeg_quality = jpeg_quality
        self.do_normalize = do_normalize
        self.image_mean = image_mean
        self.image_std = image_std

    def _get_preprocess_fn(self):
        """Create the preprocessing pipeline (not stored as attribute to avoid JSON serialization issues).

        Normalization is appended only when ``do_normalize`` is true.
        """
        steps = [
            DownScaleLanczos(self.downscale_size),
            JPEGCompression(self.jpeg_quality),
            ScaleImage(self.downscale_size),
            PadToSquare(self.pad_color),
            transforms.Resize((self.input_size, self.input_size)),
            transforms.ToTensor(),
        ]
        if self.do_normalize:
            steps.append(transforms.Normalize(self.image_mean, self.image_std))
        return transforms.Compose(steps)

    @staticmethod
    def _to_rgb_image(image) -> Image.Image:
        """Convert one supported input into an RGB PIL image.

        Raises:
            ValueError: If ``image`` is not a path string, NumPy array,
                torch tensor or PIL image.
        """
        if isinstance(image, str):
            # Close the file handle as soon as the pixels are decoded.
            with Image.open(image) as opened:
                return opened.convert('RGB')
        if isinstance(image, np.ndarray):
            return Image.fromarray(image).convert('RGB')
        if isinstance(image, torch.Tensor):
            # NOTE(review): relies on torchvision's to_pil_image conventions
            # (channel-first layout; float data scaled to [0, 1]) - confirm
            # callers pass tensors in that form.
            return TF.to_pil_image(image).convert('RGB')
        if isinstance(image, Image.Image):
            # RGBA/P images cannot be JPEG-encoded and grayscale images would
            # not match the 3-channel mean/std, so always work in RGB.
            return image if image.mode == 'RGB' else image.convert('RGB')
        raise ValueError(f"Unsupported image type: {type(image)}")

    def __call__(
        self,
        images: Union[Image.Image, np.ndarray, torch.Tensor, List[Image.Image], List[np.ndarray], List[torch.Tensor]],
        return_tensors: Optional[Union[str, TensorType]] = None,
        **kwargs
    ) -> BatchFeature:
        """
        Preprocess images for the model.

        Args:
            images: A single image or a list/tuple of images. Each item may
                be a PIL image, a NumPy array, a torch tensor or a file path
                string.
            return_tensors: Tensor type passed to ``BatchFeature`` as
                ``tensor_type``.
            **kwargs: Accepted for API compatibility; not used.

        Returns:
            A ``BatchFeature`` whose ``"pixel_values"`` entry stacks one
            processed tensor per input image.

        Raises:
            ValueError: If no images are given or an item has an unsupported
                type.
        """
        # Handle single image
        if not isinstance(images, (list, tuple)):
            images = [images]

        if len(images) == 0:
            raise ValueError("At least one image is required")

        # Get preprocessing pipeline
        preprocess_fn = self._get_preprocess_fn()

        # Preprocess all images
        processed_images = [
            preprocess_fn(self._to_rgb_image(image)) for image in images
        ]

        # Stack tensors
        pixel_values = torch.stack(processed_images)

        # Return BatchFeature
        data = {"pixel_values": pixel_values}
        return BatchFeature(data=data, tensor_type=return_tensors)


# Register for auto class
TrendyolDinoV2ImageProcessor.register_for_auto_class("AutoImageProcessor")