
feat: test upload - Trendyol DinoV2 Product Similarity and Retrieval Embedding Model
a15fec5
""" | |
Hugging Face compatible image processor for Trendyol DinoV2 | |
""" | |
from transformers import ImageProcessingMixin, BatchFeature | |
from transformers.utils import TensorType | |
from PIL import Image | |
import torch | |
import numpy as np | |
import cv2 | |
from torchvision import transforms | |
import torchvision.transforms.functional as TF | |
from io import BytesIO | |
from typing import Union, List, Optional | |
def downscale_image(image: Image.Image, max_dimension: int) -> Image.Image:
    """Downscale an image while maintaining its aspect ratio."""
    original_width, original_height = image.size
    if max(original_width, original_height) <= max_dimension:
        return image
    aspect_ratio = original_width / original_height
    if original_width > original_height:
        new_width = max_dimension
        new_height = int(max_dimension / aspect_ratio)
    else:
        new_height = max_dimension
        new_width = int(max_dimension * aspect_ratio)
    return image.resize((new_width, new_height), Image.LANCZOS)


class DownScaleLanczos:
    def __init__(self, target_size=384):
        self.target_size = target_size

    def __call__(self, img):
        return downscale_image(img, self.target_size)


class JPEGCompression:
    def __init__(self, quality=75):
        self.quality = quality

    def __call__(self, img):
        # Re-encode the image as JPEG to apply lossy compression artifacts.
        buffer = BytesIO()
        img.save(buffer, format='JPEG', quality=self.quality)
        buffer.seek(0)
        return Image.open(buffer)


class ScaleImage:
    def __init__(self, target_size):
        self.target_size = target_size

    def __call__(self, img):
        w, h = img.size
        max_size = max(h, w)
        scale = self.target_size / max_size
        new_size = int(w * scale), int(h * scale)
        return img.resize(new_size, Image.BILINEAR)


class PadToSquare:
    def __init__(self, color=255):
        self.color = color

    def __call__(self, img):
        if isinstance(img, np.ndarray):
            img = Image.fromarray(img)
        width, height = img.size
        # A color of -1 disables padding; otherwise pad the shorter side
        # symmetrically (left/right or top/bottom) to make the image square.
        if self.color != -1:
            padding = abs(width - height) // 2
            if width < height:
                return TF.pad(img, (padding, 0, padding + (height - width) % 2, 0), fill=self.color, padding_mode='constant')
            elif width > height:
                return TF.pad(img, (0, padding, 0, padding + (width - height) % 2), fill=self.color, padding_mode='constant')
        return img


class TrendyolDinoV2ImageProcessor(ImageProcessingMixin):
    """
    Hugging Face compatible image processor for the TrendyolDinoV2 model.
    """

    model_input_names = ["pixel_values"]

    def __init__(
        self,
        input_size=224,
        downscale_size=332,
        pad_color=255,
        jpeg_quality=75,
        do_normalize=True,
        image_mean=(0.485, 0.456, 0.406),
        image_std=(0.229, 0.224, 0.225),
        **kwargs
    ):
        super().__init__(**kwargs)
        self.input_size = input_size
        self.downscale_size = downscale_size
        self.pad_color = pad_color
        self.jpeg_quality = jpeg_quality
        self.do_normalize = do_normalize
        self.image_mean = image_mean
        self.image_std = image_std

    def _get_preprocess_fn(self):
        """Create the preprocessing pipeline (not stored as an attribute to avoid JSON serialization issues)."""
        steps = [
            DownScaleLanczos(self.downscale_size),
            JPEGCompression(self.jpeg_quality),
            ScaleImage(self.downscale_size),
            PadToSquare(self.pad_color),
            transforms.Resize((self.input_size, self.input_size)),
            transforms.ToTensor(),
        ]
        if self.do_normalize:
            steps.append(transforms.Normalize(self.image_mean, self.image_std))
        return transforms.Compose(steps)

    def __call__(
        self,
        images: Union[str, Image.Image, np.ndarray, List[str], List[Image.Image], List[np.ndarray]],
        return_tensors: Optional[Union[str, TensorType]] = None,
        **kwargs
    ) -> BatchFeature:
        """
        Preprocess images (file paths, PIL images, or NumPy arrays) for the model.
        """
        # Handle a single image
        if not isinstance(images, list):
            images = [images]
        # Build the preprocessing pipeline
        preprocess_fn = self._get_preprocess_fn()
        # Preprocess all images
        processed_images = []
        for image in images:
            if isinstance(image, str):
                image = Image.open(image).convert('RGB')
            elif isinstance(image, np.ndarray):
                image = Image.fromarray(image).convert('RGB')
            elif isinstance(image, Image.Image):
                image = image.convert('RGB')
            else:
                raise ValueError(f"Unsupported image type: {type(image)}")
            # Apply preprocessing
            processed_tensor = preprocess_fn(image)
            processed_images.append(processed_tensor)
        # Stack tensors into a single batch
        pixel_values = torch.stack(processed_images)
        # Return a BatchFeature
        data = {"pixel_values": pixel_values}
        return BatchFeature(data=data, tensor_type=return_tensors)


# Register for auto class
TrendyolDinoV2ImageProcessor.register_for_auto_class("AutoImageProcessor")
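

if __name__ == "__main__":
    # Minimal usage sketch (assumption: "example_product.jpg" is a placeholder
    # path, not a file shipped with this repo). Builds the processor with its
    # defaults and runs a single image through the preprocessing pipeline.
    processor = TrendyolDinoV2ImageProcessor()
    sample = Image.open("example_product.jpg").convert("RGB")
    batch = processor(sample, return_tensors="pt")
    # Expected shape: (1, 3, input_size, input_size), i.e. (1, 3, 224, 224).
    print(batch["pixel_values"].shape)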