trendyol-dino-v2-ecommerce-256d / image_processing_trendyol_dinov2.py
yusufcakmak's picture
feat: test upload - Trendyol DinoV2 Product Similarity and Retrieval Embedding Model
a15fec5 verified
"""
Hugging Face compatible image processor for Trendyol DinoV2
"""
from transformers import ImageProcessingMixin, BatchFeature
from transformers.utils import TensorType
from PIL import Image
import torch
import numpy as np
import cv2
from torchvision import transforms
import torchvision.transforms.functional as TF
from io import BytesIO
from typing import Union, List, Optional
def downscale_image(image: Image.Image, max_dimension: int) -> Image.Image:
    """Downscale an image so that its longer side is at most ``max_dimension``.

    The aspect ratio is preserved and Lanczos resampling is used. An image
    whose longer side already fits is returned as-is (the same object, not a
    copy); this function never upscales.

    Args:
        image: PIL image to shrink.
        max_dimension: Maximum allowed length, in pixels, of the longer side.

    Returns:
        The input image when no resize is needed, otherwise a new resized
        image whose longer side equals ``max_dimension``.
    """
    original_width, original_height = image.size
    if max(original_width, original_height) <= max_dimension:
        return image

    aspect_ratio = original_width / original_height
    if original_width > original_height:
        new_width = max_dimension
        # int() truncates toward zero; for very elongated images the short
        # side would become 0, which resize() cannot handle. Keep >= 1 px.
        new_height = max(1, int(max_dimension / aspect_ratio))
    else:
        new_height = max_dimension
        new_width = max(1, int(max_dimension * aspect_ratio))
    return image.resize((new_width, new_height), Image.LANCZOS)
class DownScaleLanczos:
    """Callable transform wrapping ``downscale_image`` with a fixed size limit.

    Args:
        target_size: Maximum length of the longer side, forwarded to
            ``downscale_image`` as ``max_dimension`` (default 384).
    """

    def __init__(self, target_size=384):
        self.target_size = target_size

    def __call__(self, img):
        """Return ``img`` downscaled so its longer side fits ``target_size``."""
        limit = self.target_size
        return downscale_image(img, limit)
class JPEGCompression:
    """Callable transform that round-trips an image through in-memory JPEG.

    The image is encoded as JPEG at the configured quality into a memory
    buffer and reopened from it, so the returned image carries the
    corresponding compression artifacts.

    Args:
        quality: JPEG quality setting passed to ``Image.save`` (default 75).
    """

    # Image modes Pillow's JPEG encoder can write directly; any other mode
    # (e.g. RGBA, LA, P) makes ``save(format="JPEG")`` raise.
    _JPEG_WRITABLE_MODES = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    def __init__(self, quality=75):
        self.quality = quality

    def __call__(self, img):
        """Return a JPEG-re-encoded version of ``img``.

        Images in a mode JPEG cannot store are converted to RGB first instead
        of failing; images in a writable mode are encoded unchanged.
        """
        if img.mode not in self._JPEG_WRITABLE_MODES:
            img = img.convert("RGB")
        buffer = BytesIO()
        img.save(buffer, format="JPEG", quality=self.quality)
        buffer.seek(0)
        # The returned image decodes lazily from ``buffer``, which it keeps
        # referenced, so the buffer must not be closed here.
        return Image.open(buffer)
class ScaleImage:
    """Callable transform that rescales an image by its longer side.

    The image is resized with bilinear resampling by the factor
    ``target_size / max(width, height)``, preserving the aspect ratio. Unlike
    ``downscale_image`` this also enlarges images smaller than the target.

    Args:
        target_size: Desired length, in pixels, of the longer side.
    """

    def __init__(self, target_size):
        self.target_size = target_size

    def __call__(self, img):
        """Return ``img`` resized so its longer side is about ``target_size``."""
        w, h = img.size
        scale = self.target_size / max(h, w)
        # int() truncates toward zero; for very elongated images the short
        # side would become 0, which resize() cannot handle. Keep >= 1 px.
        new_size = max(1, int(w * scale)), max(1, int(h * scale))
        return img.resize(new_size, Image.BILINEAR)
class PadToSquare:
    """Callable transform that pads the shorter side to make an image square.

    Padding is split evenly between the two opposite edges; when the size
    difference is odd, the extra pixel goes to the right (portrait images) or
    the bottom (landscape images).

    Args:
        color: Constant fill value for the padded area (default 255). The
            special value ``-1`` disables padding entirely.
    """

    def __init__(self, color=255):
        self.color = color

    def __call__(self, img):
        """Return ``img`` padded to a square, or unchanged if nothing to do.

        NumPy arrays are converted to PIL images first.
        """
        if isinstance(img, np.ndarray):
            img = Image.fromarray(img)

        width, height = img.size
        # Nothing to pad: padding disabled or the image is already square.
        if self.color == -1 or width == height:
            return img

        gap = abs(width - height)
        leading = gap // 2
        trailing = leading + gap % 2

        # torchvision padding order is (left, top, right, bottom).
        if width < height:
            borders = (leading, 0, trailing, 0)
        else:
            borders = (0, leading, 0, trailing)
        return TF.pad(img, borders, fill=self.color, padding_mode='constant')
class TrendyolDinoV2ImageProcessor(ImageProcessingMixin):
    """
    Hugging Face compatible image processor for TrendyolDinoV2 model.

    Every image goes through the following steps, in order:

    1. Lanczos downscale so the longer side is at most ``downscale_size``.
    2. JPEG re-encoding at ``jpeg_quality``.
    3. Bilinear rescale so the longer side is ``downscale_size``.
    4. Padding to a square with ``pad_color``.
    5. Resize to ``input_size`` x ``input_size``.
    6. Conversion to a tensor.
    7. Normalization with ``image_mean`` / ``image_std`` (if ``do_normalize``).

    Args:
        input_size: Side length of the square output image.
        downscale_size: Longer-side size used by steps 1 and 3.
        pad_color: Fill value for the square padding; ``-1`` disables padding.
        jpeg_quality: Quality used for the JPEG re-encoding step.
        do_normalize: Whether to apply mean/std normalization.
        image_mean: Per-channel mean used for normalization.
        image_std: Per-channel standard deviation used for normalization.
        **kwargs: Forwarded to ``ImageProcessingMixin.__init__``.
    """

    model_input_names = ["pixel_values"]

    def __init__(
        self,
        input_size=224,
        downscale_size=332,
        pad_color=255,
        jpeg_quality=75,
        do_normalize=True,
        image_mean=(0.485, 0.456, 0.406),
        image_std=(0.229, 0.224, 0.225),
        **kwargs
    ):
        super().__init__(**kwargs)
        self.input_size = input_size
        self.downscale_size = downscale_size
        self.pad_color = pad_color
        self.jpeg_quality = jpeg_quality
        self.do_normalize = do_normalize
        self.image_mean = image_mean
        self.image_std = image_std

    def _get_preprocess_fn(self):
        """Create the preprocessing pipeline (not stored as attribute to avoid JSON serialization issues)"""
        steps = [
            DownScaleLanczos(self.downscale_size),
            JPEGCompression(self.jpeg_quality),
            ScaleImage(self.downscale_size),
            PadToSquare(self.pad_color),
            transforms.Resize((self.input_size, self.input_size)),
            transforms.ToTensor(),
        ]
        # Honor the ``do_normalize`` flag instead of always normalizing.
        if self.do_normalize:
            steps.append(transforms.Normalize(self.image_mean, self.image_std))
        return transforms.Compose(steps)

    @staticmethod
    def _to_rgb_image(image):
        """Load/convert one supported input into an RGB PIL image.

        Raises:
            ValueError: If ``image`` is not a path string, NumPy array or
                PIL image.
        """
        if isinstance(image, str):
            # convert() fully loads the pixel data, so the underlying file can
            # be closed right away instead of lingering until garbage collection.
            with Image.open(image) as opened:
                return opened.convert('RGB')
        if isinstance(image, np.ndarray):
            return Image.fromarray(image).convert('RGB')
        if isinstance(image, Image.Image):
            # PIL inputs may be RGBA / P / L, which break JPEG encoding or the
            # 3-channel normalization further down; normalize them like the
            # other input kinds.
            return image.convert('RGB')
        raise ValueError(f"Unsupported image type: {type(image)}")

    def __call__(
        self,
        images: Union[
            str,
            Image.Image,
            np.ndarray,
            List[Union[str, Image.Image, np.ndarray]],
        ],
        return_tensors: Optional[Union[str, TensorType]] = None,
        **kwargs
    ) -> BatchFeature:
        """
        Preprocess images for the model.

        Args:
            images: A single image or a list/tuple of images. Each image may
                be a file path, a PIL image or a NumPy array accepted by
                ``Image.fromarray``. Other types (including ``torch.Tensor``)
                are rejected.
            return_tensors: Tensor type forwarded to ``BatchFeature``.
            **kwargs: Accepted for call compatibility; currently unused.

        Returns:
            A ``BatchFeature`` whose ``"pixel_values"`` entry stacks the
            processed images along a new first dimension.

        Raises:
            ValueError: If ``images`` is empty or contains an unsupported type.
        """
        # Handle single image
        if not isinstance(images, (list, tuple)):
            images = [images]
        if not images:
            # torch.stack would otherwise fail with an opaque error.
            raise ValueError("At least one image is required, got an empty batch")

        # Build the pipeline once and reuse it for the whole batch.
        preprocess_fn = self._get_preprocess_fn()
        pixel_values = torch.stack(
            [preprocess_fn(self._to_rgb_image(image)) for image in images]
        )

        return BatchFeature(data={"pixel_values": pixel_values}, tensor_type=return_tensors)
# Register this custom processor for the "AutoImageProcessor" auto class
# (see the transformers documentation on sharing custom code for how the
# registration is used when saving and loading).
TrendyolDinoV2ImageProcessor.register_for_auto_class("AutoImageProcessor")