trendyol-dino-v2-ecommerce-256d / image_processing_trendyol_dinov2.py

feat: test upload - Trendyol DinoV2 Product Similarity and Retrieval Embedding Model

a15fec5 verified 13 days ago

5.23 kB

	"""
	Hugging Face compatible image processor for Trendyol DinoV2
	"""
	from transformers import ImageProcessingMixin, BatchFeature
	from transformers.utils import TensorType
	from PIL import Image
	import torch
	import numpy as np
	import cv2
	from torchvision import transforms
	import torchvision.transforms.functional as TF
	from io import BytesIO
	from typing import Union, List, Optional


	def downscale_image(image: Image.Image, max_dimension: int) -> Image.Image:
	    """Shrink ``image`` so that its longer side is at most ``max_dimension``.

	    The aspect ratio is preserved and Lanczos resampling is used. An image
	    whose sides already fit is returned as-is (the same object, not a copy).

	    Args:
	        image: Source PIL image.
	        max_dimension: Upper bound for the longer side of the result.

	    Returns:
	        ``image`` itself when no downscaling is needed, otherwise a resized
	        copy whose longer side equals ``max_dimension``.
	    """
	    original_width, original_height = image.size

	    if max(original_width, original_height) <= max_dimension:
	        return image

	    aspect_ratio = original_width / original_height

	    # The shorter side is truncated with int(), exactly as before. For very
	    # elongated images that truncation used to yield 0, which makes
	    # image.resize() fail, so the computed side is clamped to one pixel.
	    if original_width > original_height:
	        new_width = max_dimension
	        new_height = max(1, int(max_dimension / aspect_ratio))
	    else:
	        new_height = max_dimension
	        new_width = max(1, int(max_dimension * aspect_ratio))

	    return image.resize((new_width, new_height), Image.LANCZOS)


	class DownScaleLanczos:
	    """Callable transform that caps an image's longer side at ``target_size``.

	    A thin wrapper that forwards to ``downscale_image`` so the operation can
	    be placed in a transform pipeline.
	    """

	    def __init__(self, target_size=384):
	        # Largest value allowed for the longer image side.
	        self.target_size = target_size

	    def __call__(self, img):
	        """Return ``img`` after passing it through ``downscale_image``."""
	        longest_allowed = self.target_size
	        return downscale_image(img, longest_allowed)


	class JPEGCompression:
	    """Callable transform that round-trips an image through in-memory JPEG.

	    The image is encoded as JPEG at the configured quality into a
	    ``BytesIO`` buffer and reopened from that buffer, so the result carries
	    the compression artefacts of that quality level.
	    """

	    # Modes Pillow's JPEG writer can encode directly. Any other mode
	    # (e.g. RGBA, P, LA) makes ``img.save(..., format='JPEG')`` raise, so
	    # such images are converted to RGB before encoding.
	    _JPEG_MODES = frozenset({"1", "L", "RGB", "RGBX", "CMYK", "YCbCr"})

	    def __init__(self, quality=75):
	        # JPEG quality setting forwarded to ``Image.save``.
	        self.quality = quality

	    def __call__(self, img):
	        """Return ``img`` re-decoded from a JPEG encoding of itself.

	        Args:
	            img: PIL image to compress.

	        Returns:
	            A PIL image opened from the in-memory JPEG bytes.
	        """
	        if img.mode not in self._JPEG_MODES:
	            # Same RGB conversion the processor applies to other inputs;
	            # an alpha channel, if present, is dropped.
	            img = img.convert('RGB')

	        buffer = BytesIO()
	        img.save(buffer, format='JPEG', quality=self.quality)
	        buffer.seek(0)
	        # Image.open decodes lazily; the returned image keeps ``buffer``
	        # referenced, so it stays readable after this method returns.
	        return Image.open(buffer)


	class ScaleImage:
	    """Callable transform that rescales an image so its longer side is ``target_size``.

	    Both sides are multiplied by the same factor (aspect ratio preserved)
	    and resampled bilinearly. Unlike a pure downscale, smaller images are
	    enlarged.
	    """

	    def __init__(self, target_size):
	        # Desired length of the longer image side after scaling.
	        self.target_size = target_size

	    def __call__(self, img):
	        """Return ``img`` resized so that ``max(width, height)`` is about ``target_size``.

	        Args:
	            img: PIL image to rescale.

	        Returns:
	            A resized PIL image.
	        """
	        w, h = img.size
	        scale = self.target_size / max(h, w)
	        # int() truncation is kept as-is; the clamp only prevents a side of
	        # a very elongated image from collapsing to 0, which resize() rejects.
	        new_size = max(1, int(w * scale)), max(1, int(h * scale))
	        return img.resize(new_size, Image.BILINEAR)


	class PadToSquare:
	    """Callable transform that pads the shorter side of an image to make it square.

	    The padding is split between the two opposite borders; when the size
	    difference is odd, the extra pixel goes to the right or bottom border.
	    A ``color`` of ``-1`` turns the transform into a no-op.
	    """

	    def __init__(self, color=255):
	        # Fill value for the added borders; -1 disables padding entirely.
	        self.color = color

	    def __call__(self, img):
	        """Return ``img`` padded to a square, or unchanged if nothing is to be done.

	        NumPy arrays are first converted to PIL images.
	        """
	        if isinstance(img, np.ndarray):
	            img = Image.fromarray(img)

	        w, h = img.size
	        # Nothing to pad: padding disabled or image already square.
	        if self.color == -1 or w == h:
	            return img

	        half, leftover = divmod(abs(w - h), 2)
	        # Border order expected by TF.pad: (left, top, right, bottom).
	        if w < h:
	            borders = (half, 0, half + leftover, 0)
	        else:
	            borders = (0, half, 0, half + leftover)
	        return TF.pad(img, borders, fill=self.color, padding_mode='constant')


	class TrendyolDinoV2ImageProcessor(ImageProcessingMixin):
	    """
	    Hugging Face compatible image processor for TrendyolDinoV2 model.

	    Every input image is converted to RGB and then passed through:
	    Lanczos downscale to ``downscale_size``, a JPEG round-trip at
	    ``jpeg_quality``, rescale so the longer side is ``downscale_size``,
	    padding to a square with ``pad_color``, resize to
	    ``input_size`` x ``input_size``, conversion to a tensor and, when
	    ``do_normalize`` is true, normalisation with ``image_mean`` /
	    ``image_std``.
	    """

	    model_input_names = ["pixel_values"]

	    def __init__(
	        self,
	        input_size=224,
	        downscale_size=332,
	        pad_color=255,
	        jpeg_quality=75,
	        do_normalize=True,
	        image_mean=(0.485, 0.456, 0.406),
	        image_std=(0.229, 0.224, 0.225),
	        **kwargs
	    ):
	        """Store the preprocessing settings.

	        Args:
	            input_size: Side length of the square image fed to the model.
	            downscale_size: Longer-side length used by the downscale and
	                rescale steps that precede padding.
	            pad_color: Fill value for square padding (``-1`` disables
	                padding, see ``PadToSquare``).
	            jpeg_quality: Quality of the intermediate JPEG round-trip.
	            do_normalize: Whether to apply mean/std normalisation.
	            image_mean: Per-channel mean used for normalisation.
	            image_std: Per-channel standard deviation used for normalisation.
	            **kwargs: Forwarded to ``ImageProcessingMixin.__init__``.
	        """
	        super().__init__(**kwargs)

	        self.input_size = input_size
	        self.downscale_size = downscale_size
	        self.pad_color = pad_color
	        self.jpeg_quality = jpeg_quality
	        self.do_normalize = do_normalize
	        self.image_mean = image_mean
	        self.image_std = image_std

	    def _get_preprocess_fn(self):
	        """Create the preprocessing pipeline (not stored as attribute to avoid JSON serialization issues)"""
	        steps = [
	            DownScaleLanczos(self.downscale_size),
	            JPEGCompression(self.jpeg_quality),
	            ScaleImage(self.downscale_size),
	            PadToSquare(self.pad_color),
	            transforms.Resize((self.input_size, self.input_size)),
	            transforms.ToTensor(),
	        ]
	        # Honour the do_normalize flag; it was previously stored but the
	        # Normalize step was applied unconditionally.
	        if self.do_normalize:
	            steps.append(transforms.Normalize(self.image_mean, self.image_std))
	        return transforms.Compose(steps)

	    @staticmethod
	    def _load_rgb(image):
	        """Convert one supported input (path, ndarray, tensor, PIL image) to an RGB PIL image."""
	        if isinstance(image, str):
	            # Close the file handle deterministically once pixels are loaded.
	            with Image.open(image) as opened:
	                return opened.convert('RGB')
	        if isinstance(image, np.ndarray):
	            return Image.fromarray(image).convert('RGB')
	        if isinstance(image, torch.Tensor):
	            # Expects a single image tensor in the layout accepted by
	            # torchvision's to_pil_image (channels first, no batch axis).
	            return TF.to_pil_image(image).convert('RGB')
	        if isinstance(image, Image.Image):
	            # RGBA / P / L inputs would otherwise break the JPEG step or
	            # yield the wrong channel count for normalisation.
	            return image if image.mode == 'RGB' else image.convert('RGB')
	        raise ValueError(f"Unsupported image type: {type(image)}")

	    def __call__(
	        self,
	        images: Union[
	            str, Image.Image, np.ndarray, torch.Tensor,
	            List[str], List[Image.Image], List[np.ndarray], List[torch.Tensor],
	        ],
	        return_tensors: Optional[Union[str, TensorType]] = None,
	        **kwargs
	    ) -> BatchFeature:
	        """
	        Preprocess images for the model.

	        Args:
	            images: A single image or a list/tuple of images. Each item may
	                be a file path, a NumPy array, a torch tensor or a PIL image.
	            return_tensors: Passed to ``BatchFeature`` as ``tensor_type``.
	            **kwargs: Accepted for call compatibility; not used.

	        Returns:
	            A ``BatchFeature`` whose ``"pixel_values"`` entry is the stack of
	            all preprocessed images (leading batch dimension).

	        Raises:
	            ValueError: If ``images`` is empty or contains an unsupported type.
	        """
	        # Handle single image (a tuple counts as a batch, like a list)
	        if isinstance(images, (list, tuple)):
	            images = list(images)
	        else:
	            images = [images]

	        if not images:
	            # torch.stack would fail on an empty list with an opaque error.
	            raise ValueError("At least one image is required")

	        # Build the pipeline once per call and reuse it for every image
	        preprocess_fn = self._get_preprocess_fn()

	        processed_images = [
	            preprocess_fn(self._load_rgb(image)) for image in images
	        ]

	        # Stack tensors
	        pixel_values = torch.stack(processed_images)

	        # Return BatchFeature
	        data = {"pixel_values": pixel_values}
	        return BatchFeature(data=data, tensor_type=return_tensors)


	# Register this processor class under the "AutoImageProcessor" auto class.
	# NOTE(review): presumably this is what lets
	# AutoImageProcessor.from_pretrained(..., trust_remote_code=True) resolve to
	# this custom class - confirm against the repo's preprocessor config.
	TrendyolDinoV2ImageProcessor.register_for_auto_class("AutoImageProcessor")