File size: 5,234 Bytes
a15fec5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
"""
Hugging Face compatible image processor for Trendyol DinoV2
"""
from transformers import ImageProcessingMixin, BatchFeature
from transformers.utils import TensorType
from PIL import Image
import torch
import numpy as np
import cv2
from torchvision import transforms
import torchvision.transforms.functional as TF
from io import BytesIO
from typing import Union, List, Optional


def downscale_image(image: Image.Image, max_dimension: int) -> Image.Image:
    """Shrink ``image`` so that its longer side is at most ``max_dimension``.

    The aspect ratio is preserved and Lanczos resampling is used. This
    function never upscales: an image whose longer side already fits is
    returned as-is (the very same object, not a copy).

    Args:
        image: Source PIL image.
        max_dimension: Upper bound, in pixels, for the longer side.

    Returns:
        The input image if it already fits, otherwise a resized copy whose
        longer side equals ``max_dimension``.
    """
    original_width, original_height = image.size

    if max(original_width, original_height) <= max_dimension:
        return image

    aspect_ratio = original_width / original_height

    # The shorter side is truncated (not rounded) so existing output sizes do
    # not change, but it is clamped to 1 px: for very elongated images the
    # truncation would otherwise produce 0, and resize() rejects empty sizes.
    if original_width > original_height:
        new_width = max_dimension
        new_height = max(1, int(max_dimension / aspect_ratio))
    else:
        new_height = max_dimension
        new_width = max(1, int(max_dimension * aspect_ratio))

    return image.resize((new_width, new_height), Image.LANCZOS)


class DownScaleLanczos:
    """Callable transform that caps an image's longer side.

    Delegates to ``downscale_image``; images that already fit within
    ``target_size`` pass through unchanged.

    Args:
        target_size: Maximum allowed length of the longer side (default 384).
    """

    def __init__(self, target_size=384):
        self.target_size = target_size

    def __call__(self, img):
        """Return ``img`` limited to ``target_size`` on its longer side."""
        limit = self.target_size
        return downscale_image(img, max_dimension=limit)


class JPEGCompression:
    """Callable transform that round-trips an image through JPEG encoding.

    The image is encoded to an in-memory JPEG at the configured quality and
    opened again, so the result carries the corresponding compression
    artefacts.

    Args:
        quality: JPEG quality setting passed to ``Image.save`` (default 75).
    """

    # Modes the Pillow JPEG writer can encode directly. Any other mode
    # (e.g. RGBA, LA, P) makes Image.save() fail with
    # "cannot write mode ... as JPEG", so such images are converted first.
    _JPEG_MODES = frozenset({"1", "L", "RGB", "RGBX", "CMYK", "YCbCr"})

    def __init__(self, quality=75):
        self.quality = quality

    def __call__(self, img):
        """Return ``img`` after an in-memory JPEG encode/decode cycle.

        Images in a mode JPEG cannot store are converted to RGB beforehand
        (an alpha channel, if any, is dropped by that conversion). Images in
        an already supported mode are encoded exactly as before.
        """
        if img.mode not in self._JPEG_MODES:
            img = img.convert('RGB')

        buffer = BytesIO()
        img.save(buffer, format='JPEG', quality=self.quality)
        # Rewind so that Image.open() reads the stream from its first byte.
        buffer.seek(0)
        return Image.open(buffer)


class ScaleImage:
    """Callable transform that rescales an image to a target longer side.

    Unlike ``downscale_image`` this always resizes, so smaller images are
    enlarged as well. The aspect ratio is kept and bilinear resampling is
    used.

    Args:
        target_size: Desired length, in pixels, of the longer side.
    """

    def __init__(self, target_size):
        self.target_size = target_size

    def __call__(self, img):
        """Return ``img`` resized so its longer side is about ``target_size``."""
        w, h = img.size
        max_size = max(h, w)
        scale = self.target_size / max_size
        # Truncation is kept so existing output sizes do not change; the clamp
        # to 1 px prevents a zero-sized side for very elongated images, which
        # resize() would reject.
        new_size = max(1, int(w * scale)), max(1, int(h * scale))
        return img.resize(new_size, Image.BILINEAR)


class PadToSquare:
    """Callable transform that pads the shorter side to make an image square.

    The padding is split between both sides of the shorter axis; when the
    difference is odd, the extra pixel goes to the right/bottom edge.

    Args:
        color: Fill value for the added border (default 255). The sentinel
            ``-1`` turns padding off, in which case images pass through.
    """

    def __init__(self, color=255):
        self.color = color

    def __call__(self, img):
        """Return ``img`` padded to a square; NumPy arrays become PIL images."""
        if isinstance(img, np.ndarray):
            img = Image.fromarray(img)

        width, height = img.size

        # Nothing to add when padding is disabled or the image is square.
        if self.color == -1 or width == height:
            return img

        gap = abs(width - height)
        leading = gap // 2
        trailing = leading + gap % 2

        # Border order expected by TF.pad is (left, top, right, bottom).
        if width < height:
            borders = (leading, 0, trailing, 0)
        else:
            borders = (0, leading, 0, trailing)

        return TF.pad(img, borders, fill=self.color, padding_mode='constant')


class TrendyolDinoV2ImageProcessor(ImageProcessingMixin):
    """
    Hugging Face compatible image processor for TrendyolDinoV2 model.

    Each image goes through the following steps, in order:

    1. Lanczos downscale so the longer side is at most ``downscale_size``.
    2. JPEG encode/decode round-trip at ``jpeg_quality``.
    3. Bilinear rescale so the longer side is ``downscale_size``.
    4. Padding to a square with ``pad_color``.
    5. Resize to ``input_size`` x ``input_size``.
    6. Conversion to a tensor and, if ``do_normalize`` is set, normalization
       with ``image_mean`` / ``image_std``.

    Args:
        input_size: Side length of the final square image.
        downscale_size: Longer-side length used by steps 1 and 3.
        pad_color: Fill value for the square padding (``-1`` disables it).
        jpeg_quality: Quality setting of the JPEG round-trip.
        do_normalize: Whether to apply mean/std normalization.
        image_mean: Per-channel mean used for normalization.
        image_std: Per-channel standard deviation used for normalization.
        **kwargs: Forwarded to ``ImageProcessingMixin.__init__``.
    """

    model_input_names = ["pixel_values"]

    def __init__(
        self,
        input_size=224,
        downscale_size=332,
        pad_color=255,
        jpeg_quality=75,
        do_normalize=True,
        image_mean=(0.485, 0.456, 0.406),
        image_std=(0.229, 0.224, 0.225),
        **kwargs
    ):
        super().__init__(**kwargs)

        self.input_size = input_size
        self.downscale_size = downscale_size
        self.pad_color = pad_color
        self.jpeg_quality = jpeg_quality
        self.do_normalize = do_normalize
        self.image_mean = image_mean
        self.image_std = image_std

    def _get_preprocess_fn(self):
        """Create the preprocessing pipeline (not stored as attribute to avoid JSON serialization issues)"""
        steps = [
            DownScaleLanczos(self.downscale_size),
            JPEGCompression(self.jpeg_quality),
            ScaleImage(self.downscale_size),
            PadToSquare(self.pad_color),
            transforms.Resize((self.input_size, self.input_size)),
            transforms.ToTensor(),
        ]
        # Honour the do_normalize flag; it used to be stored but never read,
        # so normalization could not be switched off.
        if self.do_normalize:
            steps.append(transforms.Normalize(self.image_mean, self.image_std))
        return transforms.Compose(steps)

    @staticmethod
    def _to_rgb_image(image):
        """Turn one supported input (path, ndarray, PIL image) into an RGB PIL image."""
        if isinstance(image, str):
            # The context manager closes the file handle; convert() returns a
            # fully loaded copy that remains valid after the file is closed.
            with Image.open(image) as opened:
                return opened.convert('RGB')
        if isinstance(image, np.ndarray):
            return Image.fromarray(image).convert('RGB')
        if isinstance(image, Image.Image):
            # The rest of the pipeline (JPEG round-trip, 3-channel
            # normalization) needs RGB, so other modes are converted just like
            # path and ndarray inputs are.
            return image if image.mode == 'RGB' else image.convert('RGB')
        raise ValueError(f"Unsupported image type: {type(image)}")

    def __call__(
        self,
        images: Union[str, Image.Image, np.ndarray, List[Union[str, Image.Image, np.ndarray]]],
        return_tensors: Optional[Union[str, TensorType]] = None,
        **kwargs
    ) -> BatchFeature:
        """
        Preprocess images for the model.

        Args:
            images: A single image or a list/tuple of images. Each image may
                be a file path, a NumPy array or a PIL image. Other types,
                including ``torch.Tensor``, are rejected.
            return_tensors: Passed to ``BatchFeature`` as ``tensor_type``.
            **kwargs: Accepted for call compatibility; not used.

        Returns:
            A ``BatchFeature`` whose ``"pixel_values"`` entry holds the
            processed images stacked along a new leading dimension.

        Raises:
            ValueError: If ``images`` is empty or contains an unsupported type.
        """
        # Handle single image
        if not isinstance(images, (list, tuple)):
            images = [images]

        # Fail early with a clear message instead of an opaque torch.stack error.
        if not images:
            raise ValueError("`images` must contain at least one image")

        # Get preprocessing pipeline
        preprocess_fn = self._get_preprocess_fn()

        # Preprocess all images and stack them into one batch tensor
        pixel_values = torch.stack(
            [preprocess_fn(self._to_rgb_image(image)) for image in images]
        )

        # Return BatchFeature
        data = {"pixel_values": pixel_values}
        return BatchFeature(data=data, tensor_type=return_tensors)


# Register for auto class: associates this custom processor with
# AutoImageProcessor. This runs as an import-time side effect of the module.
# NOTE(review): presumably needed so the class is picked up when the file is
# shipped as custom code next to a checkpoint — confirm against the
# transformers documentation.
TrendyolDinoV2ImageProcessor.register_for_auto_class("AutoImageProcessor")