Update processing_phi3_v.py
#30
by rageSpin · opened

processing_phi3_v.py  +22 -4  CHANGED
@@ -56,7 +56,7 @@ logger = logging.get_logger(__name__)
 if is_vision_available():
     from PIL import Image

-    import torch
+    import torch  # why is this imported twice?
     import torchvision

 def padding_336(b):
@@ -205,6 +205,22 @@ class Phi3VImageProcessor(BaseImageProcessor):
         num_img_tokens = int((new_height // 336 * new_width // 336 + 1) * 144 + 1 + (new_height // 336 + 1) * 12)
         return num_img_tokens

+    def convert_PIL(self, image):
+        """
+        Convert an image to a PIL Image object if it is not already one.
+
+        Args:
+            image (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`): The image to convert.
+
+        Returns:
+            A PIL Image object.
+        """
+        if not isinstance(image, Image.Image):
+            return torchvision.transforms.functional.to_pil_image(image)
+        else:
+            return image
+
+
     def preprocess(
         self,
         images: ImageInput,
@@ -245,8 +261,8 @@ class Phi3VImageProcessor(BaseImageProcessor):
                 "torch.Tensor, tf.Tensor or jax.ndarray."
             )

-        if do_convert_rgb:
-            images = [convert_to_rgb(image) for image in images]
+        # if do_convert_rgb:
+        #     images = [convert_to_rgb(image) for image in images]

         image_sizes = []
         img_processor = torchvision.transforms.Compose([
@@ -256,7 +272,9 @@ class Phi3VImageProcessor(BaseImageProcessor):

         # PIL images
         # HD_transform pads images to a size that is a multiple of 336
-        # convert
+        # convert images to PIL format first if they are not already
+        images = [self.convert_PIL(image) for image in images]
+        # convert to RGB (the "do_convert_rgb" argument seems redundant, since the conversion is forced here)
         images = [image.convert('RGB') for image in images]
         elems = [HD_transform(im, hd_num=self.num_crops) for im in images]
         # tensor transform and normalize
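Note: the token-count expression in the unchanged line 205 can be checked by hand. A minimal sketch, assuming the usual reading of the formula (144 patch tokens per 336x336 tile plus one global view, with 12 separator tokens per tile row; this interpretation is inferred, not stated in the PR):

def num_img_tokens(new_height: int, new_width: int) -> int:
    # Standalone restatement of the expression at line 205.
    return int((new_height // 336 * new_width // 336 + 1) * 144 + 1 + (new_height // 336 + 1) * 12)

# A padded 672x1344 image: (2 * 4 + 1) * 144 + 1 + (2 + 1) * 12 = 1333
assert num_img_tokens(672, 1344) == 1333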
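Note: the new convert_PIL helper relies on torchvision.transforms.functional.to_pil_image, which accepts CHW torch tensors and HWC numpy arrays. A minimal usage sketch, with the method restated as a free function for illustration:

import numpy as np
import torch
import torchvision
from PIL import Image

def convert_PIL(image):
    # Free-function restatement of the PR's method.
    if not isinstance(image, Image.Image):
        # to_pil_image handles CHW tensors and HWC uint8 arrays
        return torchvision.transforms.functional.to_pil_image(image)
    return image

pil = Image.new("RGB", (336, 336))
arr = np.zeros((336, 336, 3), dtype=np.uint8)  # HWC uint8 array
ten = torch.zeros(3, 336, 336)                 # CHW float tensor in [0, 1]
for im in (pil, arr, ten):
    assert isinstance(convert_PIL(im), Image.Image)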
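Note: the rationale for commenting out the do_convert_rgb branch is that the later unconditional image.convert('RGB') call normalises the mode regardless of the flag. A small illustration:

from PIL import Image

rgba = Image.new("RGBA", (8, 8))
gray = Image.new("L", (8, 8))
# The unconditional .convert('RGB') in preprocess() already normalises
# the mode, so a separate do_convert_rgb branch has no additional effect.
assert all(im.convert("RGB").mode == "RGB" for im in (rgba, gray))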