File size: 1,626 Bytes
3c65a51 a01b43c 968e3bc 3c65a51 0611e70 3c65a51 968e3bc 3c65a51 968e3bc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
from typing import Dict, List, Any
from io import BytesIO
import base64
import logging
import uform
from PIL import Image
import numpy as np
class EndpointHandler():
def __init__(self, path=""):
self.model, self.processor = uform.get_model('unum-cloud/uform-vl-multilingual-v2')
def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
data args:
image (:obj:`string`)
candidates (:obj:`list`)
Return:
A :obj:`list`: une liste permettant de passer les embedding
"""
inputs_request = data.pop("inputs", data)
# decode base64 image to PIL
image = Image.open(BytesIO(base64.b64decode(inputs_request['image'])))
text = inputs_request['text']
image_data = self.processor.preprocess_image(image)
text_data = self.processor.preprocess_text(text)
image_features, image_embedding = self.model.encode_image(image_data)
text_features, text_embedding = self.model.encode_text(text_data)
joint_embedding = self.model.encode_multimodal(image=image_data, text=text_data)
# Convert embeddings to lists of floats
serializable_results = {
'joint_embedding': joint_embedding.tolist() if isinstance(joint_embedding, np.ndarray) else joint_embedding,
'text_embedding': text_embedding.tolist() if isinstance(text_embedding, np.ndarray) else text_embedding,
'image_embedding': image_embedding.tolist() if isinstance(image_embedding, np.ndarray) else image_embedding
}
return serializable_results
|