import base64
from io import BytesIO
from typing import Any, List, Dict

from PIL import Image
from transformers import AutoTokenizer, AutoModel


class EndpointHandler:
    """Callable request handler wrapping the MiniCPM-Llama3-V-2_5 chat model.

    The model and tokenizer are loaded once at construction time; each call
    decodes a base64-encoded image from the request and forwards it, together
    with the conversation messages, to ``model.chat``.
    """

    def __init__(self, path=""):
        """Load the model and tokenizer and switch the model to eval mode.

        Args:
            path: Accepted for interface compatibility; not used. The model
                is always loaded from the hard-coded hub identifier below.
        """
        model_name = "SwordElucidator/MiniCPM-Llama3-V-2_5"
        # trust_remote_code=True executes the Python code shipped in the
        # model repository; this model requires it to be loadable.
        model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
        tokenizer = AutoTokenizer.from_pretrained(
            model_name, trust_remote_code=True
        )
        model.eval()
        self.model = model
        self.tokenizer = tokenizer

    def __call__(self, data: Any) -> Any:
        """Answer a question (or continue a conversation) about an image.

        Args:
            data: Request payload, a dict. Either it contains an ``"inputs"``
                dict, or the payload itself is treated as the inputs. The
                inputs hold:

                * ``"image"``: base64-encoded image data (required).
                * ``"question"``: prompt used to build a single user message
                  when ``"msgs"`` is absent or empty.
                * ``"msgs"``: list of ``{'role': ..., 'content': ...}``
                  messages; takes precedence over ``"question"``.

                An optional top-level ``"parameters"`` dict may carry
                ``"temperature"`` (default ``0.7``).

        Returns:
            Whatever ``self.model.chat`` returns for the request.

        Raises:
            ValueError: If the payload is not a dict, the image is missing,
                or neither ``"msgs"`` nor ``"question"`` is provided.
                Invalid base64 input also surfaces as a ``ValueError``
                subclass raised by ``base64.b64decode``.
        """
        if not isinstance(data, dict):
            raise ValueError("Request payload must be a dict.")

        # Read with .get rather than .pop so the caller's payload is not
        # mutated as a side effect of handling the request.
        inputs = data.get("inputs", data)
        if not isinstance(inputs, dict):
            raise ValueError("'inputs' must be a dict.")

        encoded_image = inputs.get("image")  # base64 image as bytes
        question = inputs.get("question")
        msgs = inputs.get("msgs")
        # `or {}` also covers an explicit "parameters": null in the request.
        parameters = data.get("parameters") or {}

        if encoded_image is None:
            raise ValueError("Missing required input 'image' (base64-encoded).")

        if not msgs:
            if question is None:
                raise ValueError("Provide either 'msgs' or 'question'.")
            msgs = [{'role': 'user', 'content': question}]

        # Normalise to RGB so RGBA / palette / greyscale uploads are handed
        # to the model in the same mode as the model card's usage example.
        image = Image.open(BytesIO(base64.b64decode(encoded_image))).convert("RGB")

        res = self.model.chat(
            image=image,
            msgs=msgs,
            tokenizer=self.tokenizer,
            sampling=True,  # if sampling=False, beam_search will be used by default
            temperature=parameters.get('temperature', 0.7),
            # system_prompt=''  # pass system_prompt if needed
        )
        return res