import base64
from io import BytesIO
from typing import Any, List, Dict

from PIL import Image
from transformers import AutoTokenizer, AutoModel


class EndpointHandler:
    """Request handler that answers a question about an image.

    The handler loads a vision-language model plus its tokenizer once at
    construction time and, per request, forwards a decoded image and a chat
    message list to the model's ``chat`` method.
    """

    def __init__(self, path=""):
        """Load the model and tokenizer and put the model in eval mode.

        Args:
            path: Accepted for interface compatibility but not used here; the
                weights are always loaded from the hard-coded repository name
                below.
        """
        model_name = "SwordElucidator/MiniCPM-Llama3-V-2_5"
        # trust_remote_code lets transformers execute the modelling code that
        # ships with the repository (the `chat` method used in __call__ is
        # presumably defined there -- confirm against the model repo).
        model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        model.eval()
        self.model = model
        self.tokenizer = tokenizer

    def __call__(self, data: Any) -> Any:
        """Run one chat turn against the model.

        Args:
            data: Request payload (a dict). The fields are read from
                ``data["inputs"]`` when that key exists, otherwise from
                ``data`` itself:

                * ``image``: base64-encoded image (``str`` or ``bytes``),
                  required.
                * ``question``: user question, used only when ``msgs`` is
                  missing or empty.
                * ``msgs``: ready-made chat message list; takes precedence
                  over ``question``.

                ``data["parameters"]`` may hold ``temperature``
                (default ``0.7``).

        Returns:
            Whatever ``self.model.chat`` returns, unchanged (presumably the
            generated answer text -- confirm against the model's code).

        Raises:
            ValueError: If ``inputs`` is not a dict, if ``image`` is missing
                or empty, or if neither ``msgs`` nor ``question`` is given.
        """
        # Read with .get() rather than .pop() so the caller's payload is left
        # untouched.
        inputs = data.get("inputs", data)
        if not isinstance(inputs, dict):
            raise ValueError(
                "'inputs' must be a dict with 'image' and 'question' or 'msgs'"
            )

        image = inputs.get("image")
        question = inputs.get("question")
        msgs = inputs.get("msgs")
        # `or {}` also covers an explicit `"parameters": null` in the request.
        parameters = data.get("parameters") or {}

        # Validate before any decoding so bad requests fail fast with a clear
        # message instead of an opaque TypeError from base64/PIL.
        if not image:
            raise ValueError("'image' is required (base64-encoded image data)")
        if not msgs:
            if question is None:
                raise ValueError("either 'msgs' or 'question' must be provided")
            msgs = [{'role': 'user', 'content': question}]

        # Normalise to RGB so RGBA / palette / greyscale uploads are handed to
        # the model in a single, consistent mode.
        image = Image.open(BytesIO(base64.b64decode(image))).convert("RGB")

        return self.model.chat(
            image=image,
            msgs=msgs,
            tokenizer=self.tokenizer,
            sampling=True,  # if sampling=False, beam_search will be used by default
            temperature=parameters.get('temperature', 0.7),
            # system_prompt='' # pass system_prompt if needed
        )