Straive
/

llava-v1.6-34b-hf

Image-Text-to-Text

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

saurabh-straive committed on Mar 21, 2024

Commit

7c2538e

·

verified ·

1 Parent(s): 647f969

Create handler.py

Files changed (1) hide show

handler.py +31 -0

handler.py ADDED Viewed

	@@ -0,0 +1,31 @@

+from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
+import torch
+from PIL import Image
+class EndpointHandler():
+    def __init__(self, path=""):
+        disable_torch_init()
+        device = torch.cuda_device
+        self.processor = LlavaNextProcessor.from_pretrained(path, use_fast=False)
+        self.model = LlavaNextForConditionalGeneration.from_pretrained(
+            path,
+            torch_dtype=torch.float16,
+            low_cpu_mem_usage=True,
+            load_in_4bit=True
+        )
+        self.model.to("cuda:0")
+    def __call__(self, data):
+        image_encoded = data.pop("inputs", data)
+        prompt = data["text"]
+        image = self.decode_base64_image(image_encoded)
+        if image.mode != "RGB":
+            image = image.convert("RGB")
+        inputs = self.processor(prompt, image, return_tensors="pt").to("cuda:0")
+        # autoregressively complete prompt
+        output = self.model.generate(**inputs, max_new_tokens=500)
+        return processor.decode(output[0], skip_special_tokens=True)