|
import cv2 |
|
import random |
|
import gradio as gr |
|
from colorsys import rgb_to_hls, hls_to_rgb |
|
|
|
|
|
# HTML/CSS preamble that process_markdown() prepends to every rendered reply.
# It links the Montserrat web font, defines the three CSS classes used by the
# generated markup (.highlighted-text, .regular-text, .highlighted-response)
# and ends with a "GLaMM" badge span.
# NOTE(review): the font link requests weights 400 and 700 only, while
# .highlighted-text and .highlighted-response ask for font-weight 600.
# NOTE: the badge span sets an inline `color`, which takes precedence over the
# `color` declared in the .highlighted-text rule.
markdown_default = """
<link href="https://fonts.googleapis.com/css2?family=Montserrat:wght@400;700&display=swap" rel="stylesheet">
<style>
.highlighted-text {
font-family: 'Montserrat', sans-serif;
font-weight: 600;
font-size: 14px;
color: rgb(255, 255, 239);
background-color: rgb(225, 231, 254);
border-radius: 7px;
padding: 5px 7px;
display: inline-block;
}
.regular-text {
font-family: 'Montserrat', sans-serif;
font-weight: 400;
font-size: 14px;
}
.highlighted-response {
font-family: 'Montserrat', sans-serif;
font-weight: 600;
font-size: 14px;
border-radius: 6px;
padding: 3px 4px;
display: inline-block;
}
</style>
<span class="highlighted-text" style='color:rgb(107, 100, 239)'>GLaMM</span>

"""
|
|
|
# Demo examples: each entry is a [prompt, image path] pair.
examples = [
    # Grounded caption generation (interleaved segmentation masks).
    [
        "Could you provide me with a detailed analysis of this photo? "
        "Please output with interleaved segmentation masks "
        "for the corresponding parts of the answer.",
        "./images/demo_resources/tokyo.jpg",
    ],
    [
        "Could you give a comprehensive explanation of what can be found "
        "within this picture? "
        "Please output with interleaved segmentation masks "
        "for the corresponding phrases.",
        "./images/demo_resources/mansion.jpg",
    ],
    # Segmentation of a named object.
    [
        "Can you please segment the yacht ?",
        "./images/demo_resources/yacht.jpg",
    ],
    [
        "Can you segment the hot air balloon ?",
        "./images/demo_resources/balloon.jpg",
    ],
    # Plain image description.
    [
        "Could you please give me a detailed description of the image ?",
        "./images/demo_resources/beetle.jpg",
    ],
    [
        "Could you provide me with a detailed analysis of this photo? "
        "Please output with interleaved segmentation masks "
        "for the corresponding parts of the answer.",
        "./images/demo_resources/joker.png",
    ],
    [
        "Can you segment what the person is using to ride ?",
        "./images/demo_resources/surfer.jpg",
    ],
    [
        "Can you segment the water around the person ?",
        "./images/demo_resources/paddle.jpg",
    ],
    [
        "Could you provide me with a detailed analysis of this photo? "
        "Please output with interleaved segmentation masks "
        "for the corresponding parts of the answer.",
        "./images/demo_resources/snow.png",
    ],
    # Visual question answering.
    [
        "What is she doing in this image ?",
        "./images/demo_resources/japan.jpg",
    ],
]
|
|
|
# Page title shown for the demo.
title = "GLaMM : Grounding Large Multimodal Model"

# Usage instructions, written as Markdown with inline HTML (<br> line breaks).
# The literal token <bbox> is written as &lt;bbox&gt; so it is displayed as
# text rather than being parsed as an (unknown) HTML tag like the <br> tags are.
description = """
[](https://mbzuai-oryx.github.io/groundingLMM)

**Usage** : <br>
 (1) For **Grounded Caption Generation** Interleaved Segmentation, input prompt like: *"Could you provide me with a detailed analysis of this photo? Please output with interleaved segmentation masks for the corresponding parts of the answer."* <br>
 (2) For **Segmentation Output**, input prompt like: *"Can you please segment xxx in the given image"* <br>
 (3) To **Input Regions** : Draw bounding boxes over the uploaded image and input prompt like: *"Can you please describe this region &lt;bbox&gt;"* Need to give &lt;bbox&gt; identifier <br>
 (4) For **Image Captioning** VQA, input prompt like: *"Could you please give me a detailed description of the image?"* <br>
 (5) For **Conditional Generation** Image manipulation, first perform (2) then select generate and input prompt which describes the new image to be generated <br>
"""

# Footer text; the "\n" escape is a real newline inside the <center> element.
article = """
<center> This is the online demo of GLaMM from MBZUAI. \n </center>
"""
|
|
|
# Palette of 18 colors, each a [c0, c1, c2] list of 0-255 ints.
# The names below read the triples as (R, G, B) — presumably the channel
# order used by callers; confirm against the image pipeline.
colors = [
    # primaries and secondaries
    [255, 0, 0],      # red
    [0, 255, 0],      # green
    [0, 0, 255],      # blue
    [255, 255, 0],    # yellow
    [0, 255, 255],    # cyan
    [255, 0, 255],    # magenta
    # warm tones
    [255, 192, 203],  # pink
    [165, 42, 42],    # brown
    [255, 165, 0],    # orange
    # dark tones
    [128, 0, 128],    # purple
    [0, 0, 128],      # navy
    [128, 0, 0],      # maroon
    [128, 128, 0],    # olive
    # blues
    [70, 130, 180],   # steel blue
    [173, 216, 230],  # light blue
    # extras
    [255, 192, 0],    # amber
    [255, 165, 165],  # light red
    [255, 20, 147],   # deep pink
]
|
|
|
|
|
def process_markdown(output_str, color_history):
    """
    Turn a raw model reply into the HTML/Markdown shown in the chat panel.

    Steps, in order:
      1. every '[SEG]' token is removed;
      2. each '<p>' becomes an opening 'highlighted-response' span whose
         background color is the placeholder 'rgb[COLOR]', and each '</p>'
         becomes '</span>';
      3. for every entry of ``color_history``, the first remaining '[COLOR]'
         placeholder is replaced by ``str(desaturate(tuple(entry)))``;
      4. the result is wrapped in a 'regular-text' span and prefixed with
         ``markdown_default``.

    :param output_str: Reply text, possibly containing '[SEG]', '<p>', '</p>'.
    :param color_history: Iterable of colors, consumed one per placeholder.
    :return: The assembled markup string.
    """
    highlight_open = "<span class='highlighted-response' style='background-color:rgb[COLOR]'>"
    body = (
        output_str.replace('[SEG]', '')
        .replace("<p>", highlight_open)
        .replace("</p>", "</span>")
    )

    # Fill the placeholders left to right, one per recorded color.
    for rgb in color_history:
        body = body.replace("[COLOR]", str(desaturate(tuple(rgb))), 1)

    body = "\n<br>\n" + body + "\n\n"
    return "".join((markdown_default, "<p><span class='regular-text'>", body, '</span></p>'))
|
|
|
|
|
def desaturate(rgb, factor=0.65): |
|
""" |
|
Desaturate an RGB color by a given factor. |
|
|
|
:param rgb: A tuple of (r, g, b) where each value is in [0, 255]. |
|
:param factor: The factor by which to reduce the saturation. |
|
0 means completely desaturated, 1 means original color. |
|
:return: A tuple of desaturated (r, g, b) values in [0, 255]. |
|
""" |
|
r, g, b = [x / 255.0 for x in rgb] |
|
h, l, s = rgb_to_hls(r, g, b) |
|
l = factor |
|
new_r, new_g, new_b = hls_to_rgb(h, l, s) |
|
return (int(new_r * 255), int(new_g * 255), int(new_b * 255)) |
|
|
|
|
|
def draw_bbox(image, boxes, color_history=None):
    """
    Draw bounding boxes on ``image`` and, on a first pass, label each one.

    Two modes, selected by ``color_history``:
      * empty or omitted: every box gets a color picked at random from the
        module-level ``colors`` palette and a ``<region N>`` label (N starts
        at 1) just inside its top-left corner;
      * non-empty: box ``i`` is drawn with ``color_history[i]`` and no label.

    :param image: Image handed to ``cv2.rectangle`` / ``cv2.putText``. It is
                  not copied here, so the caller's object is what gets passed
                  to the drawing calls.
    :param boxes: Iterable of boxes; elements 0..3 of each box are cast to
                  int and used as the (x1, y1) and (x2, y2) corners.
    :param color_history: Optional sequence of colors indexed by box position.
    :return: The value returned by the last drawing call, or ``image``
             unchanged when ``boxes`` is empty.
    :raises IndexError: If a non-empty ``color_history`` has fewer entries
                        than ``boxes``.
    """
    boxes = list(boxes)
    if not boxes:
        # Nothing to draw.
        return image

    # ``len`` rather than truthiness so array-like histories are accepted too.
    first_pass = color_history is None or len(color_history) == 0

    font = cv2.FONT_HERSHEY_TRIPLEX
    font_scale = 1.0
    thickness = 4
    # Only the height of a sample label is needed: it shifts the text origin
    # down from the box's top edge.
    (_, text_height), _ = cv2.getTextSize('<region_1>', font, font_scale, thickness)

    new_image = image
    for bbox_id, box in enumerate(boxes):
        color = tuple(random.choice(colors)) if first_pass else color_history[bbox_id]

        x1, y1 = int(box[0]), int(box[1])
        x2, y2 = int(box[2]), int(box[3])
        new_image = cv2.rectangle(new_image, (x1, y1), (x2, y2), color, thickness)
        if first_pass:
            new_image = cv2.putText(
                new_image,
                f'<region {bbox_id + 1}>',
                (x1, y1 + text_height),
                font, font_scale, color, thickness,
            )

    return new_image
|
|
|
|
|
class ImageSketcher(gr.Image):
    """Image component that is always created with ``tool='boxes'``.

    Code is from
    https://github.com/ttengwang/Caption-Anything/blob/main/app.py#L32,
    where it fixes the bug of gradio.Image that cannot upload with
    tool == 'sketch'. This variant passes ``tool='boxes'`` instead.
    """

    is_template = True

    def __init__(self, **kwargs):
        """Forward all keyword arguments to ``gr.Image`` with the tool fixed to 'boxes'."""
        super().__init__(tool='boxes', **kwargs)

    def preprocess(self, x):
        """Normalize the incoming payload, then defer to ``gr.Image.preprocess``.

        ``None`` is returned untouched. For the 'boxes' tool with an 'upload'
        or 'webcam' source, a bare string payload is wrapped as
        ``{'image': <str>, 'boxes': []}``; any other payload must already be
        a dict with a string 'image' and a list 'boxes' (checked with
        ``assert``).
        """
        if x is None:
            return x

        # Presumably where the parent class writes its temporary files —
        # confirm against the installed gradio version.
        self.DEFAULT_TEMP_DIR = 'gradio_out/'

        box_input = self.tool == 'boxes' and self.source in ['upload', 'webcam']
        if box_input and isinstance(x, str):
            # Plain upload with no boxes drawn yet.
            x = {'image': x, 'boxes': []}
        elif box_input:
            assert isinstance(x, dict)
            assert isinstance(x['image'], str)
            assert isinstance(x['boxes'], list)

        return super().preprocess(x)