Spaces: Running on Zero

Show inference time for both models #2
by vikhyatk - opened

app.py CHANGED
@@ -2,6 +2,7 @@ import random
 import requests
 import json
 import ast
+import time
 
 import matplotlib.pyplot as plt
 from PIL import Image, ImageDraw, ImageFont
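A note on the new import: time.perf_counter() is the right clock for elapsed intervals, since unlike time.time() it is monotonic and high-resolution, so the difference between two reads is unaffected by system clock adjustments. A minimal standalone sketch of the pattern this PR uses (the sum() call is just a stand-in for a model call):

    import time

    t0 = time.perf_counter()
    sum(range(1_000_000))  # stand-in for preprocessing + inference
    elapsed_ms = (time.perf_counter() - t0) * 1_000  # fractional seconds -> ms
    print(f"elapsed: {elapsed_ms:.0f} ms")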
@@ -156,6 +157,7 @@ def detect_qwen(image, prompt):
         }
     ]
 
+    t0 = time.perf_counter()
     text = processor_qwen.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs, video_inputs = process_vision_info(messages)
     inputs = processor_qwen(
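Worth noting: the timer starts before apply_chat_template, so the reported figure covers prompt templating, vision preprocessing, and tensor preparation as well as generation and decoding. If generation-only GPU time were ever wanted instead, CUDA's asynchronous kernel launches would need a synchronize around the clock reads; a hypothetical helper (not part of this PR) sketching that:

    import time
    import torch

    # Hypothetical helper, not in this PR: bracket the generate() call with
    # torch.cuda.synchronize() so perf_counter() sees completed GPU work,
    # since CUDA kernels are launched asynchronously.
    def timed_generate(model, **gen_kwargs):
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        t0 = time.perf_counter()
        output_ids = model.generate(**gen_kwargs)
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        return output_ids, (time.perf_counter() - t0) * 1_000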
@@ -173,37 +175,41 @@ def detect_qwen(image, prompt):
     output_text = processor_qwen.batch_decode(
         generated_ids_trimmed, do_sample=True, skip_special_tokens=True, clean_up_tokenization_spaces=False
     )[0]
+    elapsed_ms = (time.perf_counter() - t0) * 1_000
 
     input_height = inputs['image_grid_thw'][0][1] * 14
     input_width = inputs['image_grid_thw'][0][2] * 14
 
     annotated_image = create_annotated_image(image, output_text, input_height, input_width)
 
-    return annotated_image, output_text
+    time_taken = f"**Inference time ({model_qwen_name}):** {elapsed_ms:.0f} ms"
+    return annotated_image, output_text, time_taken
 
 
 @GPU
 def detect_moondream(image, prompt, category_input):
+    t0 = time.perf_counter()
     if category_input in ["Object Detection", "Visual Grounding + Object Detection"]:
         output_text = model_moondream.detect(image=image, object=prompt)
     elif category_input == "Visual Grounding + Keypoint Detection":
         output_text = model_moondream.point(image=image, object=prompt)
     else:
         output_text = model_moondream.query(image=image, question=prompt, reasoning=True)
+    elapsed_ms = (time.perf_counter() - t0) * 1_000
 
     annotated_image = create_annotated_image_normalized(image=image, json_data=output_text, label="object", explicit_color=None)
 
-    return annotated_image, output_text
+    time_taken = f"**Inference time ({model_moondream_name}):** {elapsed_ms:.0f} ms"
+    return annotated_image, output_text, time_taken
+
 
 def detect(image, prompt_model_1, prompt_model_2, category_input):
     STANDARD_SIZE = (1024, 1024)
     image.thumbnail(STANDARD_SIZE)
 
-    annotated_image_model_1, output_text_model_1 = detect_qwen(image, prompt_model_1)
-    annotated_image_model_2, output_text_model_2 = detect_moondream(image, prompt_model_2, category_input)
+    annotated_image_model_1, output_text_model_1, timing_1 = detect_qwen(image, prompt_model_1)
+    annotated_image_model_2, output_text_model_2, timing_2 = detect_moondream(image, prompt_model_2, category_input)
 
-    return annotated_image_model_1, output_text_model_1, annotated_image_model_2, output_text_model_2
+    return annotated_image_model_1, output_text_model_1, timing_1, annotated_image_model_2, output_text_model_2, timing_2
 
 css_hide_share = """
 button#gradio-share-link-button-0 {
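The t0 / elapsed_ms pair is now duplicated across detect_qwen and detect_moondream. If a third model ever joins the comparison, a small context manager could factor the pattern out; a sketch of that alternative (names are illustrative, not part of this PR):

    import time
    from contextlib import contextmanager

    @contextmanager
    def inference_timer(result):
        # Populates result["ms"] with the elapsed wall time of the block.
        t0 = time.perf_counter()
        try:
            yield
        finally:
            result["ms"] = (time.perf_counter() - t0) * 1_000

    # Usage inside a detect function:
    timing = {}
    with inference_timer(timing):
        ...  # run preprocessing + generation here
    time_taken = f"**Inference time:** {timing['ms']:.0f} ms"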
@@ -253,10 +259,12 @@ with gr.Blocks(theme=Ocean(), css=css_hide_share) as demo:
         with gr.Column(scale=1):
             output_image_model_1 = gr.Image(type="pil", label=f"Annotated image for {model_qwen_name}", height=400)
             output_textbox_model_1 = gr.Textbox(label=f"Model response for {model_qwen_name}", lines=10)
+            output_time_model_1 = gr.Markdown()
 
         with gr.Column(scale=1):
             output_image_model_2 = gr.Image(type="pil", label=f"Annotated image for {model_moondream_name}", height=400)
             output_textbox_model_2 = gr.Textbox(label=f"Model response for {model_moondream_name}", lines=10)
+            output_time_model_2 = gr.Markdown()
 
     gr.Markdown("### Examples")
     example_prompts = [
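One small UI consideration: gr.Markdown() with no value renders nothing until the first click populates it, which is why the layout looks unchanged on load. If a visible placeholder were preferred, an initial string would do; for example (an optional tweak, not what this PR does):

    output_time_model_1 = gr.Markdown("*Inference time will appear here after a run.*")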
@@ -276,8 +284,15 @@ with gr.Blocks(theme=Ocean(), css=css_hide_share) as demo:
         label="Click an example to populate the input"
     )
 
-    generate_btn.click(
-
+    generate_btn.click(
+        fn=detect,
+        inputs=[image_input, prompt_input_model_1, prompt_input_model_2, category_input],
+        outputs=[
+            output_image_model_1, output_textbox_model_1, output_time_model_1,
+            output_image_model_2, output_textbox_model_2, output_time_model_2
+        ]
+    )
+
 if __name__ == "__main__":
     demo.launch()
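On the wiring: Gradio maps a handler's returned tuple onto outputs positionally, so the six-element list here must stay in the same order as detect's return values; reordering either side would silently put a timing string into a textbox. A minimal self-contained illustration of that positional contract:

    import gradio as gr

    def handler():
        # The returned tuple is matched to `outputs` by position, not by name.
        return "goes to A", "goes to B"

    with gr.Blocks() as demo:
        a = gr.Textbox(label="A")
        b = gr.Textbox(label="B")
        gr.Button("Go").click(fn=handler, inputs=[], outputs=[a, b])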