Show inference time for both models

#2
Files changed (1)
  1. app.py +24 -9
app.py CHANGED
```diff
@@ -2,6 +2,7 @@ import random
 import requests
 import json
 import ast
+import time
 
 import matplotlib.pyplot as plt
 from PIL import Image, ImageDraw, ImageFont
@@ -156,6 +157,7 @@ def detect_qwen(image, prompt):
         }
     ]
 
+    t0 = time.perf_counter()
     text = processor_qwen.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs, video_inputs = process_vision_info(messages)
     inputs = processor_qwen(
@@ -173,37 +175,41 @@ def detect_qwen(image, prompt):
     output_text = processor_qwen.batch_decode(
         generated_ids_trimmed, do_sample=True, skip_special_tokens=True, clean_up_tokenization_spaces=False
     )[0]
+    elapsed_ms = (time.perf_counter() - t0) * 1_000
 
     input_height = inputs['image_grid_thw'][0][1] * 14
     input_width = inputs['image_grid_thw'][0][2] * 14
 
     annotated_image = create_annotated_image(image, output_text, input_height, input_width)
 
-    return annotated_image, output_text
+    time_taken = f"**Inference time ({model_qwen_name}):** {elapsed_ms:.0f} ms"
+    return annotated_image, output_text, time_taken
 
 
 @GPU
 def detect_moondream(image, prompt, category_input):
+    t0 = time.perf_counter()
     if category_input in ["Object Detection", "Visual Grounding + Object Detection"]:
         output_text = model_moondream.detect(image=image, object=prompt)
     elif category_input == "Visual Grounding + Keypoint Detection":
         output_text = model_moondream.point(image=image, object=prompt)
     else:
         output_text = model_moondream.query(image=image, question=prompt, reasoning=True)
+    elapsed_ms = (time.perf_counter() - t0) * 1_000
 
     annotated_image = create_annotated_image_normalized(image=image, json_data=output_text, label="object", explicit_color=None)
-
-    return annotated_image, output_text
 
-@GPU
+    time_taken = f"**Inference time ({model_moondream_name}):** {elapsed_ms:.0f} ms"
+    return annotated_image, output_text, time_taken
+
 def detect(image, prompt_model_1, prompt_model_2, category_input):
     STANDARD_SIZE = (1024, 1024)
     image.thumbnail(STANDARD_SIZE)
 
-    annotated_image_model_1, output_text_model_1 = detect_qwen(image, prompt_model_1)
-    annotated_image_model_2, output_text_model_2 = detect_moondream(image, prompt_model_2, category_input)
+    annotated_image_model_1, output_text_model_1, timing_1 = detect_qwen(image, prompt_model_1)
+    annotated_image_model_2, output_text_model_2, timing_2 = detect_moondream(image, prompt_model_2, category_input)
 
-    return annotated_image_model_1, output_text_model_1, annotated_image_model_2, output_text_model_2
+    return annotated_image_model_1, output_text_model_1, timing_1, annotated_image_model_2, output_text_model_2, timing_2
 
 css_hide_share = """
 button#gradio-share-link-button-0 {
@@ -253,10 +259,12 @@ with gr.Blocks(theme=Ocean(), css=css_hide_share) as demo:
         with gr.Column(scale=1):
            output_image_model_1 = gr.Image(type="pil", label=f"Annotated image for {model_qwen_name}", height=400)
            output_textbox_model_1 = gr.Textbox(label=f"Model response for {model_qwen_name}", lines=10)
+           output_time_model_1 = gr.Markdown()
 
         with gr.Column(scale=1):
            output_image_model_2 = gr.Image(type="pil", label=f"Annotated image for {model_moondream_name}", height=400)
            output_textbox_model_2 = gr.Textbox(label=f"Model response for {model_moondream_name}", lines=10)
+           output_time_model_2 = gr.Markdown()
 
     gr.Markdown("### Examples")
     example_prompts = [
@@ -276,8 +284,15 @@ with gr.Blocks(theme=Ocean(), css=css_hide_share) as demo:
         label="Click an example to populate the input"
     )
 
-    generate_btn.click(fn=detect, inputs=[image_input, prompt_input_model_1, prompt_input_model_2, category_input], outputs=[output_image_model_1, output_textbox_model_1, output_image_model_2, output_textbox_model_2])
-
+    generate_btn.click(
+        fn=detect,
+        inputs=[image_input, prompt_input_model_1, prompt_input_model_2, category_input],
+        outputs=[
+            output_image_model_1, output_textbox_model_1, output_time_model_1,
+            output_image_model_2, output_textbox_model_2, output_time_model_2
+        ]
+    )
+
 if __name__ == "__main__":
     demo.launch()
```
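A note on the measurement itself: `time.perf_counter()` reads a host-side wall clock, so the reported figure covers preprocessing, generation, and decoding together. That is adequate here because `generate()` and the Moondream calls only return once their results are materialized, which forces any queued GPU work to finish inside the timed region. If a timer like this ever wraps a bare forward pass, where CUDA kernels launch asynchronously, an explicit synchronize is needed before reading the clock or the reading comes out misleadingly low. A minimal sketch of that pattern, assuming PyTorch and a CUDA device (the helper name and the usage shown are illustrative, not part of this PR):

```python
import time
from contextlib import contextmanager

import torch

@contextmanager
def inference_timer(results, sync_cuda=True):
    """Time the enclosed block in milliseconds; stores the reading in results["ms"]."""
    if sync_cuda and torch.cuda.is_available():
        torch.cuda.synchronize()  # drain already-queued kernels so t0 starts clean
    t0 = time.perf_counter()
    try:
        yield results
    finally:
        if sync_cuda and torch.cuda.is_available():
            torch.cuda.synchronize()  # wait for async kernel launches before stopping the clock
        results["ms"] = (time.perf_counter() - t0) * 1_000

# Hypothetical usage mirroring detect_qwen:
timing = {}
with inference_timer(timing):
    pass  # e.g. generated_ids = model_qwen.generate(**inputs, max_new_tokens=512)
print(f"**Inference time:** {timing['ms']:.0f} ms")
```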
 
 
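For reference, the wiring above relies on Gradio's positional mapping: each value the handler returns is assigned to the component at the same index in `outputs`, which is why the two timing strings slot in as the third and sixth return values and land in the two new `gr.Markdown` components. A stripped-down, runnable sketch of the same pattern (component and function names here are illustrative, not the app's):

```python
import time

import gradio as gr

def run(prompt):
    t0 = time.perf_counter()
    result = prompt.upper()  # stand-in for a model call
    elapsed_ms = (time.perf_counter() - t0) * 1_000
    # Returned values map positionally onto the `outputs` list below.
    return result, f"**Inference time:** {elapsed_ms:.0f} ms"

with gr.Blocks() as demo:
    prompt_box = gr.Textbox(label="Prompt")
    run_btn = gr.Button("Run")
    result_box = gr.Textbox(label="Model response")
    time_md = gr.Markdown()  # renders the bolded timing string
    run_btn.click(fn=run, inputs=[prompt_box], outputs=[result_box, time_md])

if __name__ == "__main__":
    demo.launch()
```

Returning the timing as a `gr.Markdown` component keeps the bold `**...**` formatting from the f-strings in the diff; a `gr.Textbox` would display the asterisks literally.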