Spaces:
Running
Running
Jae-Won Chung
committed on
Commit
•
76bf85e
1
Parent(s):
4e4fca8
Leaderboard tweaks
Browse files- app.py +49 -20
- data/diffusion/image-to-video/models.json +1 -1
app.py
CHANGED
@@ -229,6 +229,13 @@ class LLMChatTableManager(LLMTableManager):
|
|
229 |
def get_detail_text(self, detail_mode: bool) -> str:
|
230 |
if detail_mode:
|
231 |
text = """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
232 |
Columns
|
233 |
- **Model**: The name of the model.
|
234 |
- **Params (B)**: Number of parameters in the model.
|
@@ -242,10 +249,6 @@ class LLMChatTableManager(LLMTableManager):
|
|
242 |
- **Avg BS**: Average batch size of the serving engine over time.
|
243 |
- **Max BS**: Maximum batch size configuration of the serving engine.
|
244 |
|
245 |
-
**TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
|
246 |
-
An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
|
247 |
-
You can tweak the TPOT slider to adjust the target average TPOT for the models.
|
248 |
-
|
249 |
For more detailed information, please take a look at the **About** tab.
|
250 |
"""
|
251 |
else:
|
@@ -290,6 +293,13 @@ class LLMCodeTableManager(LLMTableManager):
|
|
290 |
def get_detail_text(self, detail_mode: bool) -> str:
|
291 |
if detail_mode:
|
292 |
text = """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
293 |
Columns
|
294 |
- **Model**: The name of the model.
|
295 |
- **Params (B)**: Number of parameters in the model.
|
@@ -303,10 +313,6 @@ class LLMCodeTableManager(LLMTableManager):
|
|
303 |
- **Avg BS**: Average batch size of the serving engine over time.
|
304 |
- **Max BS**: Maximum batch size configuration of the serving engine.
|
305 |
|
306 |
-
**TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
|
307 |
-
An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
|
308 |
-
You can tweak the TPOT slider to adjust the target average TPOT for the models.
|
309 |
-
|
310 |
For more detailed information, please take a look at the **About** tab.
|
311 |
"""
|
312 |
else:
|
@@ -350,6 +356,13 @@ class VLMChatTableManager(LLMTableManager):
|
|
350 |
def get_detail_text(self, detail_mode: bool) -> str:
|
351 |
if detail_mode:
|
352 |
text = """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
353 |
Columns
|
354 |
- **Model**: The name of the model.
|
355 |
- **Params (B)**: Number of parameters in the model.
|
@@ -363,10 +376,6 @@ class VLMChatTableManager(LLMTableManager):
|
|
363 |
- **Avg BS**: Average batch size of the serving engine over time.
|
364 |
- **Max BS**: Maximum batch size configuration of the serving engine.
|
365 |
|
366 |
-
**TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
|
367 |
-
An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
|
368 |
-
You can tweak the TPOT slider to adjust the target average TPOT for the models.
|
369 |
-
|
370 |
For more detailed information, please take a look at the **About** tab.
|
371 |
"""
|
372 |
else:
|
@@ -499,7 +508,7 @@ class DiffusionTableManager(TableManager):
|
|
499 |
)
|
500 |
|
501 |
if not detail_mode:
|
502 |
-
core_columns = ["Model", "Denoising params", "GPU", "
|
503 |
readable_name_mapping = {
|
504 |
"Denoising params": "Denoising parameters (Billions)",
|
505 |
"GPU": "GPU model",
|
@@ -521,7 +530,9 @@ class DiffusionT2ITableManager(DiffusionTableManager):
|
|
521 |
|
522 |
def get_intro_text(self) -> str:
|
523 |
text = """
|
524 |
-
<h2>
|
|
|
|
|
525 |
|
526 |
<p style="font-size: 16px">
|
527 |
Diffusion models generate images that align with input text prompts.
|
@@ -537,6 +548,9 @@ class DiffusionT2ITableManager(DiffusionTableManager):
|
|
537 |
def get_detail_text(self, detail_mode: bool) -> str:
|
538 |
if detail_mode:
|
539 |
text = """
|
|
|
|
|
|
|
540 |
Columns
|
541 |
- **Model**: The name of the model.
|
542 |
- **Denoising params**: Number of parameters in the denoising module (e.g., UNet, Transformer).
|
@@ -557,6 +571,7 @@ class DiffusionT2ITableManager(DiffusionTableManager):
|
|
557 |
- **Denoising parameters (Billions)**: Number of parameters in the diffusion model's (core) denoising module. This part of the model is run repetitively to gradually refine the image.
|
558 |
- **GPU model**: Name of the GPU model used for benchmarking.
|
559 |
- **Energy per image (Joules)**: Energy consumed for each generated image in Joules.
|
|
|
560 |
|
561 |
Checking "Show more technical details" above the table will reveal more detailed columns.
|
562 |
Also, for more detailed information, please take a look at the **About** tab.
|
@@ -575,7 +590,9 @@ class DiffusionT2VTableManager(DiffusionTableManager):
|
|
575 |
|
576 |
def get_intro_text(self) -> str:
|
577 |
text = """
|
578 |
-
<h2>
|
|
|
|
|
579 |
|
580 |
<p style="font-size: 16px">
|
581 |
Diffusion models generate videos that align with input text prompts.
|
@@ -591,6 +608,9 @@ class DiffusionT2VTableManager(DiffusionTableManager):
|
|
591 |
def get_detail_text(self, detail_mode: bool) -> str:
|
592 |
if detail_mode:
|
593 |
text = """
|
|
|
|
|
|
|
594 |
Columns
|
595 |
- **Model**: The name of the model.
|
596 |
- **Denoising params**: Number of parameters in the denosing module (e.g., UNet, Transformer).
|
@@ -612,6 +632,8 @@ class DiffusionT2VTableManager(DiffusionTableManager):
|
|
612 |
- **Denoising parameters (Billions)**: Number of parameters in the diffusion model's (core) denoising module. This part of the model is run repetitively to gradually refine the video.
|
613 |
- **GPU model**: Name of the GPU model used for benchmarking.
|
614 |
- **Energy per video (Joules)**: Energy consumed for each generated video in Joules.
|
|
|
|
|
615 |
|
616 |
Checking "Show more technical details" above the table will reveal more detailed columns.
|
617 |
Also, for more detailed information, please take a look at the **About** tab.
|
@@ -630,7 +652,9 @@ class DiffusionI2VTableManager(DiffusionTableManager):
|
|
630 |
|
631 |
def get_intro_text(self) -> str:
|
632 |
text = """
|
633 |
-
<h2>
|
|
|
|
|
634 |
|
635 |
<p style="font-size: 16px">
|
636 |
Diffusion models generate videos given an input image (and sometimes alongside with text).
|
@@ -646,6 +670,9 @@ class DiffusionI2VTableManager(DiffusionTableManager):
|
|
646 |
def get_detail_text(self, detail_mode: bool) -> str:
|
647 |
if detail_mode:
|
648 |
text = """
|
|
|
|
|
|
|
649 |
Columns
|
650 |
- **Model**: The name of the model.
|
651 |
- **Denoising params**: Number of parameters in the denoising module (e.g., UNet, Transformer).
|
@@ -667,6 +694,8 @@ class DiffusionI2VTableManager(DiffusionTableManager):
|
|
667 |
- **Denoising parameters (Billions)**: Number of parameters in the diffusion model's (core) denoising module. This part of the model is run repetitively to gradually refine the video.
|
668 |
- **GPU model**: Name of the GPU model used for benchmarking.
|
669 |
- **Energy per video (Joules)**: Energy consumed for each generated video in Joules.
|
|
|
|
|
670 |
|
671 |
Checking "Show more technical details" above the table will reveal more detailed columns.
|
672 |
Also, for more detailed information, please take a look at the **About** tab.
|
@@ -674,7 +703,7 @@ class DiffusionI2VTableManager(DiffusionTableManager):
|
|
674 |
return text
|
675 |
|
676 |
def get_benchmark_sliders(self) -> dict[str, tuple[float, float, float, float]]:
|
677 |
-
return {"Batch latency (s)": (0.0, 120.0, 1.0,
|
678 |
|
679 |
|
680 |
class LegacyTableManager:
|
@@ -718,7 +747,7 @@ class LegacyTableManager:
|
|
718 |
self.full_df = df
|
719 |
|
720 |
# Default view of the table is to only show the first options.
|
721 |
-
self.set_filter_get_df(
|
722 |
|
723 |
def _read_tables(self, data_dir: str) -> pd.DataFrame:
|
724 |
"""Read tables."""
|
@@ -777,7 +806,7 @@ class LegacyTableManager:
|
|
777 |
gr.Dropdown.update(choices=["None", *columns]),
|
778 |
]
|
779 |
|
780 |
-
def set_filter_get_df(self,
|
781 |
"""Set the current set of filters and return the filtered DataFrame."""
|
782 |
# If the filter is empty, we default to the first choice for each key.
|
783 |
if not filters:
|
@@ -1027,7 +1056,7 @@ def consumed_more_energy_message(energy_a, energy_b):
|
|
1027 |
# Colosseum event handlers
|
1028 |
def on_load():
|
1029 |
"""Intialize the dataframe, shuffle the model preference dropdown choices."""
|
1030 |
-
dataframe = global_ltbm.set_filter_get_df(
|
1031 |
dataframes = [global_tbm.set_filter_get_df(detail_mode=False) for global_tbm in global_tbms]
|
1032 |
return dataframe, *dataframes
|
1033 |
|
|
|
229 |
def get_detail_text(self, detail_mode: bool) -> str:
|
230 |
if detail_mode:
|
231 |
text = """
|
232 |
+
**TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
|
233 |
+
An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
|
234 |
+
You can tweak the TPOT slider to adjust the target average TPOT for the models.
|
235 |
+
|
236 |
+
Each row corresponds to one model, given a constraint on the maximum average TPOT.
|
237 |
+
If more than one GPU type was chosen, the row shows results from the GPU with the lowest energy consumption per request.
|
238 |
+
|
239 |
Columns
|
240 |
- **Model**: The name of the model.
|
241 |
- **Params (B)**: Number of parameters in the model.
|
|
|
249 |
- **Avg BS**: Average batch size of the serving engine over time.
|
250 |
- **Max BS**: Maximum batch size configuration of the serving engine.
|
251 |
|
|
|
|
|
|
|
|
|
252 |
For more detailed information, please take a look at the **About** tab.
|
253 |
"""
|
254 |
else:
|
|
|
293 |
def get_detail_text(self, detail_mode: bool) -> str:
|
294 |
if detail_mode:
|
295 |
text = """
|
296 |
+
**TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
|
297 |
+
An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
|
298 |
+
You can tweak the TPOT slider to adjust the target average TPOT for the models.
|
299 |
+
|
300 |
+
Each row corresponds to one model, given a constraint on the maximum average TPOT.
|
301 |
+
If more than one GPU type was chosen, the row shows results from the GPU with the lowest energy consumption per request.
|
302 |
+
|
303 |
Columns
|
304 |
- **Model**: The name of the model.
|
305 |
- **Params (B)**: Number of parameters in the model.
|
|
|
313 |
- **Avg BS**: Average batch size of the serving engine over time.
|
314 |
- **Max BS**: Maximum batch size configuration of the serving engine.
|
315 |
|
|
|
|
|
|
|
|
|
316 |
For more detailed information, please take a look at the **About** tab.
|
317 |
"""
|
318 |
else:
|
|
|
356 |
def get_detail_text(self, detail_mode: bool) -> str:
|
357 |
if detail_mode:
|
358 |
text = """
|
359 |
+
**TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
|
360 |
+
An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
|
361 |
+
You can tweak the TPOT slider to adjust the target average TPOT for the models.
|
362 |
+
|
363 |
+
Each row corresponds to one model, given a constraint on the maximum average TPOT.
|
364 |
+
If more than one GPU type was chosen, the row shows results from the GPU with the lowest energy consumption per request.
|
365 |
+
|
366 |
Columns
|
367 |
- **Model**: The name of the model.
|
368 |
- **Params (B)**: Number of parameters in the model.
|
|
|
376 |
- **Avg BS**: Average batch size of the serving engine over time.
|
377 |
- **Max BS**: Maximum batch size configuration of the serving engine.
|
378 |
|
|
|
|
|
|
|
|
|
379 |
For more detailed information, please take a look at the **About** tab.
|
380 |
"""
|
381 |
else:
|
|
|
508 |
)
|
509 |
|
510 |
if not detail_mode:
|
511 |
+
core_columns = ["Model", "Denoising params", "GPU", "Resolution", "Frames", self.energy_col]
|
512 |
readable_name_mapping = {
|
513 |
"Denoising params": "Denoising parameters (Billions)",
|
514 |
"GPU": "GPU model",
|
|
|
530 |
|
531 |
def get_intro_text(self) -> str:
|
532 |
text = """
|
533 |
+
<h2>How much energy do GenAI models consume?</h2>
|
534 |
+
|
535 |
+
<h3>Diffusion text-to-image generation</h3>
|
536 |
|
537 |
<p style="font-size: 16px">
|
538 |
Diffusion models generate images that align with input text prompts.
|
|
|
548 |
def get_detail_text(self, detail_mode: bool) -> str:
|
549 |
if detail_mode:
|
550 |
text = """
|
551 |
+
Each row corresponds to one model, given a constraint on the maximum computation time for the whole batch.
|
552 |
+
If more than one GPU types were chosen, the row shows results from the GPU with the lowest energy consumption per image.
|
553 |
+
|
554 |
Columns
|
555 |
- **Model**: The name of the model.
|
556 |
- **Denoising params**: Number of parameters in the denoising module (e.g., UNet, Transformer).
|
|
|
571 |
- **Denoising parameters (Billions)**: Number of parameters in the diffusion model's (core) denoising module. This part of the model is run repetitively to gradually refine the image.
|
572 |
- **GPU model**: Name of the GPU model used for benchmarking.
|
573 |
- **Energy per image (Joules)**: Energy consumed for each generated image in Joules.
|
574 |
+
- **Resolution**: Resolution of the generated image.
|
575 |
|
576 |
Checking "Show more technical details" above the table will reveal more detailed columns.
|
577 |
Also, for more detailed information, please take a look at the **About** tab.
|
|
|
590 |
|
591 |
def get_intro_text(self) -> str:
|
592 |
text = """
|
593 |
+
<h2>How much energy do GenAI models consume?</h2>
|
594 |
+
|
595 |
+
<h3>Diffusion text-to-video generation</h3>
|
596 |
|
597 |
<p style="font-size: 16px">
|
598 |
Diffusion models generate videos that align with input text prompts.
|
|
|
608 |
def get_detail_text(self, detail_mode: bool) -> str:
|
609 |
if detail_mode:
|
610 |
text = """
|
611 |
+
Each row corresponds to one model, given a constraint on the maximum computation time for the whole batch.
|
612 |
+
If more than one GPU types were chosen, the row shows results from the GPU with the lowest energy consumption per video.
|
613 |
+
|
614 |
Columns
|
615 |
- **Model**: The name of the model.
|
616 |
- **Denoising params**: Number of parameters in the denoising module (e.g., UNet, Transformer).
|
|
|
632 |
- **Denoising parameters (Billions)**: Number of parameters in the diffusion model's (core) denoising module. This part of the model is run repetitively to gradually refine the video.
|
633 |
- **GPU model**: Name of the GPU model used for benchmarking.
|
634 |
- **Energy per video (Joules)**: Energy consumed for each generated video in Joules.
|
635 |
+
- **Frames**: Number of frames in the generated video.
|
636 |
+
- **Resolution**: Resolution of the generated video.
|
637 |
|
638 |
Checking "Show more technical details" above the table will reveal more detailed columns.
|
639 |
Also, for more detailed information, please take a look at the **About** tab.
|
|
|
652 |
|
653 |
def get_intro_text(self) -> str:
|
654 |
text = """
|
655 |
+
<h2>How much energy do GenAI models consume?</h2>
|
656 |
+
|
657 |
+
<h3>Diffusion image-to-video generation</h3>
|
658 |
|
659 |
<p style="font-size: 16px">
|
660 |
Diffusion models generate videos given an input image (and sometimes alongside with text).
|
|
|
670 |
def get_detail_text(self, detail_mode: bool) -> str:
|
671 |
if detail_mode:
|
672 |
text = """
|
673 |
+
Each row corresponds to one model, given a constraint on the maximum computation time for the whole batch.
|
674 |
+
If more than one GPU types were chosen, the row shows results from the GPU with the lowest energy consumption per video.
|
675 |
+
|
676 |
Columns
|
677 |
- **Model**: The name of the model.
|
678 |
- **Denoising params**: Number of parameters in the denoising module (e.g., UNet, Transformer).
|
|
|
694 |
- **Denoising parameters (Billions)**: Number of parameters in the diffusion model's (core) denoising module. This part of the model is run repetitively to gradually refine the video.
|
695 |
- **GPU model**: Name of the GPU model used for benchmarking.
|
696 |
- **Energy per video (Joules)**: Energy consumed for each generated video in Joules.
|
697 |
+
- **Frames**: Number of frames in the generated video.
|
698 |
+
- **Resolution**: Resolution of the generated video.
|
699 |
|
700 |
Checking "Show more technical details" above the table will reveal more detailed columns.
|
701 |
Also, for more detailed information, please take a look at the **About** tab.
|
|
|
703 |
return text
|
704 |
|
705 |
def get_benchmark_sliders(self) -> dict[str, tuple[float, float, float, float]]:
|
706 |
+
return {"Batch latency (s)": (0.0, 120.0, 1.0, 60.0)}
|
707 |
|
708 |
|
709 |
class LegacyTableManager:
|
|
|
747 |
self.full_df = df
|
748 |
|
749 |
# Default view of the table is to only show the first options.
|
750 |
+
self.set_filter_get_df()
|
751 |
|
752 |
def _read_tables(self, data_dir: str) -> pd.DataFrame:
|
753 |
"""Read tables."""
|
|
|
806 |
gr.Dropdown.update(choices=["None", *columns]),
|
807 |
]
|
808 |
|
809 |
+
def set_filter_get_df(self, *filters) -> pd.DataFrame:
|
810 |
"""Set the current set of filters and return the filtered DataFrame."""
|
811 |
# If the filter is empty, we default to the first choice for each key.
|
812 |
if not filters:
|
|
|
1056 |
# Colosseum event handlers
|
1057 |
def on_load():
|
1058 |
"""Intialize the dataframe, shuffle the model preference dropdown choices."""
|
1059 |
+
dataframe = global_ltbm.set_filter_get_df()
|
1060 |
dataframes = [global_tbm.set_filter_get_df(detail_mode=False) for global_tbm in global_tbms]
|
1061 |
return dataframe, *dataframes
|
1062 |
|
data/diffusion/image-to-video/models.json
CHANGED
@@ -15,7 +15,7 @@
|
|
15 |
},
|
16 |
"stabilityai/stable-video-diffusion-img2vid-xt": {
|
17 |
"url": "https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt",
|
18 |
-
"nickname": "Stable Video Diffusion
|
19 |
"total_params": 2.3,
|
20 |
"denoising_params": 1.5,
|
21 |
"resolution": "1024x576"
|
|
|
15 |
},
|
16 |
"stabilityai/stable-video-diffusion-img2vid-xt": {
|
17 |
"url": "https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt",
|
18 |
+
"nickname": "Stable Video Diffusion XT",
|
19 |
"total_params": 2.3,
|
20 |
"denoising_params": 1.5,
|
21 |
"resolution": "1024x576"
|