Spaces:
Running
Running
Jae-Won Chung
committed on
Commit
·
76bf85e
1
Parent(s):
4e4fca8
Leaderboard tweaks
Browse files- app.py +49 -20
- data/diffusion/image-to-video/models.json +1 -1
app.py
CHANGED
|
@@ -229,6 +229,13 @@ class LLMChatTableManager(LLMTableManager):
|
|
| 229 |
def get_detail_text(self, detail_mode: bool) -> str:
|
| 230 |
if detail_mode:
|
| 231 |
text = """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
Columns
|
| 233 |
- **Model**: The name of the model.
|
| 234 |
- **Params (B)**: Number of parameters in the model.
|
|
@@ -242,10 +249,6 @@ class LLMChatTableManager(LLMTableManager):
|
|
| 242 |
- **Avg BS**: Average batch size of the serving engine over time.
|
| 243 |
- **Max BS**: Maximum batch size configuration of the serving engine.
|
| 244 |
|
| 245 |
-
**TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
|
| 246 |
-
An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
|
| 247 |
-
You can tweak the TPOT slider to adjust the target average TPOT for the models.
|
| 248 |
-
|
| 249 |
For more detailed information, please take a look at the **About** tab.
|
| 250 |
"""
|
| 251 |
else:
|
|
@@ -290,6 +293,13 @@ class LLMCodeTableManager(LLMTableManager):
|
|
| 290 |
def get_detail_text(self, detail_mode: bool) -> str:
|
| 291 |
if detail_mode:
|
| 292 |
text = """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
Columns
|
| 294 |
- **Model**: The name of the model.
|
| 295 |
- **Params (B)**: Number of parameters in the model.
|
|
@@ -303,10 +313,6 @@ class LLMCodeTableManager(LLMTableManager):
|
|
| 303 |
- **Avg BS**: Average batch size of the serving engine over time.
|
| 304 |
- **Max BS**: Maximum batch size configuration of the serving engine.
|
| 305 |
|
| 306 |
-
**TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
|
| 307 |
-
An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
|
| 308 |
-
You can tweak the TPOT slider to adjust the target average TPOT for the models.
|
| 309 |
-
|
| 310 |
For more detailed information, please take a look at the **About** tab.
|
| 311 |
"""
|
| 312 |
else:
|
|
@@ -350,6 +356,13 @@ class VLMChatTableManager(LLMTableManager):
|
|
| 350 |
def get_detail_text(self, detail_mode: bool) -> str:
|
| 351 |
if detail_mode:
|
| 352 |
text = """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
Columns
|
| 354 |
- **Model**: The name of the model.
|
| 355 |
- **Params (B)**: Number of parameters in the model.
|
|
@@ -363,10 +376,6 @@ class VLMChatTableManager(LLMTableManager):
|
|
| 363 |
- **Avg BS**: Average batch size of the serving engine over time.
|
| 364 |
- **Max BS**: Maximum batch size configuration of the serving engine.
|
| 365 |
|
| 366 |
-
**TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
|
| 367 |
-
An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
|
| 368 |
-
You can tweak the TPOT slider to adjust the target average TPOT for the models.
|
| 369 |
-
|
| 370 |
For more detailed information, please take a look at the **About** tab.
|
| 371 |
"""
|
| 372 |
else:
|
|
@@ -499,7 +508,7 @@ class DiffusionTableManager(TableManager):
|
|
| 499 |
)
|
| 500 |
|
| 501 |
if not detail_mode:
|
| 502 |
-
core_columns = ["Model", "Denoising params", "GPU", "
|
| 503 |
readable_name_mapping = {
|
| 504 |
"Denoising params": "Denoising parameters (Billions)",
|
| 505 |
"GPU": "GPU model",
|
|
@@ -521,7 +530,9 @@ class DiffusionT2ITableManager(DiffusionTableManager):
|
|
| 521 |
|
| 522 |
def get_intro_text(self) -> str:
|
| 523 |
text = """
|
| 524 |
-
<h2>
|
|
|
|
|
|
|
| 525 |
|
| 526 |
<p style="font-size: 16px">
|
| 527 |
Diffusion models generate images that align with input text prompts.
|
|
@@ -537,6 +548,9 @@ class DiffusionT2ITableManager(DiffusionTableManager):
|
|
| 537 |
def get_detail_text(self, detail_mode: bool) -> str:
|
| 538 |
if detail_mode:
|
| 539 |
text = """
|
|
|
|
|
|
|
|
|
|
| 540 |
Columns
|
| 541 |
- **Model**: The name of the model.
|
| 542 |
- **Denoising params**: Number of parameters in the denoising module (e.g., UNet, Transformer).
|
|
@@ -557,6 +571,7 @@ class DiffusionT2ITableManager(DiffusionTableManager):
|
|
| 557 |
- **Denoising parameters (Billions)**: Number of parameters in the diffusion model's (core) denoising module. This part of the model is run repetitively to gradually refine the image.
|
| 558 |
- **GPU model**: Name of the GPU model used for benchmarking.
|
| 559 |
- **Energy per image (Joules)**: Energy consumed for each generated image in Joules.
|
|
|
|
| 560 |
|
| 561 |
Checking "Show more technical details" above the table will reveal more detailed columns.
|
| 562 |
Also, for more detailed information, please take a look at the **About** tab.
|
|
@@ -575,7 +590,9 @@ class DiffusionT2VTableManager(DiffusionTableManager):
|
|
| 575 |
|
| 576 |
def get_intro_text(self) -> str:
|
| 577 |
text = """
|
| 578 |
-
<h2>
|
|
|
|
|
|
|
| 579 |
|
| 580 |
<p style="font-size: 16px">
|
| 581 |
Diffusion models generate videos that align with input text prompts.
|
|
@@ -591,6 +608,9 @@ class DiffusionT2VTableManager(DiffusionTableManager):
|
|
| 591 |
def get_detail_text(self, detail_mode: bool) -> str:
|
| 592 |
if detail_mode:
|
| 593 |
text = """
|
|
|
|
|
|
|
|
|
|
| 594 |
Columns
|
| 595 |
- **Model**: The name of the model.
|
| 596 |
- **Denoising params**: Number of parameters in the denoising module (e.g., UNet, Transformer).
|
|
@@ -612,6 +632,8 @@ class DiffusionT2VTableManager(DiffusionTableManager):
|
|
| 612 |
- **Denoising parameters (Billions)**: Number of parameters in the diffusion model's (core) denoising module. This part of the model is run repetitively to gradually refine the video.
|
| 613 |
- **GPU model**: Name of the GPU model used for benchmarking.
|
| 614 |
- **Energy per video (Joules)**: Energy consumed for each generated video in Joules.
|
|
|
|
|
|
|
| 615 |
|
| 616 |
Checking "Show more technical details" above the table will reveal more detailed columns.
|
| 617 |
Also, for more detailed information, please take a look at the **About** tab.
|
|
@@ -630,7 +652,9 @@ class DiffusionI2VTableManager(DiffusionTableManager):
|
|
| 630 |
|
| 631 |
def get_intro_text(self) -> str:
|
| 632 |
text = """
|
| 633 |
-
<h2>
|
|
|
|
|
|
|
| 634 |
|
| 635 |
<p style="font-size: 16px">
|
| 636 |
Diffusion models generate videos given an input image (and sometimes alongside text).
|
|
@@ -646,6 +670,9 @@ class DiffusionI2VTableManager(DiffusionTableManager):
|
|
| 646 |
def get_detail_text(self, detail_mode: bool) -> str:
|
| 647 |
if detail_mode:
|
| 648 |
text = """
|
|
|
|
|
|
|
|
|
|
| 649 |
Columns
|
| 650 |
- **Model**: The name of the model.
|
| 651 |
- **Denoising params**: Number of parameters in the denoising module (e.g., UNet, Transformer).
|
|
@@ -667,6 +694,8 @@ class DiffusionI2VTableManager(DiffusionTableManager):
|
|
| 667 |
- **Denoising parameters (Billions)**: Number of parameters in the diffusion model's (core) denoising module. This part of the model is run repetitively to gradually refine the video.
|
| 668 |
- **GPU model**: Name of the GPU model used for benchmarking.
|
| 669 |
- **Energy per video (Joules)**: Energy consumed for each generated video in Joules.
|
|
|
|
|
|
|
| 670 |
|
| 671 |
Checking "Show more technical details" above the table will reveal more detailed columns.
|
| 672 |
Also, for more detailed information, please take a look at the **About** tab.
|
|
@@ -674,7 +703,7 @@ class DiffusionI2VTableManager(DiffusionTableManager):
|
|
| 674 |
return text
|
| 675 |
|
| 676 |
def get_benchmark_sliders(self) -> dict[str, tuple[float, float, float, float]]:
|
| 677 |
-
return {"Batch latency (s)": (0.0, 120.0, 1.0,
|
| 678 |
|
| 679 |
|
| 680 |
class LegacyTableManager:
|
|
@@ -718,7 +747,7 @@ class LegacyTableManager:
|
|
| 718 |
self.full_df = df
|
| 719 |
|
| 720 |
# Default view of the table is to only show the first options.
|
| 721 |
-
self.set_filter_get_df(
|
| 722 |
|
| 723 |
def _read_tables(self, data_dir: str) -> pd.DataFrame:
|
| 724 |
"""Read tables."""
|
|
@@ -777,7 +806,7 @@ class LegacyTableManager:
|
|
| 777 |
gr.Dropdown.update(choices=["None", *columns]),
|
| 778 |
]
|
| 779 |
|
| 780 |
-
def set_filter_get_df(self,
|
| 781 |
"""Set the current set of filters and return the filtered DataFrame."""
|
| 782 |
# If the filter is empty, we default to the first choice for each key.
|
| 783 |
if not filters:
|
|
@@ -1027,7 +1056,7 @@ def consumed_more_energy_message(energy_a, energy_b):
|
|
| 1027 |
# Colosseum event handlers
|
| 1028 |
def on_load():
|
| 1029 |
"""Intialize the dataframe, shuffle the model preference dropdown choices."""
|
| 1030 |
-
dataframe = global_ltbm.set_filter_get_df(
|
| 1031 |
dataframes = [global_tbm.set_filter_get_df(detail_mode=False) for global_tbm in global_tbms]
|
| 1032 |
return dataframe, *dataframes
|
| 1033 |
|
|
|
|
| 229 |
def get_detail_text(self, detail_mode: bool) -> str:
|
| 230 |
if detail_mode:
|
| 231 |
text = """
|
| 232 |
+
**TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
|
| 233 |
+
An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
|
| 234 |
+
You can tweak the TPOT slider to adjust the target average TPOT for the models.
|
| 235 |
+
|
| 236 |
+
Each row corresponds to one model, given a constraint on the maximum average TPOT.
|
| 237 |
+
If more than one GPU type was chosen, the row shows results from the GPU with the lowest energy consumption per request.
|
| 238 |
+
|
| 239 |
Columns
|
| 240 |
- **Model**: The name of the model.
|
| 241 |
- **Params (B)**: Number of parameters in the model.
|
|
|
|
| 249 |
- **Avg BS**: Average batch size of the serving engine over time.
|
| 250 |
- **Max BS**: Maximum batch size configuration of the serving engine.
|
| 251 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
For more detailed information, please take a look at the **About** tab.
|
| 253 |
"""
|
| 254 |
else:
|
|
|
|
| 293 |
def get_detail_text(self, detail_mode: bool) -> str:
|
| 294 |
if detail_mode:
|
| 295 |
text = """
|
| 296 |
+
**TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
|
| 297 |
+
An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
|
| 298 |
+
You can tweak the TPOT slider to adjust the target average TPOT for the models.
|
| 299 |
+
|
| 300 |
+
Each row corresponds to one model, given a constraint on the maximum average TPOT.
|
| 301 |
+
If more than one GPU type was chosen, the row shows results from the GPU with the lowest energy consumption per request.
|
| 302 |
+
|
| 303 |
Columns
|
| 304 |
- **Model**: The name of the model.
|
| 305 |
- **Params (B)**: Number of parameters in the model.
|
|
|
|
| 313 |
- **Avg BS**: Average batch size of the serving engine over time.
|
| 314 |
- **Max BS**: Maximum batch size configuration of the serving engine.
|
| 315 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
For more detailed information, please take a look at the **About** tab.
|
| 317 |
"""
|
| 318 |
else:
|
|
|
|
| 356 |
def get_detail_text(self, detail_mode: bool) -> str:
|
| 357 |
if detail_mode:
|
| 358 |
text = """
|
| 359 |
+
**TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
|
| 360 |
+
An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
|
| 361 |
+
You can tweak the TPOT slider to adjust the target average TPOT for the models.
|
| 362 |
+
|
| 363 |
+
Each row corresponds to one model, given a constraint on the maximum average TPOT.
|
| 364 |
+
If more than one GPU type was chosen, the row shows results from the GPU with the lowest energy consumption per request.
|
| 365 |
+
|
| 366 |
Columns
|
| 367 |
- **Model**: The name of the model.
|
| 368 |
- **Params (B)**: Number of parameters in the model.
|
|
|
|
| 376 |
- **Avg BS**: Average batch size of the serving engine over time.
|
| 377 |
- **Max BS**: Maximum batch size configuration of the serving engine.
|
| 378 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 379 |
For more detailed information, please take a look at the **About** tab.
|
| 380 |
"""
|
| 381 |
else:
|
|
|
|
| 508 |
)
|
| 509 |
|
| 510 |
if not detail_mode:
|
| 511 |
+
core_columns = ["Model", "Denoising params", "GPU", "Resolution", "Frames", self.energy_col]
|
| 512 |
readable_name_mapping = {
|
| 513 |
"Denoising params": "Denoising parameters (Billions)",
|
| 514 |
"GPU": "GPU model",
|
|
|
|
| 530 |
|
| 531 |
def get_intro_text(self) -> str:
|
| 532 |
text = """
|
| 533 |
+
<h2>How much energy do GenAI models consume?</h2>
|
| 534 |
+
|
| 535 |
+
<h3>Diffusion text-to-image generation</h3>
|
| 536 |
|
| 537 |
<p style="font-size: 16px">
|
| 538 |
Diffusion models generate images that align with input text prompts.
|
|
|
|
| 548 |
def get_detail_text(self, detail_mode: bool) -> str:
|
| 549 |
if detail_mode:
|
| 550 |
text = """
|
| 551 |
+
Each row corresponds to one model, given a constraint on the maximum computation time for the whole batch.
|
| 552 |
+
If more than one GPU type was chosen, the row shows results from the GPU with the lowest energy consumption per image.
|
| 553 |
+
|
| 554 |
Columns
|
| 555 |
- **Model**: The name of the model.
|
| 556 |
- **Denoising params**: Number of parameters in the denoising module (e.g., UNet, Transformer).
|
|
|
|
| 571 |
- **Denoising parameters (Billions)**: Number of parameters in the diffusion model's (core) denoising module. This part of the model is run repetitively to gradually refine the image.
|
| 572 |
- **GPU model**: Name of the GPU model used for benchmarking.
|
| 573 |
- **Energy per image (Joules)**: Energy consumed for each generated image in Joules.
|
| 574 |
+
- **Resolution**: Resolution of the generated image.
|
| 575 |
|
| 576 |
Checking "Show more technical details" above the table will reveal more detailed columns.
|
| 577 |
Also, for more detailed information, please take a look at the **About** tab.
|
|
|
|
| 590 |
|
| 591 |
def get_intro_text(self) -> str:
|
| 592 |
text = """
|
| 593 |
+
<h2>How much energy do GenAI models consume?</h2>
|
| 594 |
+
|
| 595 |
+
<h3>Diffusion text-to-video generation</h3>
|
| 596 |
|
| 597 |
<p style="font-size: 16px">
|
| 598 |
Diffusion models generate videos that align with input text prompts.
|
|
|
|
| 608 |
def get_detail_text(self, detail_mode: bool) -> str:
|
| 609 |
if detail_mode:
|
| 610 |
text = """
|
| 611 |
+
Each row corresponds to one model, given a constraint on the maximum computation time for the whole batch.
|
| 612 |
+
If more than one GPU type was chosen, the row shows results from the GPU with the lowest energy consumption per video.
|
| 613 |
+
|
| 614 |
Columns
|
| 615 |
- **Model**: The name of the model.
|
| 616 |
- **Denoising params**: Number of parameters in the denoising module (e.g., UNet, Transformer).
|
|
|
|
| 632 |
- **Denoising parameters (Billions)**: Number of parameters in the diffusion model's (core) denoising module. This part of the model is run repetitively to gradually refine the video.
|
| 633 |
- **GPU model**: Name of the GPU model used for benchmarking.
|
| 634 |
- **Energy per video (Joules)**: Energy consumed for each generated video in Joules.
|
| 635 |
+
- **Frames**: Number of frames in the generated video.
|
| 636 |
+
- **Resolution**: Resolution of the generated video.
|
| 637 |
|
| 638 |
Checking "Show more technical details" above the table will reveal more detailed columns.
|
| 639 |
Also, for more detailed information, please take a look at the **About** tab.
|
|
|
|
| 652 |
|
| 653 |
def get_intro_text(self) -> str:
|
| 654 |
text = """
|
| 655 |
+
<h2>How much energy do GenAI models consume?</h2>
|
| 656 |
+
|
| 657 |
+
<h3>Diffusion image-to-video generation</h3>
|
| 658 |
|
| 659 |
<p style="font-size: 16px">
|
| 660 |
Diffusion models generate videos given an input image (and sometimes alongside text).
|
|
|
|
| 670 |
def get_detail_text(self, detail_mode: bool) -> str:
|
| 671 |
if detail_mode:
|
| 672 |
text = """
|
| 673 |
+
Each row corresponds to one model, given a constraint on the maximum computation time for the whole batch.
|
| 674 |
+
If more than one GPU type was chosen, the row shows results from the GPU with the lowest energy consumption per video.
|
| 675 |
+
|
| 676 |
Columns
|
| 677 |
- **Model**: The name of the model.
|
| 678 |
- **Denoising params**: Number of parameters in the denoising module (e.g., UNet, Transformer).
|
|
|
|
| 694 |
- **Denoising parameters (Billions)**: Number of parameters in the diffusion model's (core) denoising module. This part of the model is run repetitively to gradually refine the video.
|
| 695 |
- **GPU model**: Name of the GPU model used for benchmarking.
|
| 696 |
- **Energy per video (Joules)**: Energy consumed for each generated video in Joules.
|
| 697 |
+
- **Frames**: Number of frames in the generated video.
|
| 698 |
+
- **Resolution**: Resolution of the generated video.
|
| 699 |
|
| 700 |
Checking "Show more technical details" above the table will reveal more detailed columns.
|
| 701 |
Also, for more detailed information, please take a look at the **About** tab.
|
|
|
|
| 703 |
return text
|
| 704 |
|
| 705 |
def get_benchmark_sliders(self) -> dict[str, tuple[float, float, float, float]]:
|
| 706 |
+
return {"Batch latency (s)": (0.0, 120.0, 1.0, 60.0)}
|
| 707 |
|
| 708 |
|
| 709 |
class LegacyTableManager:
|
|
|
|
| 747 |
self.full_df = df
|
| 748 |
|
| 749 |
# Default view of the table is to only show the first options.
|
| 750 |
+
self.set_filter_get_df()
|
| 751 |
|
| 752 |
def _read_tables(self, data_dir: str) -> pd.DataFrame:
|
| 753 |
"""Read tables."""
|
|
|
|
| 806 |
gr.Dropdown.update(choices=["None", *columns]),
|
| 807 |
]
|
| 808 |
|
| 809 |
+
def set_filter_get_df(self, *filters) -> pd.DataFrame:
|
| 810 |
"""Set the current set of filters and return the filtered DataFrame."""
|
| 811 |
# If the filter is empty, we default to the first choice for each key.
|
| 812 |
if not filters:
|
|
|
|
| 1056 |
# Colosseum event handlers
|
| 1057 |
def on_load():
|
| 1058 |
"""Intialize the dataframe, shuffle the model preference dropdown choices."""
|
| 1059 |
+
dataframe = global_ltbm.set_filter_get_df()
|
| 1060 |
dataframes = [global_tbm.set_filter_get_df(detail_mode=False) for global_tbm in global_tbms]
|
| 1061 |
return dataframe, *dataframes
|
| 1062 |
|
data/diffusion/image-to-video/models.json
CHANGED
|
@@ -15,7 +15,7 @@
|
|
| 15 |
},
|
| 16 |
"stabilityai/stable-video-diffusion-img2vid-xt": {
|
| 17 |
"url": "https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt",
|
| 18 |
-
"nickname": "Stable Video Diffusion
|
| 19 |
"total_params": 2.3,
|
| 20 |
"denoising_params": 1.5,
|
| 21 |
"resolution": "1024x576"
|
|
|
|
| 15 |
},
|
| 16 |
"stabilityai/stable-video-diffusion-img2vid-xt": {
|
| 17 |
"url": "https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt",
|
| 18 |
+
"nickname": "Stable Video Diffusion XT",
|
| 19 |
"total_params": 2.3,
|
| 20 |
"denoising_params": 1.5,
|
| 21 |
"resolution": "1024x576"
|