Spaces:

helblazer811
/

ConceptAttention

Running on Zero

App Files Files Community

helblazer811 committed on 2 days ago

Commit

3a5de53

1 Parent(s): 5f5f2bb

Added cross attention to the UI.

Browse files

Files changed (2) hide show

app.py +198 -83
concept_attention/concept_attention_pipeline.py +105 -76

app.py CHANGED Viewed

@@ -2,6 +2,8 @@ import spaces
 import gradio as gr
 from PIL import Image
 import math
 from concept_attention import ConceptAttentionFluxPipeline
@@ -14,31 +16,53 @@ EXAMPLES = [
         "tree, dog, grass, background",  # words
         42,  # seed
     ],
-    [
-        "A dragon",  # prompt
-        "dragon, sky, rock, cloud",  # words
-        42,  # seed
-    ],
-    [
-        "A hot air balloon",  # prompt
-        "balloon, sky, water, tree",  # words
-        42,  # seed
-    ]
 ]
-pipeline = ConceptAttentionFluxPipeline(model_name="flux-schnell", device="cuda")
 @spaces.GPU(duration=60)
-def process_inputs(prompt, word_list, seed, layer_start_index, timestep_start_index):
-    print("Processing inputs")
-    assert layer_start_index is not None
-    assert timestep_start_index is not None
     prompt = prompt.strip()
-    if not word_list.strip():
-        gr.exceptions.InputError("words", "Please enter comma-separated words")
-    concepts = [w.strip() for w in word_list.split(",")]
     if len(concepts) == 0:
         raise gr.exceptions.InputError("words", "Please enter at least 1 concept")
@@ -59,101 +83,192 @@ def process_inputs(prompt, word_list, seed, layer_start_index, timestep_start_in
     )
     output_image = pipeline_output.image
-    concept_heatmaps = pipeline_output.concept_heatmaps
-    concept_heatmaps = [heatmap.resize((IMG_SIZE, IMG_SIZE), resample=Image.NEAREST) for heatmap in concept_heatmaps]
-    heatmaps_and_labels = [(concept_heatmaps[concept_index], concepts[concept_index]) for concept_index in range(len(concepts))]
-    all_images_and_labels = [(output_image, "Generated Image")] + heatmaps_and_labels
-    num_rows = math.ceil(len(all_images_and_labels) / COLUMNS)
-    print(num_rows)
-    return all_images_and_labels, num_rows
 with gr.Blocks(
     css="""
-    .container { max-width: 1200px; margin: 0 auto; padding: 20px; }
-    .title { text-align: center; margin-bottom: 10px; }
-    .authors { text-align: center; margin-bottom: 10px; }
-    .affiliations { text-align: center; color: #666; margin-bottom: 10px; }
-    .abstract { text-align: center; margin-bottom: 40px; }
-"""
 ) as demo:
     with gr.Column(elem_classes="container"):
-        gr.Markdown("# ConceptAttention: Diffusion Transformers Learn Highly Interpretable Features", elem_classes="title")
-        gr.Markdown("### Alec Helbling¹, Tuna Meral², Ben Hoover¹³, Pinar Yanardag², Duen Horng (Polo) Chau¹", elem_classes="authors")
-        gr.Markdown("### ¹Georgia Tech · ²Virginia Tech · ³IBM Research", elem_classes="affiliations")
-        gr.Markdown(
-            """
-                We introduce ConceptAttention, an approach to interpreting the intermediate representations of diffusion transformers.
-                The user just gives a list of textual concepts and ConceptAttention will produce a set of saliency maps depicting
-                the location and intensity of these concepts in generated images. Check out our paper: [here](https://arxiv.org/abs/2502.04320).
-            """,
-            elem_classes="abstract"
-        )
-        with gr.Row(scale=1):
-            prompt = gr.Textbox(
-                label="Enter your prompt",
-                placeholder="Enter your prompt",
-                value=EXAMPLES[0][0],
-                scale=4,
-                # show_label=True,
-                container=False
-                # height="80px"
-            )
-            words = gr.Textbox(
-                label="Enter a list of concepts (comma-separated)",
-                placeholder="Enter a list of concepts (comma-separated)",
-                value=EXAMPLES[0][1],
-                scale=4,
-                # show_label=True,
-                container=False
-                # height="80px"
-            )
-            submit_btn = gr.Button(
-                "Run",
-                min_width="100px",
-                scale=1
-            )
         num_rows_state = gr.State(value=1)  # Initial number of rows
         # generated_image = gr.Image(label="Generated Image", elem_classes="input-image")
-        gallery = gr.Gallery(
-            label="Generated images",
-            show_label=True,
-            # elem_id="gallery",
-            columns=COLUMNS,
-            rows=1,
-            # object_fit="contain",
-            height="auto",
-            elem_classes="gallery"
-        )
         with gr.Accordion("Advanced Settings", open=False):
             seed = gr.Slider(minimum=0, maximum=10000, step=1, label="Seed", value=42)
             layer_start_index = gr.Slider(minimum=0, maximum=18, step=1, label="Layer Start Index", value=10)
             timestep_start_index = gr.Slider(minimum=0, maximum=4, step=1, label="Timestep Start Index", value=2)
         submit_btn.click(
             fn=process_inputs,
-            inputs=[prompt, words, seed, layer_start_index, timestep_start_index],
-            outputs=[gallery, num_rows_state]
         )
-        gr.Examples(examples=EXAMPLES, inputs=[prompt, words, seed, layer_start_index, timestep_start_index], outputs=[gallery, num_rows_state], fn=process_inputs, cache_examples=False)
         # num_rows_state.change(
         #     fn=lambda rows: gr.Gallery.update(rows=int(rows)),
         #     inputs=[num_rows_state],
         #     outputs=[gallery]
         # )
-        # Automatically process the first example on launch
-        demo.load(process_inputs, inputs=[prompt, words, seed, layer_start_index, timestep_start_index], outputs=[gallery, num_rows_state])
 if __name__ == "__main__":
     demo.launch(max_threads=1)

 import gradio as gr
 from PIL import Image
 import math
+import io
+import base64
 from concept_attention import ConceptAttentionFluxPipeline
         "tree, dog, grass, background",  # words
         42,  # seed
     ],
+    # [
+    #     "A dragon",  # prompt
+    #     "dragon, sky, rock, cloud",  # words
+    #     42,  # seed
+    # ],
+    # [
+    #     "A hot air balloon",  # prompt
+    #     "balloon, sky, water, tree",  # words
+    #     42,  # seed
+    # ]
 ]
+def update_default_concepts(prompt):
+    """Build a Gradio update carrying the preset concept list for ``prompt``.
+
+    Prompts that have no preset entry produce an update whose value is an
+    empty list.
+    """
+    presets_by_prompt = {
+        "A dog by a tree": ["dog", "grass", "tree", "background"],
+        "A dragon": ["dragon", "sky", "rock", "cloud"],
+        "A hot air balloon": ["balloon", "sky", "water", "tree"]
+    }
+    # Fall back to an empty selection when the prompt is not a known preset.
+    if prompt in presets_by_prompt:
+        suggested_concepts = presets_by_prompt[prompt]
+    else:
+        suggested_concepts = []
+    return gr.update(value=suggested_concepts)
+pipeline = ConceptAttentionFluxPipeline(model_name="flux-schnell", device="cuda", offload_model=True)
+def convert_pil_to_bytes(img):
+    """Return ``img`` as a base64-encoded PNG string.
+
+    The image is first resized to IMG_SIZE x IMG_SIZE with nearest-neighbour
+    resampling (IMG_SIZE is a module-level constant defined outside this
+    view). Despite the function name, the result is a ``str``, not ``bytes``.
+    """
+    resized = img.resize((IMG_SIZE, IMG_SIZE), resample=Image.NEAREST)
+    # Encode to PNG in memory, then base64 the raw bytes.
+    with io.BytesIO() as png_buffer:
+        resized.save(png_buffer, format="PNG")
+        png_bytes = png_buffer.getvalue()
+    return base64.b64encode(png_bytes).decode()
 @spaces.GPU(duration=60)
+def process_inputs(prompt, concepts, seed, layer_start_index, timestep_start_index):
+    # print("Processing inputs")
+    # assert layer_start_index is not None
+    # assert timestep_start_index is not None
+    if not prompt.strip():
+        raise gr.exceptions.InputError("prompt", "Please enter a prompt")
     prompt = prompt.strip()
+    print(concepts)
+    # if not word_list.strip():
+    #     gr.exceptions.InputError("words", "Please enter comma-separated words")
+    # concepts = [w.strip() for w in word_list.split(",")]
     if len(concepts) == 0:
         raise gr.exceptions.InputError("words", "Please enter at least 1 concept")
     )
     output_image = pipeline_output.image
+    output_space_heatmaps = pipeline_output.concept_heatmaps
+    output_space_heatmaps = [heatmap.resize((IMG_SIZE, IMG_SIZE), resample=Image.NEAREST) for heatmap in output_space_heatmaps]
+    output_space_maps_and_labels = [(output_space_heatmaps[concept_index], concepts[concept_index]) for concept_index in range(len(concepts))]
+    cross_attention_heatmaps = pipeline_output.cross_attention_maps
+    cross_attention_heatmaps = [heatmap.resize((IMG_SIZE, IMG_SIZE), resample=Image.NEAREST) for heatmap in cross_attention_heatmaps]
+    cross_attention_maps_and_labels = [(cross_attention_heatmaps[concept_index], concepts[concept_index]) for concept_index in range(len(concepts))]
+    # heatmaps_and_labels = [(concept_heatmaps[concept_index], concepts[concept_index]) for concept_index in range(len(concepts))]
+    # all_images_and_labels = [(output_image, "Generated Image")] + heatmaps_and_labels
+    # num_rows = math.ceil(len(all_images_and_labels) / COLUMNS)
+    return output_image, \
+        gr.update(value=output_space_maps_and_labels, columns=len(output_space_maps_and_labels)), \
+        gr.update(value=cross_attention_maps_and_labels, columns=len(cross_attention_maps_and_labels))
 with gr.Blocks(
     css="""
+        .container { max-width: 1200px; margin: 0 auto; padding: 20px; }
+        .title { text-align: center; margin-bottom: 10px; }
+        .authors { text-align: center; margin-bottom: 10px; }
+        .affiliations { text-align: center; color: #666; margin-bottom: 10px; }
+        .abstract { text-align: center; margin-bottom: 40px; }
+        .generated-image {
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            height: 100%; /* Ensures full height */
+        }
+        .input {
+            height: 47px;
+        }
+        .input-column {
+            flex-direction: column;
+            gap: 0px;
+        }
+        .input-column-label {}
+        .gallery {
+            # scrollbar-width: thin;
+            # scrollbar-color: #27272A;
+        }
+        .run-button-column {
+            width: 100px !important;
+        }
+    """
+    # ,
+    # elem_classes="container"
 ) as demo:
     with gr.Column(elem_classes="container"):
+        gr.Markdown("# ConceptAttention: Visualize Any Concepts in Your Generated Images", elem_classes="title")
+        # gr.Markdown("### Alec Helbling¹, Tuna Meral², Ben Hoover¹³, Pinar Yanardag², Duen Horng (Polo) Chau¹", elem_classes="authors")
+        # gr.Markdown("### ¹Georgia Tech · ²Virginia Tech · ³IBM Research", elem_classes="affiliations")
+        gr.Markdown("## Interpret generative models with precise, high-quality heatmaps. Check out our paper [here](https://arxiv.org/abs/2502.04320).", elem_classes="abstract")
+        with gr.Row(scale=1, equal_height=True):
+            with gr.Column(scale=3, elem_classes="input-column"):
+                gr.HTML(
+                    "Write a Prompt",
+                    elem_classes="input-column-label"
+                )
+                prompt = gr.Dropdown(
+                    ["A dog by a tree", "A dragon", "A hot air balloon"],
+                    # label="Prompt",
+                    container=False,
+                    # scale=3,
+                    allow_custom_value=True,
+                    elem_classes="input"
+                )
+            with gr.Column(scale=7, elem_classes="input-column"):
+                gr.HTML(
+                    "Select or Write Concepts",
+                    elem_classes="input-column-label"
+                )
+                concepts = gr.Dropdown(
+                    ["dog", "grass", "tree", "dragon", "sky", "rock", "cloud", "balloon", "water", "background"],
+                    value=["dog", "grass", "tree", "background"],
+                    multiselect=True,
+                    label="Concepts",
+                    container=False,
+                    allow_custom_value=True,
+                    # scale=4,
+                    elem_classes="input",
+                    max_choices=5
+                )
+            with gr.Column(scale=1, min_width=100, elem_classes="input-column run-button-column"):
+                gr.HTML(
+                    "&#8203;",
+                    elem_classes="input-column-label"
+                )
+                submit_btn = gr.Button(
+                    "Run",
+                    # scale=1,
+                    elem_classes="input"
+                )
+            # prompt = gr.Textbox(
+            #     label="Enter your prompt",
+            #     placeholder="Enter your prompt",
+            #     value=EXAMPLES[0][0],
+            #     scale=4,
+            #     # show_label=True,
+            #     container=False
+            #     # height="80px"
+            # )
+            # words = gr.Textbox(
+            #     label="Enter a list of concepts (comma-separated)",
+            #     placeholder="Enter a list of concepts (comma-separated)",
+            #     value=EXAMPLES[0][1],
+            #     scale=4,
+            #     # show_label=True,
+            #     container=False
+            #     # height="80px"
+            # )
         num_rows_state = gr.State(value=1)  # Initial number of rows
         # generated_image = gr.Image(label="Generated Image", elem_classes="input-image")
+        # gallery = gr.Gallery(
+        #     label="Generated images",
+        #     show_label=True,
+        #     # elem_id="gallery",
+        #     columns=COLUMNS,
+        #     rows=1,
+        #     # object_fit="contain",
+        #     height="auto",
+        #     elem_classes="gallery"
+        # )
+        with gr.Row(elem_classes="gallery", scale=8):
+            with gr.Column(scale=1):
+                generated_image = gr.Image(
+                    elem_classes="generated-image",
+                    show_label=False
+                )
+            with gr.Column(scale=4):
+                concept_attention_gallery = gr.Gallery(
+                    label="Concept Attention (Ours)",
+                    show_label=True,
+                    # columns=3,
+                    rows=1,
+                    object_fit="contain",
+                    height="200px",
+                    elem_classes="gallery"
+                )
+                cross_attention_gallery = gr.Gallery(
+                    label="Cross Attention",
+                    show_label=True,
+                    # columns=3,
+                    rows=1,
+                    object_fit="contain",
+                    height="200px",
+                    elem_classes="gallery"
+                )
         with gr.Accordion("Advanced Settings", open=False):
             seed = gr.Slider(minimum=0, maximum=10000, step=1, label="Seed", value=42)
             layer_start_index = gr.Slider(minimum=0, maximum=18, step=1, label="Layer Start Index", value=10)
             timestep_start_index = gr.Slider(minimum=0, maximum=4, step=1, label="Timestep Start Index", value=2)
         submit_btn.click(
             fn=process_inputs,
+            inputs=[prompt, concepts, seed, layer_start_index, timestep_start_index],
+            outputs=[generated_image, concept_attention_gallery, cross_attention_gallery]
         )
+        # gr.Examples(examples=EXAMPLES, inputs=[prompt, concepts, seed, layer_start_index, timestep_start_index], outputs=[gallery, num_rows_state], fn=process_inputs, cache_examples=False)
         # num_rows_state.change(
         #     fn=lambda rows: gr.Gallery.update(rows=int(rows)),
         #     inputs=[num_rows_state],
         #     outputs=[gallery]
         # )
+        prompt.change(update_default_concepts, inputs=[prompt], outputs=[concepts])
+        # Automatically process the first example on launch
+        demo.load(
+            process_inputs,
+            inputs=[prompt, concepts, seed, layer_start_index, timestep_start_index],
+            outputs=[generated_image, concept_attention_gallery, cross_attention_gallery]
+        )
 if __name__ == "__main__":
     demo.launch(max_threads=1)

concept_attention/concept_attention_pipeline.py CHANGED Viewed

@@ -6,6 +6,7 @@ import PIL
 import numpy as np
 import matplotlib.pyplot as plt
 import torch
 from concept_attention.binary_segmentation_baselines.raw_cross_attention import RawCrossAttentionBaseline, RawCrossAttentionSegmentationModel
 from concept_attention.binary_segmentation_baselines.raw_output_space import RawOutputSpaceBaseline, RawOutputSpaceSegmentationModel
@@ -15,6 +16,7 @@ from concept_attention.image_generator import FluxGenerator
 class ConceptAttentionPipelineOutput():
     image: PIL.Image.Image | np.ndarray
     concept_heatmaps: list[PIL.Image.Image]
 class ConceptAttentionFluxPipeline():
     """
@@ -36,19 +38,6 @@ class ConceptAttentionFluxPipeline():
             offload=offload_model,
             device=device
         )
-        # Make a Raw Cross Attention Segmentation Model and Raw Output space segmentation model
-        self.cross_attention_segmentation_model = RawCrossAttentionSegmentationModel(
-            generator=self.flux_generator
-        )
-        self.output_space_segmentation_model = RawOutputSpaceSegmentationModel(
-            generator=self.flux_generator
-        )
-        self.raw_output_space_generator = RawOutputSpaceBaseline(
-            generator=self.flux_generator
-        )
-        self.raw_cross_attention_generator = RawCrossAttentionBaseline(
-            generator=self.flux_generator
-        )
     @torch.no_grad()
     def generate_image(
@@ -77,20 +66,50 @@ class ConceptAttentionFluxPipeline():
         if timesteps is None:
             timesteps = list(range(num_inference_steps))
         # Run the raw output space object
-        concept_heatmaps, image = self.raw_output_space_generator(
-            prompt,
-            concepts,
-            seed=seed,
-            num_steps=num_inference_steps,
-            timesteps=timesteps,
-            layers=layer_indices,
-            softmax=softmax,
-            height=width,
             width=width,
             guidance=guidance,
         )
-        # Convert to numpy
-        concept_heatmaps = concept_heatmaps.detach().cpu().numpy()[0]
         # Convert the torch heatmaps to PIL images.
         if return_pil_heatmaps:
             # Convert to a matplotlib color scheme
@@ -103,63 +122,73 @@ class ConceptAttentionFluxPipeline():
             concept_heatmaps = [PIL.Image.fromarray(concept_heatmap) for concept_heatmap in colored_heatmaps]
         return ConceptAttentionPipelineOutput(
             image=image,
-            concept_heatmaps=concept_heatmaps
         )
-    def encode_image(
-        self,
-        image: PIL.Image.Image,
-        concepts: list[str],
-        prompt: str = "", # Optional
-        width: int = 1024,
-        height: int = 1024,
-        return_cross_attention = False,
-        layer_indices = list(range(15, 19)),
-        num_samples: int = 1,
-        device: str = "cuda:0",
-        return_pil_heatmaps: bool = True,
-        seed: int = 0,
-        cmap="plasma"
-    ) -> ConceptAttentionPipelineOutput:
-        """
-            Encode an image with flux, given a list of concepts.
-        """
-        assert return_cross_attention is False, "Not supported yet"
-        assert all([layer_index >= 0 and layer_index < 19 for layer_index in layer_indices]), "Invalid layer index"
-        assert height == width, "Height and width must be the same for now"
-        # Run the raw output space object
-        concept_heatmaps, _ = self.output_space_segmentation_model.segment_individual_image(
-            image=image,
-            concepts=concepts,
-            caption=prompt,
-            device=device,
-            softmax=True,
-            layers=layer_indices,
-            num_samples=num_samples,
-            height=height,
-            width=width
-        )
-        concept_heatmaps = concept_heatmaps.detach().cpu().numpy().squeeze()
-        # Convert the torch heatmaps to PIL images.
-        if return_pil_heatmaps:
-            min_val = concept_heatmaps.min()
-            max_val = concept_heatmaps.max()
-            # Convert to a matplotlib color scheme
-            colored_heatmaps = []
-            for concept_heatmap in concept_heatmaps:
-                # concept_heatmap = (concept_heatmap - concept_heatmap.min()) / (concept_heatmap.max() - concept_heatmap.min())
-                concept_heatmap = (concept_heatmap - min_val) / (max_val - min_val)
-                colored_heatmap = plt.get_cmap(cmap)(concept_heatmap)
-                rgb_image = (colored_heatmap[:, :, :3] * 255).astype(np.uint8)
-                colored_heatmaps.append(rgb_image)
-            concept_heatmaps = [PIL.Image.fromarray(concept_heatmap) for concept_heatmap in colored_heatmaps]
-        return ConceptAttentionPipelineOutput(
-            image=image,
-            concept_heatmaps=concept_heatmaps
-        )

 import numpy as np
 import matplotlib.pyplot as plt
 import torch
+import einops
 from concept_attention.binary_segmentation_baselines.raw_cross_attention import RawCrossAttentionBaseline, RawCrossAttentionSegmentationModel
 from concept_attention.binary_segmentation_baselines.raw_output_space import RawOutputSpaceBaseline, RawOutputSpaceSegmentationModel
 class ConceptAttentionPipelineOutput():
     image: PIL.Image.Image | np.ndarray
     concept_heatmaps: list[PIL.Image.Image]
+    cross_attention_maps: list[PIL.Image.Image]
 class ConceptAttentionFluxPipeline():
     """
             offload=offload_model,
             device=device
         )
     @torch.no_grad()
     def generate_image(
         if timesteps is None:
             timesteps = list(range(num_inference_steps))
         # Run the raw output space object
+        image, cross_attention_maps, concept_heatmaps = self.flux_generator.generate_image(
             width=width,
+            height=height,
+            prompt=prompt,
+            num_steps=num_inference_steps,
+            concepts=concepts,
+            seed=seed,
             guidance=guidance,
         )
+        # Concept heatmaps extraction
+        if softmax:
+            concept_heatmaps = torch.nn.functional.softmax(concept_heatmaps, dim=-2)
+        concept_heatmaps = concept_heatmaps[:, layer_indices]
+        concept_heatmaps = einops.reduce(
+            concept_heatmaps,
+            "time layers batch concepts patches -> batch concepts patches",
+            reduction="mean"
+        )
+        concept_heatmaps = einops.rearrange(
+            concept_heatmaps,
+            "batch concepts (h w) -> batch concepts h w",
+            h=64,
+            w=64
+        )
+        # Cross attention maps
+        if softmax:
+            cross_attention_maps = torch.nn.functional.softmax(cross_attention_maps, dim=-2)
+        cross_attention_maps = cross_attention_maps[:, layer_indices]
+        cross_attention_maps = einops.reduce(
+            cross_attention_maps,
+            "time layers batch concepts patches -> batch concepts patches",
+            reduction="mean"
+        )
+        cross_attention_maps = einops.rearrange(
+            cross_attention_maps,
+            "batch concepts (h w) -> batch concepts h w",
+            h=64,
+            w=64
+        )
+        concept_heatmaps = concept_heatmaps.to(torch.float32).detach().cpu().numpy()[0]
+        cross_attention_maps = cross_attention_maps.to(torch.float32).detach().cpu().numpy()[0]
         # Convert the torch heatmaps to PIL images.
         if return_pil_heatmaps:
             # Convert to a matplotlib color scheme
             concept_heatmaps = [PIL.Image.fromarray(concept_heatmap) for concept_heatmap in colored_heatmaps]
+            colored_cross_attention_maps = []
+            for cross_attention_map in cross_attention_maps:
+                cross_attention_map = (cross_attention_map - cross_attention_map.min()) / (cross_attention_map.max() - cross_attention_map.min())
+                colored_cross_attention_map = plt.get_cmap(cmap)(cross_attention_map)
+                rgb_image = (colored_cross_attention_map[:, :, :3] * 255).astype(np.uint8)
+                colored_cross_attention_maps.append(rgb_image)
+            cross_attention_maps = [PIL.Image.fromarray(cross_attention_map) for cross_attention_map in colored_cross_attention_maps]
         return ConceptAttentionPipelineOutput(
             image=image,
+            concept_heatmaps=concept_heatmaps,
+            cross_attention_maps=cross_attention_maps
         )
+    # def encode_image(
+    #     self,
+    #     image: PIL.Image.Image,
+    #     concepts: list[str],
+    #     prompt: str = "", # Optional
+    #     width: int = 1024,
+    #     height: int = 1024,
+    #     return_cross_attention = False,
+    #     layer_indices = list(range(15, 19)),
+    #     num_samples: int = 1,
+    #     device: str = "cuda:0",
+    #     return_pil_heatmaps: bool = True,
+    #     seed: int = 0,
+    #     cmap="plasma"
+    # ) -> ConceptAttentionPipelineOutput:
+    #     """
+    #         Encode an image with flux, given a list of concepts.
+    #     """
+    #     assert return_cross_attention is False, "Not supported yet"
+    #     assert all([layer_index >= 0 and layer_index < 19 for layer_index in layer_indices]), "Invalid layer index"
+    #     assert height == width, "Height and width must be the same for now"
+    #     # Run the raw output space object
+    #     concept_heatmaps, _ = self.output_space_segmentation_model.segment_individual_image(
+    #         image=image,
+    #         concepts=concepts,
+    #         caption=prompt,
+    #         device=device,
+    #         softmax=True,
+    #         layers=layer_indices,
+    #         num_samples=num_samples,
+    #         height=height,
+    #         width=width
+    #     )
+    #     concept_heatmaps = concept_heatmaps.detach().cpu().numpy().squeeze()
+    #     # Convert the torch heatmaps to PIL images.
+    #     if return_pil_heatmaps:
+    #         min_val = concept_heatmaps.min()
+    #         max_val = concept_heatmaps.max()
+    #         # Convert to a matplotlib color scheme
+    #         colored_heatmaps = []
+    #         for concept_heatmap in concept_heatmaps:
+    #             # concept_heatmap = (concept_heatmap - concept_heatmap.min()) / (concept_heatmap.max() - concept_heatmap.min())
+    #             concept_heatmap = (concept_heatmap - min_val) / (max_val - min_val)
+    #             colored_heatmap = plt.get_cmap(cmap)(concept_heatmap)
+    #             rgb_image = (colored_heatmap[:, :, :3] * 255).astype(np.uint8)
+    #             colored_heatmaps.append(rgb_image)
+    #         concept_heatmaps = [PIL.Image.fromarray(concept_heatmap) for concept_heatmap in colored_heatmaps]
+    #     return ConceptAttentionPipelineOutput(
+    #         image=image,
+    #         concept_heatmaps=concept_heatmaps
+    #     )