helblazer811 committed
Commit 55866f4 · 0 Parent(s)

"Orphan branch commit with a readme"

This view is limited to 50 files because it contains too many changes.

Files changed (50)
  1. .gitignore +5 -0
  2. README.md +7 -0
  3. app.py +140 -0
  4. concept_attention/__init__.py +2 -0
  5. concept_attention/binary_segmentation_baselines/__init__.py +0 -0
  6. concept_attention/binary_segmentation_baselines/__pycache__/__init__.cpython-310.pyc +0 -0
  7. concept_attention/binary_segmentation_baselines/__pycache__/chefer_clip_vit_baselines.cpython-310.pyc +0 -0
  8. concept_attention/binary_segmentation_baselines/__pycache__/clip_text_span_baseline.cpython-310.pyc +0 -0
  9. concept_attention/binary_segmentation_baselines/__pycache__/daam.cpython-310.pyc +0 -0
  10. concept_attention/binary_segmentation_baselines/__pycache__/daam_sd2.cpython-310.pyc +0 -0
  11. concept_attention/binary_segmentation_baselines/__pycache__/daam_sdxl.cpython-310.pyc +0 -0
  12. concept_attention/binary_segmentation_baselines/__pycache__/dino.cpython-310.pyc +0 -0
  13. concept_attention/binary_segmentation_baselines/__pycache__/raw_cross_attention.cpython-310.pyc +0 -0
  14. concept_attention/binary_segmentation_baselines/__pycache__/raw_output_space.cpython-310.pyc +0 -0
  15. concept_attention/binary_segmentation_baselines/__pycache__/raw_value_space.cpython-310.pyc +0 -0
  16. concept_attention/binary_segmentation_baselines/chefer_clip_vit_baselines.py +272 -0
  17. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/ViT_LRP.py +437 -0
  18. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/ViT_explanation_generator.py +83 -0
  19. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/ViT_new.py +238 -0
  20. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/ViT_orig_LRP.py +425 -0
  21. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/__pycache__/ViT_LRP.cpython-310.pyc +0 -0
  22. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/__pycache__/ViT_explanation_generator.cpython-310.pyc +0 -0
  23. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/__pycache__/ViT_new.cpython-310.pyc +0 -0
  24. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/__pycache__/ViT_orig_LRP.cpython-310.pyc +0 -0
  25. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/__pycache__/helpers.cpython-310.pyc +0 -0
  26. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/__pycache__/layer_helpers.cpython-310.pyc +0 -0
  27. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/__pycache__/weight_init.cpython-310.pyc +0 -0
  28. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/data/VOC.py +395 -0
  29. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/data/__init__.py +0 -0
  30. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/data/__pycache__/Imagenet.cpython-310.pyc +0 -0
  31. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/data/__pycache__/VOC.cpython-310.pyc +0 -0
  32. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/data/__pycache__/__init__.cpython-310.pyc +0 -0
  33. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/data/__pycache__/imagenet.cpython-310.pyc +0 -0
  34. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/data/imagenet.py +200 -0
  35. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/data/imagenet_utils.py +1002 -0
  36. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/data/transforms.py +442 -0
  37. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/generate_visualizations.py +208 -0
  38. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/helpers.py +295 -0
  39. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/layer_helpers.py +21 -0
  40. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/misc_functions.py +68 -0
  41. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/modules/__init__.py +0 -0
  42. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/modules/__pycache__/__init__.cpython-310.pyc +0 -0
  43. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/modules/__pycache__/layers_lrp.cpython-310.pyc +0 -0
  44. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/modules/__pycache__/layers_ours.cpython-310.pyc +0 -0
  45. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/modules/layers_lrp.py +261 -0
  46. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/modules/layers_ours.py +280 -0
  47. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/pertubation_eval_from_hdf5.py +232 -0
  48. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/utils/__init__.py +0 -0
  49. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/utils/__pycache__/__init__.cpython-310.pyc +0 -0
  50. concept_attention/binary_segmentation_baselines/chefer_vit_explainability/utils/__pycache__/confusionmatrix.cpython-310.pyc +0 -0
.gitignore ADDED
@@ -0,0 +1,5 @@
+ *.png
+ *.pyc
+ concept_attention.egg-info
+ concept_attention/flux/src/flux.egg-info/PKG-INFO
+ *.pyc
README.md ADDED
@@ -0,0 +1,7 @@
+ ---
+ title: ConceptAttention
+ sdk: gradio
+ sdk_version: "5.15.0"
+ app_file: app.py
+ pinned: false
+ ---
app.py ADDED
@@ -0,0 +1,140 @@
+ import base64
+ import io
+
+ import spaces
+ import gradio as gr
+ from PIL import Image
+
+ from concept_attention import ConceptAttentionFluxPipeline
+
+ concept_attention_default_args = {
+     "model_name": "flux-schnell",
+     "device": "cuda",
+     "layer_indices": list(range(10, 19)),
+     "timesteps": list(range(4)),
+     "num_samples": 4,
+     "num_inference_steps": 4
+ }
+ IMG_SIZE = 250
+
+ EXAMPLES = [
+     [
+         "A fluffy cat sitting on a windowsill",  # prompt
+         "cat.jpg",  # image
+         "fur, whiskers, eyes",  # words
+         42,  # seed
+     ],
+     # ["Mountain landscape with lake", "cat.jpg", "sky, trees, water", 123],
+     # ["Portrait of a young woman", "monkey.png", "face, hair, eyes", 456],
+ ]
+
+
+ pipeline = ConceptAttentionFluxPipeline(model_name="flux-schnell", device="cuda")
+
+
+ @spaces.GPU(duration=60)
+ def process_inputs(prompt, input_image, word_list, seed):
+     prompt = prompt.strip()
+     if not word_list.strip():
+         return None, "Please enter comma-separated words"
+
+     concepts = [w.strip() for w in word_list.split(",")]
+
+     if input_image is not None:
+         input_image = Image.fromarray(input_image)
+         input_image = input_image.convert("RGB")
+         input_image = input_image.resize((1024, 1024))
+
+         pipeline_output = pipeline.encode_image(
+             image=input_image,
+             concepts=concepts,
+             prompt=prompt,
+             width=1024,
+             height=1024,
+             seed=seed,
+             num_samples=concept_attention_default_args["num_samples"]
+         )
+     else:
+         pipeline_output = pipeline.generate_image(
+             prompt=prompt,
+             concepts=concepts,
+             width=1024,
+             height=1024,
+             seed=seed,
+             timesteps=concept_attention_default_args["timesteps"],
+             num_inference_steps=concept_attention_default_args["num_inference_steps"],
+         )
+
+     output_image = pipeline_output.image
+     concept_heatmaps = pipeline_output.concept_heatmaps
+
+     html_elements = []
+     for concept, heatmap in zip(concepts, concept_heatmaps):
+         img = heatmap.resize((IMG_SIZE, IMG_SIZE), resample=Image.NEAREST)
+         buffered = io.BytesIO()
+         img.save(buffered, format="PNG")
+         img_str = base64.b64encode(buffered.getvalue()).decode()
+
+         html = f"""
+         <div style='text-align: center; margin: 5px; padding: 5px; overflow-x: auto; white-space: nowrap;'>
+             <h1 style='margin-bottom: 10px;'>{concept}</h1>
+             <img src='data:image/png;base64,{img_str}' style='width: {IMG_SIZE}px; display: inline-block; height: {IMG_SIZE}px;'>
+         </div>
+         """
+         html_elements.append(html)
+
+     combined_html = "<div style='display: flex; flex-wrap: wrap; justify-content: center;'>" + "".join(html_elements) + "</div>"
+     return output_image, combined_html
+
+
+ with gr.Blocks(
+     css="""
+     .container { max-width: 1200px; margin: 0 auto; padding: 20px; }
+     .title { text-align: center; margin-bottom: 10px; }
+     .authors { text-align: center; margin-bottom: 20px; }
+     .affiliations { text-align: center; color: #666; margin-bottom: 40px; }
+     .content { display: grid; grid-template-columns: 1fr 1fr; gap: 20px; }
+     .section { border: 2px solid #ddd; border-radius: 10px; padding: 20px; }
+     """
+ ) as demo:
+     with gr.Column(elem_classes="container"):
+         gr.Markdown("# ConceptAttention: Diffusion Transformers Learn Highly Interpretable Features", elem_classes="title")
+         gr.Markdown("**Alec Helbling**¹, **Tuna Meral**², **Ben Hoover**¹³, **Pinar Yanardag**², **Duen Horng (Polo) Chau**¹", elem_classes="authors")
+         gr.Markdown("¹Georgia Tech · ²Virginia Tech · ³IBM Research", elem_classes="affiliations")
+
+         with gr.Row(elem_classes="content"):
+             with gr.Column(elem_classes="section"):
+                 gr.Markdown("### Input")
+                 prompt = gr.Textbox(label="Enter your prompt")
+                 words = gr.Textbox(label="Enter words (comma-separated)")
+                 seed = gr.Slider(minimum=0, maximum=10000, step=1, label="Seed", value=42)
+                 gr.HTML("<div style='text-align: center;'> <h1> Or </h1> </div>")
+                 image_input = gr.Image(type="numpy", label="Upload image (optional)")
+
+             with gr.Column(elem_classes="section"):
+                 gr.Markdown("### Output")
+                 output_image = gr.Image(type="numpy", label="Output image")
+
+         with gr.Row():
+             submit_btn = gr.Button("Process")
+
+         with gr.Row(elem_classes="section"):
+             saliency_display = gr.HTML(label="Saliency Maps")
+
+         submit_btn.click(
+             fn=process_inputs,
+             inputs=[prompt, image_input, words, seed], outputs=[output_image, saliency_display]
+         )
+
+         gr.Examples(examples=EXAMPLES, inputs=[prompt, image_input, words, seed], outputs=[output_image, saliency_display], fn=process_inputs, cache_examples=False)
+
+ if __name__ == "__main__":
+     demo.launch(
+         share=True,
+         server_name="0.0.0.0",
+         inbrowser=True,
+         # share=False,
+         server_port=6754,
+         quiet=True,
+         max_threads=1
+     )
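For reference, a minimal sketch of how the pipeline API used by app.py can be exercised outside of Gradio. It relies only on the calls visible above (generate_image returning .image and .concept_heatmaps); the CUDA device, the flux-schnell checkpoint download, and the output file names are assumptions.

```python
# Minimal sketch based on the calls in app.py above; assumes a CUDA GPU
# and that the flux-schnell weights can be downloaded.
from concept_attention import ConceptAttentionFluxPipeline

pipeline = ConceptAttentionFluxPipeline(model_name="flux-schnell", device="cuda")

concepts = ["fur", "whiskers", "eyes"]
output = pipeline.generate_image(
    prompt="A fluffy cat sitting on a windowsill",
    concepts=concepts,
    width=1024,
    height=1024,
    seed=42,
    timesteps=list(range(4)),
    num_inference_steps=4,
)

output.image.save("generated.png")  # hypothetical output path
for concept, heatmap in zip(concepts, output.concept_heatmaps):
    # app.py treats each heatmap as a PIL image (resize/save), so we do the same
    heatmap.save(f"heatmap_{concept}.png")
```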
concept_attention/__init__.py ADDED
@@ -0,0 +1,2 @@
+
+ from concept_attention.concept_attention_pipeline import ConceptAttentionFluxPipeline
concept_attention/binary_segmentation_baselines/__init__.py ADDED
File without changes
concept_attention/binary_segmentation_baselines/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (214 Bytes).
 
concept_attention/binary_segmentation_baselines/__pycache__/chefer_clip_vit_baselines.cpython-310.pyc ADDED
Binary file (7.18 kB).
 
concept_attention/binary_segmentation_baselines/__pycache__/clip_text_span_baseline.cpython-310.pyc ADDED
Binary file (3.66 kB).
 
concept_attention/binary_segmentation_baselines/__pycache__/daam.cpython-310.pyc ADDED
Binary file (2.52 kB).
 
concept_attention/binary_segmentation_baselines/__pycache__/daam_sd2.cpython-310.pyc ADDED
Binary file (3.81 kB).
 
concept_attention/binary_segmentation_baselines/__pycache__/daam_sdxl.cpython-310.pyc ADDED
Binary file (4.69 kB).
 
concept_attention/binary_segmentation_baselines/__pycache__/dino.cpython-310.pyc ADDED
Binary file (2.93 kB).
 
concept_attention/binary_segmentation_baselines/__pycache__/raw_cross_attention.cpython-310.pyc ADDED
Binary file (6.26 kB).
 
concept_attention/binary_segmentation_baselines/__pycache__/raw_output_space.cpython-310.pyc ADDED
Binary file (7 kB).
 
concept_attention/binary_segmentation_baselines/__pycache__/raw_value_space.cpython-310.pyc ADDED
Binary file (6.64 kB).
 
concept_attention/binary_segmentation_baselines/chefer_clip_vit_baselines.py ADDED
@@ -0,0 +1,272 @@
+ """
+ This is just a wrapper around the various baselines implemented in the
+ Chefer et. al. Transformer Explainability repository.
+
+ Implements
+     - CheferLRPSegmentationModel
+     - CheferRolloutSegmentationModel
+     - CheferLastLayerAttentionSegmentationModel
+     - CheferAttentionGradCAMSegmentationModel
+     - CheferTransformerAttributionSegmentationModel
+     - CheferFullLRPSegmentationModel
+     - CheferLastLayerLRPSegmentationModel
+ """
+
+ # # segmentation test for the rollout baseline
+ # if args.method == 'rollout':
+ #     Res = baselines.generate_rollout(image.cuda(), start_layer=1).reshape(batch_size, 1, 14, 14)
+
+ # # segmentation test for the LRP baseline (this is full LRP, not partial)
+ # elif args.method == 'full_lrp':
+ #     Res = orig_lrp.generate_LRP(image.cuda(), method="full").reshape(batch_size, 1, 224, 224)
+
+ # # segmentation test for our method
+ # elif args.method == 'transformer_attribution':
+ #     Res = lrp.generate_LRP(image.cuda(), start_layer=1, method="transformer_attribution").reshape(batch_size, 1, 14, 14)
+
+ # # segmentation test for the partial LRP baseline (last attn layer)
+ # elif args.method == 'lrp_last_layer':
+ #     Res = orig_lrp.generate_LRP(image.cuda(), method="last_layer", is_ablation=args.is_ablation)\
+ #         .reshape(batch_size, 1, 14, 14)
+
+ # # segmentation test for the raw attention baseline (last attn layer)
+ # elif args.method == 'attn_last_layer':
+ #     Res = orig_lrp.generate_LRP(image.cuda(), method="last_layer_attn", is_ablation=args.is_ablation)\
+ #         .reshape(batch_size, 1, 14, 14)
+
+ # # segmentation test for the GradCam baseline (last attn layer)
+ # elif args.method == 'attn_gradcam':
+ #     Res = baselines.generate_cam_attn(image.cuda()).reshape(batch_size, 1, 14, 14)
+
+ # if args.method != 'full_lrp':
+ #     # interpolate to full image size (224,224)
+ #     Res = torch.nn.functional.interpolate(Res, scale_factor=16, mode='bilinear').cuda()
+
+ import torch
+ import PIL
+
+ from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.ViT_explanation_generator import LRP
+ from concept_attention.segmentation import SegmentationAbstractClass
+ from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.ViT_explanation_generator import Baselines, LRP
+ from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.ViT_new import vit_base_patch16_224
+ from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.ViT_LRP import vit_base_patch16_224 as vit_LRP
+ from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.ViT_orig_LRP import vit_base_patch16_224 as vit_orig_LRP
+
+
+ # # Model
+ # model = vit_base_patch16_224(pretrained=True).cuda()
+ # baselines = Baselines(model)
+
+ # # LRP
+ # model_LRP = vit_LRP(pretrained=True).cuda()
+ # model_LRP.eval()
+ # lrp = LRP(model_LRP)
+
+ # # orig LRP
+ # model_orig_LRP = vit_orig_LRP(pretrained=True).cuda()
+ # model_orig_LRP.eval()
+ # orig_lrp = LRP(model_orig_LRP)
+
+ # model.eval()
+
+ class CheferLRPSegmentationModel(SegmentationAbstractClass):
+
+     def __init__(
+         self,
+         device: str = "cuda",
+         width: int = 224,
+         height: int = 224,
+     ):
+         """
+         Initialize the segmentation model.
+         """
+         super(CheferLRPSegmentationModel, self).__init__()
+         self.width = width
+         self.height = height
+         self.device = device
+         # Load the LRP model
+         model_orig_LRP = vit_orig_LRP(pretrained=True).to(self.device)
+         model_orig_LRP.eval()
+         self.orig_lrp = LRP(model_orig_LRP)
+
+     def segment_individual_image(self, image: torch.Tensor, concepts: list[str], caption: str, **kwargs):
+         """
+         Takes a real image and generates a concept segmentation map
+         it by adding noise and running the DiT on it.
+         """
+         if len(image.shape) == 3:
+             image = image.unsqueeze(0)
+
+         prediction_map = self.orig_lrp.generate_LRP(
+             image.to(self.device),
+             method="full"
+         )
+         prediction_map = prediction_map.unsqueeze(0)
+         # Rescale the prediction map to 64x64
+         prediction_map = torch.nn.functional.interpolate(
+             prediction_map,
+             size=(self.width, self.height),
+             mode="nearest"
+         ).reshape(1, self.width, self.height)
+
+         return prediction_map, None
+
+ class CheferRolloutSegmentationModel(SegmentationAbstractClass):
+
+     def __init__(self, device: str = "cuda", width: int = 224, height: int = 224):
+         super(CheferRolloutSegmentationModel, self).__init__()
+         self.width = width
+         self.height = height
+         self.device = device
+         model = vit_base_patch16_224(pretrained=True).to(device)
+         self.baselines = Baselines(model)
+
+     def segment_individual_image(self, image: torch.Tensor, concepts: list[str], caption: str, **kwargs):
+         if len(image.shape) == 3:
+             image = image.unsqueeze(0)
+         prediction_map = self.baselines.generate_rollout(
+             image.to(self.device), start_layer=1
+         ).reshape(1, 1, 14, 14)
+         # Rescale the prediction map to 64x64
+         prediction_map = torch.nn.functional.interpolate(
+             prediction_map,
+             size=(self.width, self.height),
+             mode="nearest"
+         ).reshape(1, self.width, self.height)
+
+         return prediction_map, None
+
+
+ class CheferLastLayerAttentionSegmentationModel(SegmentationAbstractClass):
+
+     def __init__(self, device: str = "cuda", width: int = 224, height: int = 224):
+         super(CheferLastLayerAttentionSegmentationModel, self).__init__()
+         self.width = width
+         self.height = height
+         self.device = device
+         model_orig_LRP = vit_orig_LRP(pretrained=True).to(device)
+         model_orig_LRP.eval()
+         self.orig_lrp = LRP(model_orig_LRP)
+
+     def segment_individual_image(self, image: torch.Tensor, concepts: list[str], caption: str, **kwargs):
+         if len(image.shape) == 3:
+             image = image.unsqueeze(0)
+
+         prediction_map = self.orig_lrp.generate_LRP(
+             image.to(self.device), method="last_layer_attn"
+         ).reshape(1, 1, 14, 14)
+         # Rescale the prediction map to 64x64
+         prediction_map = torch.nn.functional.interpolate(
+             prediction_map,
+             size=(self.width, self.height),
+             mode="nearest"
+         ).reshape(1, self.width, self.height)
+
+         return prediction_map, None
+
+
+ class CheferAttentionGradCAMSegmentationModel(SegmentationAbstractClass):
+
+     def __init__(self, device: str = "cuda", width: int = 224, height: int = 224):
+         super(CheferAttentionGradCAMSegmentationModel, self).__init__()
+         self.width = width
+         self.height = height
+         self.device = device
+         model = vit_base_patch16_224(pretrained=True).to(device)
+         self.baselines = Baselines(model)
+
+     def segment_individual_image(self, image: torch.Tensor, concepts: list[str], caption: str, **kwargs):
+         if len(image.shape) == 3:
+             image = image.unsqueeze(0)
+         prediction_map = self.baselines.generate_cam_attn(
+             image.to(self.device)
+         ).reshape(1, 1, 14, 14)
+         # Rescale the prediction map to 64x64
+         prediction_map = torch.nn.functional.interpolate(
+             prediction_map,
+             size=(self.width, self.height),
+             mode="nearest"
+         ).reshape(1, self.width, self.height)
+
+         return prediction_map, None
+
+
+ class CheferTransformerAttributionSegmentationModel(SegmentationAbstractClass):
+
+     def __init__(self, device: str = "cuda", width: int = 224, height: int = 224):
+         super(CheferTransformerAttributionSegmentationModel, self).__init__()
+         self.width = width
+         self.height = height
+         self.device = device
+         model_LRP = vit_LRP(pretrained=True).to(device)
+         model_LRP.eval()
+         self.lrp = LRP(model_LRP)
+
+     def segment_individual_image(self, image: torch.Tensor, concepts: list[str], caption: str, **kwargs):
+         if len(image.shape) == 3:
+             image = image.unsqueeze(0)
+         prediction_map = self.lrp.generate_LRP(
+             image.to(self.device), start_layer=1, method="transformer_attribution"
+         ).reshape(1, 1, 14, 14)
+         # Rescale the prediction map to 64x64
+         prediction_map = torch.nn.functional.interpolate(
+             prediction_map,
+             size=(self.width, self.height),
+             mode="nearest"
+         ).reshape(1, self.width, self.height)
+
+         return prediction_map, None
+
+
+ class CheferFullLRPSegmentationModel(SegmentationAbstractClass):
+
+     def __init__(self, device: str = "cuda", width: int = 224, height: int = 224):
+         super(CheferFullLRPSegmentationModel, self).__init__()
+         self.width = width
+         self.height = height
+         self.device = device
+         model_LRP = vit_LRP(pretrained=True).to(device)
+         model_LRP.eval()
+         self.lrp = LRP(model_LRP)
+
+     def segment_individual_image(self, image: torch.Tensor, concepts: list[str], caption: str, **kwargs):
+         if len(image.shape) == 3:
+             image = image.unsqueeze(0)
+         prediction_map = self.lrp.generate_LRP(
+             image.to(self.device), method="full"
+         ).reshape(1, 1, 224, 224)
+         # Rescale the prediction map to 64x64
+         prediction_map = torch.nn.functional.interpolate(
+             prediction_map,
+             size=(self.width, self.height),
+             mode="nearest"
+         ).reshape(1, self.width, self.height)
+
+         return prediction_map, None
+
+
+ class CheferLastLayerLRPSegmentationModel(SegmentationAbstractClass):
+
+     def __init__(self, device: str = "cuda", width: int = 224, height: int = 224):
+         super(CheferLastLayerLRPSegmentationModel, self).__init__()
+         self.width = width
+         self.height = height
+         self.device = device
+         model_LRP = vit_LRP(pretrained=True).to(device)
+         model_LRP.eval()
+         self.lrp = LRP(model_LRP)
+
+     def segment_individual_image(self, image: torch.Tensor, concepts: list[str], caption: str, **kwargs):
+         if len(image.shape) == 3:
+             image = image.unsqueeze(0)
+         prediction_map = self.lrp.generate_LRP(
+             image.to(self.device), method="last_layer"
+         ).reshape(1, 1, 14, 14)
+         # Rescale the prediction map to 64x64
+         prediction_map = torch.nn.functional.interpolate(
+             prediction_map,
+             size=(self.width, self.height),
+             mode="nearest"
+         ).reshape(1, self.width, self.height)
+
+         return prediction_map, None
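The module docstring above lists the wrapper classes this file adds around the Chefer et al. baselines. A minimal sketch of invoking one of them, based only on the constructor and segment_individual_image signatures shown in the diff; the random input tensor, its preprocessing, and the availability of pretrained ViT weights are assumptions.

```python
# Sketch of calling one wrapper class from the file above; the input tensor
# is a stand-in for a normalized 224x224 image batch, not real data.
import torch
from concept_attention.binary_segmentation_baselines.chefer_clip_vit_baselines import (
    CheferRolloutSegmentationModel,
)

model = CheferRolloutSegmentationModel(device="cuda", width=224, height=224)
image = torch.randn(1, 3, 224, 224)  # placeholder preprocessed image
prediction_map, _ = model.segment_individual_image(image, concepts=["cat"], caption="a cat")
print(prediction_map.shape)  # (1, 224, 224): the 14x14 rollout map upsampled to width x height
```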
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/ViT_LRP.py ADDED
@@ -0,0 +1,437 @@
+ """ Vision Transformer (ViT) in PyTorch
+ Hacked together by / Copyright 2020 Ross Wightman
+ """
+ import torch
+ import torch.nn as nn
+ from einops import rearrange
+
+ from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.modules.layers_ours import *
+ from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.helpers import load_pretrained
+ from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.weight_init import trunc_normal_
+ from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.layer_helpers import to_2tuple
+
+
+ def _cfg(url='', **kwargs):
+     return {
+         'url': url,
+         'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': None,
+         'crop_pct': .9, 'interpolation': 'bicubic',
+         'first_conv': 'patch_embed.proj', 'classifier': 'head',
+         **kwargs
+     }
+
+
+ default_cfgs = {
+     # patch models
+     'vit_small_patch16_224': _cfg(
+         url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/vit_small_p16_224-15ec54c9.pth',
+     ),
+     'vit_base_patch16_224': _cfg(
+         url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth',
+         mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5),
+     ),
+     'vit_large_patch16_224': _cfg(
+         url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_p16_224-4ee7a4dc.pth',
+         mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
+ }
+
+ def compute_rollout_attention(all_layer_matrices, start_layer=0):
+     # adding residual consideration
+     num_tokens = all_layer_matrices[0].shape[1]
+     batch_size = all_layer_matrices[0].shape[0]
+     eye = torch.eye(num_tokens).expand(batch_size, num_tokens, num_tokens).to(all_layer_matrices[0].device)
+     all_layer_matrices = [all_layer_matrices[i] + eye for i in range(len(all_layer_matrices))]
+     # all_layer_matrices = [all_layer_matrices[i] / all_layer_matrices[i].sum(dim=-1, keepdim=True)
+     #                       for i in range(len(all_layer_matrices))]
+     joint_attention = all_layer_matrices[start_layer]
+     for i in range(start_layer+1, len(all_layer_matrices)):
+         joint_attention = all_layer_matrices[i].bmm(joint_attention)
+     return joint_attention
+
+ class Mlp(nn.Module):
+     def __init__(self, in_features, hidden_features=None, out_features=None, drop=0.):
+         super().__init__()
+         out_features = out_features or in_features
+         hidden_features = hidden_features or in_features
+         self.fc1 = Linear(in_features, hidden_features)
+         self.act = GELU()
+         self.fc2 = Linear(hidden_features, out_features)
+         self.drop = Dropout(drop)
+
+     def forward(self, x):
+         x = self.fc1(x)
+         x = self.act(x)
+         x = self.drop(x)
+         x = self.fc2(x)
+         x = self.drop(x)
+         return x
+
+     def relprop(self, cam, **kwargs):
+         cam = self.drop.relprop(cam, **kwargs)
+         cam = self.fc2.relprop(cam, **kwargs)
+         cam = self.act.relprop(cam, **kwargs)
+         cam = self.fc1.relprop(cam, **kwargs)
+         return cam
+
+
+ class Attention(nn.Module):
+     def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0., proj_drop=0.):
+         super().__init__()
+         self.num_heads = num_heads
+         head_dim = dim // num_heads
+         # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
+         self.scale = head_dim ** -0.5
+
+         # A = Q*K^T
+         self.matmul1 = einsum('bhid,bhjd->bhij')
+         # attn = A*V
+         self.matmul2 = einsum('bhij,bhjd->bhid')
+
+         self.qkv = Linear(dim, dim * 3, bias=qkv_bias)
+         self.attn_drop = Dropout(attn_drop)
+         self.proj = Linear(dim, dim)
+         self.proj_drop = Dropout(proj_drop)
+         self.softmax = Softmax(dim=-1)
+
+         self.attn_cam = None
+         self.attn = None
+         self.v = None
+         self.v_cam = None
+         self.attn_gradients = None
+
+     def get_attn(self):
+         return self.attn
+
+     def save_attn(self, attn):
+         self.attn = attn
+
+     def save_attn_cam(self, cam):
+         self.attn_cam = cam
+
+     def get_attn_cam(self):
+         return self.attn_cam
+
+     def get_v(self):
+         return self.v
+
+     def save_v(self, v):
+         self.v = v
+
+     def save_v_cam(self, cam):
+         self.v_cam = cam
+
+     def get_v_cam(self):
+         return self.v_cam
+
+     def save_attn_gradients(self, attn_gradients):
+         self.attn_gradients = attn_gradients
+
+     def get_attn_gradients(self):
+         return self.attn_gradients
+
+     def forward(self, x):
+         b, n, _, h = *x.shape, self.num_heads
+         qkv = self.qkv(x)
+         q, k, v = rearrange(qkv, 'b n (qkv h d) -> qkv b h n d', qkv=3, h=h)
+
+         self.save_v(v)
+
+         dots = self.matmul1([q, k]) * self.scale
+
+         attn = self.softmax(dots)
+         attn = self.attn_drop(attn)
+
+         self.save_attn(attn)
+         attn.register_hook(self.save_attn_gradients)
+
+         out = self.matmul2([attn, v])
+         out = rearrange(out, 'b h n d -> b n (h d)')
+
+         out = self.proj(out)
+         out = self.proj_drop(out)
+         return out
+
+     def relprop(self, cam, **kwargs):
+         cam = self.proj_drop.relprop(cam, **kwargs)
+         cam = self.proj.relprop(cam, **kwargs)
+         cam = rearrange(cam, 'b n (h d) -> b h n d', h=self.num_heads)
+
+         # attn = A*V
+         (cam1, cam_v) = self.matmul2.relprop(cam, **kwargs)
+         cam1 /= 2
+         cam_v /= 2
+
+         self.save_v_cam(cam_v)
+         self.save_attn_cam(cam1)
+
+         cam1 = self.attn_drop.relprop(cam1, **kwargs)
+         cam1 = self.softmax.relprop(cam1, **kwargs)
+
+         # A = Q*K^T
+         (cam_q, cam_k) = self.matmul1.relprop(cam1, **kwargs)
+         cam_q /= 2
+         cam_k /= 2
+
+         cam_qkv = rearrange([cam_q, cam_k, cam_v], 'qkv b h n d -> b n (qkv h d)', qkv=3, h=self.num_heads)
+
+         return self.qkv.relprop(cam_qkv, **kwargs)
+
+
+ class Block(nn.Module):
+
+     def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, drop=0., attn_drop=0.):
+         super().__init__()
+         self.norm1 = LayerNorm(dim, eps=1e-6)
+         self.attn = Attention(
+             dim, num_heads=num_heads, qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop)
+         self.norm2 = LayerNorm(dim, eps=1e-6)
+         mlp_hidden_dim = int(dim * mlp_ratio)
+         self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, drop=drop)
+
+         self.add1 = Add()
+         self.add2 = Add()
+         self.clone1 = Clone()
+         self.clone2 = Clone()
+
+     def forward(self, x):
+         x1, x2 = self.clone1(x, 2)
+         x = self.add1([x1, self.attn(self.norm1(x2))])
+         x1, x2 = self.clone2(x, 2)
+         x = self.add2([x1, self.mlp(self.norm2(x2))])
+         return x
+
+     def relprop(self, cam, **kwargs):
+         (cam1, cam2) = self.add2.relprop(cam, **kwargs)
+         cam2 = self.mlp.relprop(cam2, **kwargs)
+         cam2 = self.norm2.relprop(cam2, **kwargs)
+         cam = self.clone2.relprop((cam1, cam2), **kwargs)
+
+         (cam1, cam2) = self.add1.relprop(cam, **kwargs)
+         cam2 = self.attn.relprop(cam2, **kwargs)
+         cam2 = self.norm1.relprop(cam2, **kwargs)
+         cam = self.clone1.relprop((cam1, cam2), **kwargs)
+         return cam
+
+
+ class PatchEmbed(nn.Module):
+     """ Image to Patch Embedding
+     """
+     def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
+         super().__init__()
+         img_size = to_2tuple(img_size)
+         patch_size = to_2tuple(patch_size)
+         num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0])
+         self.img_size = img_size
+         self.patch_size = patch_size
+         self.num_patches = num_patches
+
+         self.proj = Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
+
+     def forward(self, x):
+         B, C, H, W = x.shape
+         # FIXME look at relaxing size constraints
+         assert H == self.img_size[0] and W == self.img_size[1], \
+             f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
+         x = self.proj(x).flatten(2).transpose(1, 2)
+         return x
+
+     def relprop(self, cam, **kwargs):
+         cam = cam.transpose(1, 2)
+         cam = cam.reshape(cam.shape[0], cam.shape[1],
+                           (self.img_size[0] // self.patch_size[0]), (self.img_size[1] // self.patch_size[1]))
+         return self.proj.relprop(cam, **kwargs)
+
+
+ class VisionTransformer(nn.Module):
+     """ Vision Transformer with support for patch or hybrid CNN input stage
+     """
+     def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dim=768, depth=12,
+                  num_heads=12, mlp_ratio=4., qkv_bias=False, mlp_head=False, drop_rate=0., attn_drop_rate=0.):
+         super().__init__()
+         self.num_classes = num_classes
+         self.num_features = self.embed_dim = embed_dim  # num_features for consistency with other models
+         self.patch_embed = PatchEmbed(
+             img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim)
+         num_patches = self.patch_embed.num_patches
+
+         self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
+         self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
+
+         self.blocks = nn.ModuleList([
+             Block(
+                 dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
+                 drop=drop_rate, attn_drop=attn_drop_rate)
+             for i in range(depth)])
+
+         self.norm = LayerNorm(embed_dim)
+         if mlp_head:
+             # paper diagram suggests 'MLP head', but results in 4M extra parameters vs paper
+             self.head = Mlp(embed_dim, int(embed_dim * mlp_ratio), num_classes)
+         else:
+             # with a single Linear layer as head, the param count within rounding of paper
+             self.head = Linear(embed_dim, num_classes)
+
+         # FIXME not quite sure what the proper weight init is supposed to be,
+         # normal / trunc normal w/ std == .02 similar to other Bert like transformers
+         trunc_normal_(self.pos_embed, std=.02)  # embeddings same as weights?
+         trunc_normal_(self.cls_token, std=.02)
+         self.apply(self._init_weights)
+
+         self.pool = IndexSelect()
+         self.add = Add()
+
+         self.inp_grad = None
+
+     def save_inp_grad(self, grad):
+         self.inp_grad = grad
+
+     def get_inp_grad(self):
+         return self.inp_grad
+
+
+     def _init_weights(self, m):
+         if isinstance(m, nn.Linear):
+             trunc_normal_(m.weight, std=.02)
+             if isinstance(m, nn.Linear) and m.bias is not None:
+                 nn.init.constant_(m.bias, 0)
+         elif isinstance(m, nn.LayerNorm):
+             nn.init.constant_(m.bias, 0)
+             nn.init.constant_(m.weight, 1.0)
+
+     @property
+     def no_weight_decay(self):
+         return {'pos_embed', 'cls_token'}
+
+     def forward(self, x):
+         B = x.shape[0]
+         x = self.patch_embed(x)
+
+         cls_tokens = self.cls_token.expand(B, -1, -1)  # stole cls_tokens impl from Phil Wang, thanks
+         x = torch.cat((cls_tokens, x), dim=1)
+         x = self.add([x, self.pos_embed])
+
+         x.register_hook(self.save_inp_grad)
+
+         for blk in self.blocks:
+             x = blk(x)
+
+         x = self.norm(x)
+         x = self.pool(x, dim=1, indices=torch.tensor(0, device=x.device))
+         x = x.squeeze(1)
+         x = self.head(x)
+         return x
+
+     def relprop(self, cam=None, method="transformer_attribution", is_ablation=False, start_layer=0, **kwargs):
+         # print(kwargs)
+         # print("conservation 1", cam.sum())
+         cam = self.head.relprop(cam, **kwargs)
+         cam = cam.unsqueeze(1)
+         cam = self.pool.relprop(cam, **kwargs)
+         cam = self.norm.relprop(cam, **kwargs)
+         for blk in reversed(self.blocks):
+             cam = blk.relprop(cam, **kwargs)
+
+         # print("conservation 2", cam.sum())
+         # print("min", cam.min())
+
+         if method == "full":
+             (cam, _) = self.add.relprop(cam, **kwargs)
+             cam = cam[:, 1:]
+             cam = self.patch_embed.relprop(cam, **kwargs)
+             # sum on channels
+             cam = cam.sum(dim=1)
+             return cam
+
+         elif method == "rollout":
+             # cam rollout
+             attn_cams = []
+             for blk in self.blocks:
+                 attn_heads = blk.attn.get_attn_cam().clamp(min=0)
+                 avg_heads = (attn_heads.sum(dim=1) / attn_heads.shape[1]).detach()
+                 attn_cams.append(avg_heads)
+             cam = compute_rollout_attention(attn_cams, start_layer=start_layer)
+             cam = cam[:, 0, 1:]
+             return cam
+
+         # our method, method name grad is legacy
+         elif method == "transformer_attribution" or method == "grad":
+             cams = []
+             for blk in self.blocks:
+                 grad = blk.attn.get_attn_gradients()
+                 cam = blk.attn.get_attn_cam()
+                 cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
+                 grad = grad[0].reshape(-1, grad.shape[-1], grad.shape[-1])
+                 cam = grad * cam
+                 cam = cam.clamp(min=0).mean(dim=0)
+                 cams.append(cam.unsqueeze(0))
+             rollout = compute_rollout_attention(cams, start_layer=start_layer)
+             cam = rollout[:, 0, 1:]
+             return cam
+
+         elif method == "last_layer":
+             cam = self.blocks[-1].attn.get_attn_cam()
+             cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
+             if is_ablation:
+                 grad = self.blocks[-1].attn.get_attn_gradients()
+                 grad = grad[0].reshape(-1, grad.shape[-1], grad.shape[-1])
+                 cam = grad * cam
+             cam = cam.clamp(min=0).mean(dim=0)
+             cam = cam[0, 1:]
+             return cam
+
+         elif method == "last_layer_attn":
+             cam = self.blocks[-1].attn.get_attn()
+             cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
+             cam = cam.clamp(min=0).mean(dim=0)
+             cam = cam[0, 1:]
+             return cam
+
+         elif method == "second_layer":
+             cam = self.blocks[1].attn.get_attn_cam()
+             cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
+             if is_ablation:
+                 grad = self.blocks[1].attn.get_attn_gradients()
+                 grad = grad[0].reshape(-1, grad.shape[-1], grad.shape[-1])
+                 cam = grad * cam
+             cam = cam.clamp(min=0).mean(dim=0)
+             cam = cam[0, 1:]
+             return cam
+
+
+ def _conv_filter(state_dict, patch_size=16):
+     """ convert patch embedding weight from manual patchify + linear proj to conv"""
+     out_dict = {}
+     for k, v in state_dict.items():
+         if 'patch_embed.proj.weight' in k:
+             v = v.reshape((v.shape[0], 3, patch_size, patch_size))
+         out_dict[k] = v
+     return out_dict
+
+ def vit_base_patch16_224(pretrained=False, **kwargs):
+     model = VisionTransformer(
+         patch_size=16, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True, **kwargs)
+     model.default_cfg = default_cfgs['vit_base_patch16_224']
+     if pretrained:
+         load_pretrained(
+             model, num_classes=model.num_classes, in_chans=kwargs.get('in_chans', 3), filter_fn=_conv_filter)
+     return model
+
+ def vit_large_patch16_224(pretrained=False, **kwargs):
+     model = VisionTransformer(
+         patch_size=16, embed_dim=1024, depth=24, num_heads=16, mlp_ratio=4, qkv_bias=True, **kwargs)
+     model.default_cfg = default_cfgs['vit_large_patch16_224']
+     if pretrained:
+         load_pretrained(model, num_classes=model.num_classes, in_chans=kwargs.get('in_chans', 3))
+     return model
+
+ def deit_base_patch16_224(pretrained=False, **kwargs):
+     model = VisionTransformer(
+         patch_size=16, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True, **kwargs)
+     model.default_cfg = _cfg()
+     if pretrained:
+         checkpoint = torch.hub.load_state_dict_from_url(
+             url="https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth",
+             map_location="cpu", check_hash=True
+         )
+         model.load_state_dict(checkpoint["model"])
+     return model
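A tiny, self-contained illustration of the compute_rollout_attention helper defined in the file above: it adds the identity matrix (residual connection) to each layer's averaged attention and chains the layers with batched matrix multiplication. The random matrices are placeholders, not real model attentions.

```python
# Placeholder demonstration of attention rollout as implemented above.
import torch
from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.ViT_LRP import (
    compute_rollout_attention,
)

# Three fake attention layers over 5 tokens (batch size 1), rows normalized.
layers = [torch.rand(1, 5, 5).softmax(dim=-1) for _ in range(3)]
rollout = compute_rollout_attention(layers, start_layer=0)
print(rollout.shape)  # torch.Size([1, 5, 5])
```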
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/ViT_explanation_generator.py ADDED
@@ -0,0 +1,83 @@
+ import argparse
+ import torch
+ import numpy as np
+ from numpy import *
+
+ # compute rollout between attention layers
+ def compute_rollout_attention(all_layer_matrices, start_layer=0):
+     # adding residual consideration- code adapted from https://github.com/samiraabnar/attention_flow
+     num_tokens = all_layer_matrices[0].shape[1]
+     batch_size = all_layer_matrices[0].shape[0]
+     eye = torch.eye(num_tokens).expand(batch_size, num_tokens, num_tokens).to(all_layer_matrices[0].device)
+     all_layer_matrices = [all_layer_matrices[i] + eye for i in range(len(all_layer_matrices))]
+     matrices_aug = [all_layer_matrices[i] / all_layer_matrices[i].sum(dim=-1, keepdim=True)
+                     for i in range(len(all_layer_matrices))]
+     joint_attention = matrices_aug[start_layer]
+     for i in range(start_layer+1, len(matrices_aug)):
+         joint_attention = matrices_aug[i].bmm(joint_attention)
+     return joint_attention
+
+ class LRP:
+     def __init__(self, model):
+         self.model = model
+         self.model.eval()
+
+     def generate_LRP(self, input, index=None, method="transformer_attribution", is_ablation=False, start_layer=0):
+         output = self.model(input)
+         kwargs = {"alpha": 1}
+         if index == None:
+             index = np.argmax(output.cpu().data.numpy(), axis=-1)
+
+         one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
+         one_hot[0, index] = 1
+         one_hot_vector = one_hot
+         one_hot = torch.from_numpy(one_hot).requires_grad_(True)
+         one_hot = torch.sum(one_hot.to(input.device) * output)
+
+         self.model.zero_grad()
+         one_hot.backward(retain_graph=True)
+
+         return self.model.relprop(torch.tensor(one_hot_vector).to(input.device), method=method, is_ablation=is_ablation,
+                                   start_layer=start_layer, **kwargs)
+
+
+
+ class Baselines:
+     def __init__(self, model):
+         self.model = model
+         self.model.eval()
+
+     def generate_cam_attn(self, input, index=None):
+         output = self.model(input, register_hook=True)
+         if index == None:
+             index = np.argmax(output.cpu().data.numpy())
+
+         one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
+         one_hot[0][index] = 1
+         one_hot = torch.from_numpy(one_hot).requires_grad_(True)
+         one_hot = torch.sum(one_hot.to(output.device) * output)
+
+         self.model.zero_grad()
+         one_hot.backward(retain_graph=True)
+         #################### attn
+         grad = self.model.blocks[-1].attn.get_attn_gradients()
+         cam = self.model.blocks[-1].attn.get_attention_map()
+         cam = cam[0, :, 0, 1:].reshape(-1, 14, 14)
+         grad = grad[0, :, 0, 1:].reshape(-1, 14, 14)
+         grad = grad.mean(dim=[1, 2], keepdim=True)
+         cam = (cam * grad).mean(0).clamp(min=0)
+         cam = (cam - cam.min()) / (cam.max() - cam.min())
+
+         return cam
+         #################### attn
+
+     def generate_rollout(self, input, start_layer=0):
+         self.model(input)
+         blocks = self.model.blocks
+         all_layer_attentions = []
+         for blk in blocks:
+             attn_heads = blk.attn.get_attention_map()
+             avg_heads = (attn_heads.sum(dim=1) / attn_heads.shape[1]).detach()
+             all_layer_attentions.append(avg_heads)
+         rollout = compute_rollout_attention(all_layer_attentions, start_layer=start_layer)
+         return rollout[:,0, 1:]
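A sketch of generating a transformer-attribution relevance map with the LRP wrapper above, mirroring how CheferTransformerAttributionSegmentationModel uses it earlier in this commit; the random input tensor and the CUDA device are assumptions.

```python
# Sketch of the LRP usage pattern shown in chefer_clip_vit_baselines.py.
import torch
from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.ViT_LRP import (
    vit_base_patch16_224 as vit_LRP,
)
from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.ViT_explanation_generator import LRP

model = vit_LRP(pretrained=True).cuda()
model.eval()
lrp = LRP(model)

image = torch.randn(1, 3, 224, 224).cuda()  # stand-in for a normalized image
relevance = lrp.generate_LRP(image, start_layer=1, method="transformer_attribution")
relevance = relevance.reshape(1, 1, 14, 14)  # one relevance value per 16x16 patch
```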
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/ViT_new.py ADDED
@@ -0,0 +1,238 @@
+ """ Vision Transformer (ViT) in PyTorch
+ Hacked together by / Copyright 2020 Ross Wightman
+ """
+ import torch
+ import torch.nn as nn
+ from functools import partial
+ from einops import rearrange
+
+ from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.helpers import load_pretrained
+ from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.weight_init import trunc_normal_
+ from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.layer_helpers import to_2tuple
+
+
+ def _cfg(url='', **kwargs):
+     return {
+         'url': url,
+         'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': None,
+         'crop_pct': .9, 'interpolation': 'bicubic',
+         'first_conv': 'patch_embed.proj', 'classifier': 'head',
+         **kwargs
+     }
+
+
+ default_cfgs = {
+     # patch models
+     'vit_small_patch16_224': _cfg(
+         url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/vit_small_p16_224-15ec54c9.pth',
+     ),
+     'vit_base_patch16_224': _cfg(
+         url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth',
+         mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5),
+     ),
+     'vit_large_patch16_224': _cfg(
+         url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_p16_224-4ee7a4dc.pth',
+         mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
+ }
+
+ class Mlp(nn.Module):
+     def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
+         super().__init__()
+         out_features = out_features or in_features
+         hidden_features = hidden_features or in_features
+         self.fc1 = nn.Linear(in_features, hidden_features)
+         self.act = act_layer()
+         self.fc2 = nn.Linear(hidden_features, out_features)
+         self.drop = nn.Dropout(drop)
+
+     def forward(self, x):
+         x = self.fc1(x)
+         x = self.act(x)
+         x = self.drop(x)
+         x = self.fc2(x)
+         x = self.drop(x)
+         return x
+
+
+ class Attention(nn.Module):
+     def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0., proj_drop=0.):
+         super().__init__()
+         self.num_heads = num_heads
+         head_dim = dim // num_heads
+         # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
+         self.scale = head_dim ** -0.5
+
+         self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
+         self.attn_drop = nn.Dropout(attn_drop)
+         self.proj = nn.Linear(dim, dim)
+         self.proj_drop = nn.Dropout(proj_drop)
+
+         self.attn_gradients = None
+         self.attention_map = None
+
+     def save_attn_gradients(self, attn_gradients):
+         self.attn_gradients = attn_gradients
+
+     def get_attn_gradients(self):
+         return self.attn_gradients
+
+     def save_attention_map(self, attention_map):
+         self.attention_map = attention_map
+
+     def get_attention_map(self):
+         return self.attention_map
+
+     def forward(self, x, register_hook=False):
+         b, n, _, h = *x.shape, self.num_heads
+
+         # self.save_output(x)
+         # x.register_hook(self.save_output_grad)
+
+         qkv = self.qkv(x)
+         q, k, v = rearrange(qkv, 'b n (qkv h d) -> qkv b h n d', qkv = 3, h = h)
+
+         dots = torch.einsum('bhid,bhjd->bhij', q, k) * self.scale
+
+         attn = dots.softmax(dim=-1)
+         attn = self.attn_drop(attn)
+
+         out = torch.einsum('bhij,bhjd->bhid', attn, v)
+
+         self.save_attention_map(attn)
+         if register_hook:
+             attn.register_hook(self.save_attn_gradients)
+
+         out = rearrange(out, 'b h n d -> b n (h d)')
+         out = self.proj(out)
+         out = self.proj_drop(out)
+         return out
+
+
+ class Block(nn.Module):
+
+     def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, drop=0., attn_drop=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm):
+         super().__init__()
+         self.norm1 = norm_layer(dim)
+         self.attn = Attention(
+             dim, num_heads=num_heads, qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop)
+         self.norm2 = norm_layer(dim)
+         mlp_hidden_dim = int(dim * mlp_ratio)
+         self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
+
+     def forward(self, x, register_hook=False):
+         x = x + self.attn(self.norm1(x), register_hook=register_hook)
+         x = x + self.mlp(self.norm2(x))
+         return x
+
+
+ class PatchEmbed(nn.Module):
+     """ Image to Patch Embedding
+     """
+     def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
+         super().__init__()
+         img_size = to_2tuple(img_size)
+         patch_size = to_2tuple(patch_size)
+         num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0])
+         self.img_size = img_size
+         self.patch_size = patch_size
+         self.num_patches = num_patches
+
+         self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
+
+     def forward(self, x):
+         B, C, H, W = x.shape
+         # FIXME look at relaxing size constraints
+         assert H == self.img_size[0] and W == self.img_size[1], \
+             f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
+         x = self.proj(x).flatten(2).transpose(1, 2)
+         return x
+
+ class VisionTransformer(nn.Module):
+     """ Vision Transformer
+     """
+     def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dim=768, depth=12,
+                  num_heads=12, mlp_ratio=4., qkv_bias=False, drop_rate=0., attn_drop_rate=0., norm_layer=nn.LayerNorm):
+         super().__init__()
+         self.num_classes = num_classes
+         self.num_features = self.embed_dim = embed_dim  # num_features for consistency with other models
+         self.patch_embed = PatchEmbed(
+             img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim)
+         num_patches = self.patch_embed.num_patches
+
+         self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
+         self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
+         self.pos_drop = nn.Dropout(p=drop_rate)
+
+         self.blocks = nn.ModuleList([
+             Block(
+                 dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
+                 drop=drop_rate, attn_drop=attn_drop_rate, norm_layer=norm_layer)
+             for i in range(depth)])
+         self.norm = norm_layer(embed_dim)
+
+         # Classifier head
+         self.head = nn.Linear(embed_dim, num_classes) if num_classes > 0 else nn.Identity()
+
+         trunc_normal_(self.pos_embed, std=.02)
+         trunc_normal_(self.cls_token, std=.02)
+         self.apply(self._init_weights)
+
+     def _init_weights(self, m):
+         if isinstance(m, nn.Linear):
+             trunc_normal_(m.weight, std=.02)
+             if isinstance(m, nn.Linear) and m.bias is not None:
+                 nn.init.constant_(m.bias, 0)
+         elif isinstance(m, nn.LayerNorm):
+             nn.init.constant_(m.bias, 0)
+             nn.init.constant_(m.weight, 1.0)
+
+     @torch.jit.ignore
+     def no_weight_decay(self):
+         return {'pos_embed', 'cls_token'}
+
+     def forward(self, x, register_hook=False):
+         B = x.shape[0]
+         x = self.patch_embed(x)
+
+         cls_tokens = self.cls_token.expand(B, -1, -1)  # stole cls_tokens impl from Phil Wang, thanks
+         x = torch.cat((cls_tokens, x), dim=1)
+         x = x + self.pos_embed
+         x = self.pos_drop(x)
+
+         for blk in self.blocks:
+             x = blk(x, register_hook=register_hook)
+
+         x = self.norm(x)
+         x = x[:, 0]
+         x = self.head(x)
+         return x
+
+
+ def _conv_filter(state_dict, patch_size=16):
+     """ convert patch embedding weight from manual patchify + linear proj to conv"""
+     out_dict = {}
+     for k, v in state_dict.items():
+         if 'patch_embed.proj.weight' in k:
+             v = v.reshape((v.shape[0], 3, patch_size, patch_size))
+         out_dict[k] = v
+     return out_dict
+
+
+ def vit_base_patch16_224(pretrained=False, **kwargs):
+     model = VisionTransformer(
+         patch_size=16, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True,
+         norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
+     model.default_cfg = default_cfgs['vit_base_patch16_224']
+     if pretrained:
+         load_pretrained(
+             model, num_classes=model.num_classes, in_chans=kwargs.get('in_chans', 3), filter_fn=_conv_filter)
+     return model
+
+ def vit_large_patch16_224(pretrained=False, **kwargs):
+     model = VisionTransformer(
+         patch_size=16, embed_dim=1024, depth=24, num_heads=16, mlp_ratio=4, qkv_bias=True,
+         norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
+     model.default_cfg = default_cfgs['vit_large_patch16_224']
+     if pretrained:
+         load_pretrained(model, num_classes=model.num_classes, in_chans=kwargs.get('in_chans', 3))
+     return model
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/ViT_orig_LRP.py ADDED
@@ -0,0 +1,425 @@
+ """ Vision Transformer (ViT) in PyTorch
+ Hacked together by / Copyright 2020 Ross Wightman
+ """
+ import torch
+ import torch.nn as nn
+ from einops import rearrange
+
+ from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.modules.layers_lrp import *
+ from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.helpers import load_pretrained
+ from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.weight_init import trunc_normal_
+ from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.layer_helpers import to_2tuple
+
+
+ def _cfg(url='', **kwargs):
+     return {
+         'url': url,
+         'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': None,
+         'crop_pct': .9, 'interpolation': 'bicubic',
+         'first_conv': 'patch_embed.proj', 'classifier': 'head',
+         **kwargs
+     }
+
+
+ default_cfgs = {
+     # patch models
+     'vit_small_patch16_224': _cfg(
+         url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/vit_small_p16_224-15ec54c9.pth',
+     ),
+     'vit_base_patch16_224': _cfg(
+         url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth',
+         mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5),
+     ),
+     'vit_large_patch16_224': _cfg(
+         url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_p16_224-4ee7a4dc.pth',
+         mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
+ }
+
+ def compute_rollout_attention(all_layer_matrices, start_layer=0):
+     # adding residual consideration
+     num_tokens = all_layer_matrices[0].shape[1]
+     batch_size = all_layer_matrices[0].shape[0]
+     eye = torch.eye(num_tokens).expand(batch_size, num_tokens, num_tokens).to(all_layer_matrices[0].device)
+     all_layer_matrices = [all_layer_matrices[i] + eye for i in range(len(all_layer_matrices))]
+     # all_layer_matrices = [all_layer_matrices[i] / all_layer_matrices[i].sum(dim=-1, keepdim=True)
+     #                       for i in range(len(all_layer_matrices))]
+     joint_attention = all_layer_matrices[start_layer]
+     for i in range(start_layer+1, len(all_layer_matrices)):
+         joint_attention = all_layer_matrices[i].bmm(joint_attention)
+     return joint_attention
+
+ class Mlp(nn.Module):
+     def __init__(self, in_features, hidden_features=None, out_features=None, drop=0.):
+         super().__init__()
+         out_features = out_features or in_features
+         hidden_features = hidden_features or in_features
+         self.fc1 = Linear(in_features, hidden_features)
+         self.act = GELU()
+         self.fc2 = Linear(hidden_features, out_features)
+         self.drop = Dropout(drop)
+
+     def forward(self, x):
+         x = self.fc1(x)
+         x = self.act(x)
+         x = self.drop(x)
+         x = self.fc2(x)
+         x = self.drop(x)
+         return x
+
+     def relprop(self, cam, **kwargs):
+         cam = self.drop.relprop(cam, **kwargs)
+         cam = self.fc2.relprop(cam, **kwargs)
+         cam = self.act.relprop(cam, **kwargs)
+         cam = self.fc1.relprop(cam, **kwargs)
+         return cam
+
+
+ class Attention(nn.Module):
+     def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0., proj_drop=0.):
+         super().__init__()
+         self.num_heads = num_heads
+         head_dim = dim // num_heads
+         # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
+         self.scale = head_dim ** -0.5
+
+         # A = Q*K^T
+         self.matmul1 = einsum('bhid,bhjd->bhij')
+         # attn = A*V
+         self.matmul2 = einsum('bhij,bhjd->bhid')
+
+         self.qkv = Linear(dim, dim * 3, bias=qkv_bias)
+         self.attn_drop = Dropout(attn_drop)
+         self.proj = Linear(dim, dim)
+         self.proj_drop = Dropout(proj_drop)
+         self.softmax = Softmax(dim=-1)
+
+         self.attn_cam = None
+         self.attn = None
+         self.v = None
+         self.v_cam = None
+         self.attn_gradients = None
+
+     def get_attn(self):
+         return self.attn
+
+     def save_attn(self, attn):
+         self.attn = attn
+
+     def save_attn_cam(self, cam):
+         self.attn_cam = cam
+
+     def get_attn_cam(self):
+         return self.attn_cam
+
+     def get_v(self):
+         return self.v
+
+     def save_v(self, v):
+         self.v = v
+
+     def save_v_cam(self, cam):
+         self.v_cam = cam
+
+     def get_v_cam(self):
+         return self.v_cam
+
+     def save_attn_gradients(self, attn_gradients):
+         self.attn_gradients = attn_gradients
+
+     def get_attn_gradients(self):
+         return self.attn_gradients
+
+     def forward(self, x):
+         b, n, _, h = *x.shape, self.num_heads
+         qkv = self.qkv(x)
+         q, k, v = rearrange(qkv, 'b n (qkv h d) -> qkv b h n d', qkv=3, h=h)
+
+         self.save_v(v)
+
+         dots = self.matmul1([q, k]) * self.scale
+
+         attn = self.softmax(dots)
+         attn = self.attn_drop(attn)
+
+         self.save_attn(attn)
+         attn.register_hook(self.save_attn_gradients)
+
+         out = self.matmul2([attn, v])
+         out = rearrange(out, 'b h n d -> b n (h d)')
+
+         out = self.proj(out)
+         out = self.proj_drop(out)
+         return out
+
+     def relprop(self, cam, **kwargs):
+         cam = self.proj_drop.relprop(cam, **kwargs)
+         cam = self.proj.relprop(cam, **kwargs)
+         cam = rearrange(cam, 'b n (h d) -> b h n d', h=self.num_heads)
+
+         # attn = A*V
+         (cam1, cam_v) = self.matmul2.relprop(cam, **kwargs)
+         cam1 /= 2
+         cam_v /= 2
+
+         self.save_v_cam(cam_v)
+         self.save_attn_cam(cam1)
+
+         cam1 = self.attn_drop.relprop(cam1, **kwargs)
+         cam1 = self.softmax.relprop(cam1, **kwargs)
169
+
170
+ # A = Q*K^T
171
+ (cam_q, cam_k) = self.matmul1.relprop(cam1, **kwargs)
172
+ cam_q /= 2
173
+ cam_k /= 2
174
+
175
+ cam_qkv = rearrange([cam_q, cam_k, cam_v], 'qkv b h n d -> b n (qkv h d)', qkv=3, h=self.num_heads)
176
+
177
+ return self.qkv.relprop(cam_qkv, **kwargs)
178
+
179
+
180
+ class Block(nn.Module):
181
+
182
+ def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, drop=0., attn_drop=0.):
183
+ super().__init__()
184
+ self.norm1 = LayerNorm(dim, eps=1e-6)
185
+ self.attn = Attention(
186
+ dim, num_heads=num_heads, qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop)
187
+ self.norm2 = LayerNorm(dim, eps=1e-6)
188
+ mlp_hidden_dim = int(dim * mlp_ratio)
189
+ self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, drop=drop)
190
+
191
+ self.add1 = Add()
192
+ self.add2 = Add()
193
+ self.clone1 = Clone()
194
+ self.clone2 = Clone()
195
+
196
+ def forward(self, x):
197
+ x1, x2 = self.clone1(x, 2)
198
+ x = self.add1([x1, self.attn(self.norm1(x2))])
199
+ x1, x2 = self.clone2(x, 2)
200
+ x = self.add2([x1, self.mlp(self.norm2(x2))])
201
+ return x
202
+
203
+ def relprop(self, cam, **kwargs):
204
+ (cam1, cam2) = self.add2.relprop(cam, **kwargs)
205
+ cam2 = self.mlp.relprop(cam2, **kwargs)
206
+ cam2 = self.norm2.relprop(cam2, **kwargs)
207
+ cam = self.clone2.relprop((cam1, cam2), **kwargs)
208
+
209
+ (cam1, cam2) = self.add1.relprop(cam, **kwargs)
210
+ cam2 = self.attn.relprop(cam2, **kwargs)
211
+ cam2 = self.norm1.relprop(cam2, **kwargs)
212
+ cam = self.clone1.relprop((cam1, cam2), **kwargs)
213
+ return cam
214
+
215
+
216
+ class PatchEmbed(nn.Module):
217
+ """ Image to Patch Embedding
218
+ """
219
+ def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
220
+ super().__init__()
221
+ img_size = to_2tuple(img_size)
222
+ patch_size = to_2tuple(patch_size)
223
+ num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0])
224
+ self.img_size = img_size
225
+ self.patch_size = patch_size
226
+ self.num_patches = num_patches
227
+
228
+ self.proj = Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
229
+
230
+ def forward(self, x):
231
+ B, C, H, W = x.shape
232
+ # FIXME look at relaxing size constraints
233
+ assert H == self.img_size[0] and W == self.img_size[1], \
234
+ f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
235
+ x = self.proj(x).flatten(2).transpose(1, 2)
236
+ return x
237
+
238
+ def relprop(self, cam, **kwargs):
239
+ cam = cam.transpose(1,2)
240
+ cam = cam.reshape(cam.shape[0], cam.shape[1],
241
+ (self.img_size[0] // self.patch_size[0]), (self.img_size[1] // self.patch_size[1]))
242
+ return self.proj.relprop(cam, **kwargs)
243
+
244
+
245
+ class VisionTransformer(nn.Module):
246
+ """ Vision Transformer with support for patch or hybrid CNN input stage
247
+ """
248
+ def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dim=768, depth=12,
249
+ num_heads=12, mlp_ratio=4., qkv_bias=False, mlp_head=False, drop_rate=0., attn_drop_rate=0.):
250
+ super().__init__()
251
+ self.num_classes = num_classes
252
+ self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models
253
+ self.patch_embed = PatchEmbed(
254
+ img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim)
255
+ num_patches = self.patch_embed.num_patches
256
+
257
+ self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
258
+ self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
259
+
260
+ self.blocks = nn.ModuleList([
261
+ Block(
262
+ dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
263
+ drop=drop_rate, attn_drop=attn_drop_rate)
264
+ for i in range(depth)])
265
+
266
+ self.norm = LayerNorm(embed_dim)
267
+ if mlp_head:
268
+ # paper diagram suggests 'MLP head', but results in 4M extra parameters vs paper
269
+ self.head = Mlp(embed_dim, int(embed_dim * mlp_ratio), num_classes)
270
+ else:
271
+ # with a single Linear layer as head, the param count is within rounding of the paper
272
+ self.head = Linear(embed_dim, num_classes)
273
+
274
+ # FIXME not quite sure what the proper weight init is supposed to be,
275
+ # normal / trunc normal w/ std == .02 similar to other Bert like transformers
276
+ trunc_normal_(self.pos_embed, std=.02) # embeddings same as weights?
277
+ trunc_normal_(self.cls_token, std=.02)
278
+ self.apply(self._init_weights)
279
+
280
+ self.pool = IndexSelect()
281
+ self.add = Add()
282
+
283
+ self.inp_grad = None
284
+
285
+ def save_inp_grad(self,grad):
286
+ self.inp_grad = grad
287
+
288
+ def get_inp_grad(self):
289
+ return self.inp_grad
290
+
291
+
292
+ def _init_weights(self, m):
293
+ if isinstance(m, nn.Linear):
294
+ trunc_normal_(m.weight, std=.02)
295
+ if isinstance(m, nn.Linear) and m.bias is not None:
296
+ nn.init.constant_(m.bias, 0)
297
+ elif isinstance(m, nn.LayerNorm):
298
+ nn.init.constant_(m.bias, 0)
299
+ nn.init.constant_(m.weight, 1.0)
300
+
301
+ @property
302
+ def no_weight_decay(self):
303
+ return {'pos_embed', 'cls_token'}
304
+
305
+ def forward(self, x):
306
+ B = x.shape[0]
307
+ x = self.patch_embed(x)
308
+
309
+ cls_tokens = self.cls_token.expand(B, -1, -1) # stole cls_tokens impl from Phil Wang, thanks
310
+ x = torch.cat((cls_tokens, x), dim=1)
311
+ x = self.add([x, self.pos_embed])
312
+
313
+ x.register_hook(self.save_inp_grad)
314
+
315
+ for blk in self.blocks:
316
+ x = blk(x)
317
+
318
+ x = self.norm(x)
319
+ x = self.pool(x, dim=1, indices=torch.tensor(0, device=x.device))
320
+ x = x.squeeze(1)
321
+ x = self.head(x)
322
+ return x
323
+
324
+ def relprop(self, cam=None,method="grad", is_ablation=False, start_layer=0, **kwargs):
325
+ # print(kwargs)
326
+ # print("conservation 1", cam.sum())
327
+ cam = self.head.relprop(cam, **kwargs)
328
+ cam = cam.unsqueeze(1)
329
+ cam = self.pool.relprop(cam, **kwargs)
330
+ cam = self.norm.relprop(cam, **kwargs)
331
+ for blk in reversed(self.blocks):
332
+ cam = blk.relprop(cam, **kwargs)
333
+
334
+ # print("conservation 2", cam.sum())
335
+ # print("min", cam.min())
336
+
337
+ if method == "full":
338
+ (cam, _) = self.add.relprop(cam, **kwargs)
339
+ cam = cam[:, 1:]
340
+ cam = self.patch_embed.relprop(cam, **kwargs)
341
+ # sum on channels
342
+ cam = cam.sum(dim=1)
343
+ return cam
344
+
345
+ elif method == "rollout":
346
+ # cam rollout
347
+ attn_cams = []
348
+ for blk in self.blocks:
349
+ attn_heads = blk.attn.get_attn_cam().clamp(min=0)
350
+ avg_heads = (attn_heads.sum(dim=1) / attn_heads.shape[1]).detach()
351
+ attn_cams.append(avg_heads)
352
+ cam = compute_rollout_attention(attn_cams, start_layer=start_layer)
353
+ cam = cam[:, 0, 1:]
354
+ return cam
355
+
356
+ elif method == "grad":
357
+ cams = []
358
+ for blk in self.blocks:
359
+ grad = blk.attn.get_attn_gradients()
360
+ cam = blk.attn.get_attn_cam()
361
+ cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
362
+ grad = grad[0].reshape(-1, grad.shape[-1], grad.shape[-1])
363
+ cam = grad * cam
364
+ cam = cam.clamp(min=0).mean(dim=0)
365
+ cams.append(cam.unsqueeze(0))
366
+ rollout = compute_rollout_attention(cams, start_layer=start_layer)
367
+ cam = rollout[:, 0, 1:]
368
+ return cam
369
+
370
+ elif method == "last_layer":
371
+ cam = self.blocks[-1].attn.get_attn_cam()
372
+ cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
373
+ if is_ablation:
374
+ grad = self.blocks[-1].attn.get_attn_gradients()
375
+ grad = grad[0].reshape(-1, grad.shape[-1], grad.shape[-1])
376
+ cam = grad * cam
377
+ cam = cam.clamp(min=0).mean(dim=0)
378
+ cam = cam[0, 1:]
379
+ return cam
380
+
381
+ elif method == "last_layer_attn":
382
+ cam = self.blocks[-1].attn.get_attn()
383
+ cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
384
+ cam = cam.clamp(min=0).mean(dim=0)
385
+ cam = cam[0, 1:]
386
+ return cam
387
+
388
+ elif method == "second_layer":
389
+ cam = self.blocks[1].attn.get_attn_cam()
390
+ cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
391
+ if is_ablation:
392
+ grad = self.blocks[1].attn.get_attn_gradients()
393
+ grad = grad[0].reshape(-1, grad.shape[-1], grad.shape[-1])
394
+ cam = grad * cam
395
+ cam = cam.clamp(min=0).mean(dim=0)
396
+ cam = cam[0, 1:]
397
+ return cam
398
+
399
+
400
+ def _conv_filter(state_dict, patch_size=16):
401
+ """ convert patch embedding weight from manual patchify + linear proj to conv"""
402
+ out_dict = {}
403
+ for k, v in state_dict.items():
404
+ if 'patch_embed.proj.weight' in k:
405
+ v = v.reshape((v.shape[0], 3, patch_size, patch_size))
406
+ out_dict[k] = v
407
+ return out_dict
408
+
409
+
410
+ def vit_base_patch16_224(pretrained=False, **kwargs):
411
+ model = VisionTransformer(
412
+ patch_size=16, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True, **kwargs)
413
+ model.default_cfg = default_cfgs['vit_base_patch16_224']
414
+ if pretrained:
415
+ load_pretrained(
416
+ model, num_classes=model.num_classes, in_chans=kwargs.get('in_chans', 3), filter_fn=_conv_filter)
417
+ return model
418
+
419
+ def vit_large_patch16_224(pretrained=False, **kwargs):
420
+ model = VisionTransformer(
421
+ patch_size=16, embed_dim=1024, depth=24, num_heads=16, mlp_ratio=4, qkv_bias=True, **kwargs)
422
+ model.default_cfg = default_cfgs['vit_large_patch16_224']
423
+ if pretrained:
424
+ load_pretrained(model, num_classes=model.num_classes, in_chans=kwargs.get('in_chans', 3))
425
+ return model
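For orientation, here is a minimal usage sketch for the LRP-instrumented ViT defined in `ViT_orig_LRP.py` above. It is a sketch under assumptions, not part of the repository: it assumes the `modules.layers_lrp` relprop implementations accept an `alpha` keyword (as in the upstream Chefer code), uses random weights (`pretrained=False`) so nothing needs to be downloaded, and feeds a random tensor in place of a normalized 224x224 image.

```python
import torch
from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.ViT_orig_LRP import (
    vit_base_patch16_224,
)

# Set pretrained=True to pull the timm checkpoint listed in default_cfgs.
model = vit_base_patch16_224(pretrained=False).eval()
image = torch.randn(1, 3, 224, 224)  # stand-in for a normalized ImageNet image

logits = model(image)
target_class = logits.argmax(dim=-1).item()

# Backpropagate a one-hot signal so the hooks above record attention gradients.
one_hot = torch.zeros_like(logits)
one_hot[0, target_class] = 1.0
(logits * one_hot).sum().backward(retain_graph=True)

# "grad" multiplies each block's attention CAM by its gradient and rolls the result out.
# alpha=1 follows the alpha-beta LRP convention used by the upstream explanation generator (assumption).
relevance = model.relprop(cam=one_hot, method="grad", start_layer=0, alpha=1)
patch_relevance = relevance.detach().reshape(1, 14, 14)  # 224 / 16 = 14 patches per side
```

The resulting 14x14 map assigns a relevance score to each image patch for the predicted class and can be upsampled to the input resolution for visualization.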
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/__pycache__/ViT_LRP.cpython-310.pyc ADDED
Binary file (14.4 kB). View file
 
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/__pycache__/ViT_explanation_generator.cpython-310.pyc ADDED
Binary file (3.49 kB). View file
 
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/__pycache__/ViT_new.cpython-310.pyc ADDED
Binary file (9.15 kB). View file
 
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/__pycache__/ViT_orig_LRP.cpython-310.pyc ADDED
Binary file (13.9 kB). View file
 
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/__pycache__/helpers.cpython-310.pyc ADDED
Binary file (7.28 kB). View file
 
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/__pycache__/layer_helpers.cpython-310.pyc ADDED
Binary file (810 Bytes). View file
 
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/__pycache__/weight_init.cpython-310.pyc ADDED
Binary file (1.98 kB). View file
 
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/data/VOC.py ADDED
@@ -0,0 +1,395 @@
1
+ import os
2
+ import tarfile
3
+ import torch
4
+ import torch.utils.data as data
5
+ import numpy as np
6
+ import h5py
7
+
8
+ from PIL import Image
9
+ from scipy import io
10
+ from torchvision.datasets.utils import download_url
11
+
12
+ DATASET_YEAR_DICT = {
13
+ '2012': {
14
+ 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar',
15
+ 'filename': 'VOCtrainval_11-May-2012.tar',
16
+ 'md5': '6cd6e144f989b92b3379bac3b3de84fd',
17
+ 'base_dir': 'VOCdevkit/VOC2012'
18
+ },
19
+ '2011': {
20
+ 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2011/VOCtrainval_25-May-2011.tar',
21
+ 'filename': 'VOCtrainval_25-May-2011.tar',
22
+ 'md5': '6c3384ef61512963050cb5d687e5bf1e',
23
+ 'base_dir': 'TrainVal/VOCdevkit/VOC2011'
24
+ },
25
+ '2010': {
26
+ 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar',
27
+ 'filename': 'VOCtrainval_03-May-2010.tar',
28
+ 'md5': 'da459979d0c395079b5c75ee67908abb',
29
+ 'base_dir': 'VOCdevkit/VOC2010'
30
+ },
31
+ '2009': {
32
+ 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2009/VOCtrainval_11-May-2009.tar',
33
+ 'filename': 'VOCtrainval_11-May-2009.tar',
34
+ 'md5': '59065e4b188729180974ef6572f6a212',
35
+ 'base_dir': 'VOCdevkit/VOC2009'
36
+ },
37
+ '2008': {
38
+ 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2008/VOCtrainval_14-Jul-2008.tar',
39
+ 'filename': 'VOCtrainval_11-May-2012.tar',
40
+ 'md5': '2629fa636546599198acfcfbfcf1904a',
41
+ 'base_dir': 'VOCdevkit/VOC2008'
42
+ },
43
+ '2007': {
44
+ 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar',
45
+ 'filename': 'VOCtrainval_06-Nov-2007.tar',
46
+ 'md5': 'c52e279531787c972589f7e41ab4ae64',
47
+ 'base_dir': 'VOCdevkit/VOC2007'
48
+ }
49
+ }
50
+
51
+
52
+ class VOCSegmentation(data.Dataset):
53
+ """`Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Segmentation Dataset.
54
+
55
+ Args:
56
+ root (string): Root directory of the VOC Dataset.
57
+ year (string, optional): The dataset year, supports years 2007 to 2012.
58
+ image_set (string, optional): Select the image_set to use, ``train``, ``trainval`` or ``val``
59
+ download (bool, optional): If true, downloads the dataset from the internet and
60
+ puts it in root directory. If dataset is already downloaded, it is not
61
+ downloaded again.
62
+ transform (callable, optional): A function/transform that takes in an PIL image
63
+ and returns a transformed version. E.g, ``transforms.RandomCrop``
64
+ target_transform (callable, optional): A function/transform that takes in the
65
+ target and transforms it.
66
+ """
67
+
68
+ CLASSES = 20
69
+ # CLASSES_NAMES = [
70
+ # "background", 'airplane', 'bicycle', 'bird', 'boat', 'bottle',
71
+ # 'bus', 'car', 'cat', 'chair', 'cow', 'table', 'dog', 'horse',
72
+ # 'motorcycle', 'person', 'pot', 'sheep', 'sofa', 'train',
73
+ # 'monitor'
74
+ # # 'ambigious'
75
+ # ]
76
+ CLASSES_NAMES = [
77
+ "background", 'plane', 'bike', 'bird', 'boat', 'bottle',
78
+ 'bus', 'car', 'cat', 'chair', 'cow', 'table', 'dog', 'horse',
79
+ 'motorcycle', 'person', 'pot', 'sheep', 'sofa', 'train',
80
+ 'monitor'
81
+ # 'ambigious'
82
+ ]
83
+
84
+ def __init__(
85
+ self,
86
+ root,
87
+ year='2012',
88
+ image_set='train',
89
+ download=False,
90
+ transform=None,
91
+ target_transform=None,
92
+ binary_class=False
93
+ ):
94
+ self.root = os.path.expanduser(root)
95
+ self.binary_class = binary_class
96
+ self.year = year
97
+ self.url = DATASET_YEAR_DICT[year]['url']
98
+ self.filename = DATASET_YEAR_DICT[year]['filename']
99
+ self.md5 = DATASET_YEAR_DICT[year]['md5']
100
+ self.transform = transform
101
+ self.target_transform = target_transform
102
+ self.image_set = image_set
103
+ base_dir = DATASET_YEAR_DICT[year]['base_dir']
104
+ voc_root = os.path.join(self.root, base_dir)
105
+ image_dir = os.path.join(voc_root, 'JPEGImages')
106
+ mask_dir = os.path.join(voc_root, 'SegmentationClass')
107
+
108
+ if download:
109
+ download_extract(self.url, self.root, self.filename, self.md5)
110
+
111
+ if not os.path.isdir(voc_root):
112
+ raise RuntimeError('Dataset not found or corrupted.' +
113
+ ' You can use download=True to download it')
114
+
115
+ splits_dir = os.path.join(voc_root, 'ImageSets/Segmentation')
116
+
117
+ split_f = os.path.join(splits_dir, image_set.rstrip('\n') + '.txt')
118
+
119
+ if not os.path.exists(split_f):
120
+ raise ValueError(
121
+ 'Wrong image_set entered! Please use image_set="train" '
122
+ 'or image_set="trainval" or image_set="val"')
123
+
124
+ with open(os.path.join(split_f), "r") as f:
125
+ file_names = [x.strip() for x in f.readlines()]
126
+
127
+ self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names]
128
+ self.masks = [os.path.join(mask_dir, x + ".png") for x in file_names]
129
+ assert (len(self.images) == len(self.masks))
130
+
131
+ def __getitem__(self, index):
132
+ """
133
+ Args:
134
+ index (int): Index
135
+
136
+ Returns:
137
+ tuple: (image, target) where target is the image segmentation.
138
+ """
139
+ img = Image.open(self.images[index]).convert('RGB')
140
+ target = Image.open(self.masks[index])
141
+
142
+ if self.transform is not None:
143
+ img = self.transform(img)
144
+
145
+ if self.target_transform is not None:
146
+ target = np.array(self.target_transform(target)).astype('int32')
147
+ target[target == 255] = -1
148
+ target = torch.from_numpy(target).long()
149
+
150
+ # # Convert target to (2, height, width)
151
+ # target = torch.stack([target, 1 - target], dim=0)
152
+ # Get a list of the classes that are present in the image
153
+ visible_classes = np.unique(target)
154
+ # Convert these to class names
155
+ present_classes = [self.CLASSES_NAMES[i] for i in visible_classes if i != -1]
156
+
157
+ if self.binary_class:
158
+ # Take all classes that aren't zero or -1 and mkae them 1
159
+ target[target >= 1] = 1
160
+
161
+ return img, target, present_classes
162
+
163
+ @staticmethod
164
+ def _mask_transform(mask):
165
+ target = np.array(mask).astype('int32')
166
+ target[target == 255] = -1
167
+ return torch.from_numpy(target).long()
168
+
169
+ def __len__(self):
170
+ return len(self.images)
171
+
172
+ @property
173
+ def pred_offset(self):
174
+ return 0
175
+
176
+
177
+ class VOCClassification(data.Dataset):
178
+ """`Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Segmentation Dataset.
179
+
180
+ Args:
181
+ root (string): Root directory of the VOC Dataset.
182
+ year (string, optional): The dataset year, supports years 2007 to 2012.
183
+ image_set (string, optional): Select the image_set to use, ``train``, ``trainval`` or ``val``
184
+ download (bool, optional): If true, downloads the dataset from the internet and
185
+ puts it in root directory. If dataset is already downloaded, it is not
186
+ downloaded again.
187
+ transform (callable, optional): A function/transform that takes in an PIL image
188
+ and returns a transformed version. E.g, ``transforms.RandomCrop``
189
+ """
190
+ CLASSES = 20
191
+
192
+ def __init__(self,
193
+ root,
194
+ year='2012',
195
+ image_set='train',
196
+ download=False,
197
+ transform=None):
198
+ self.root = os.path.expanduser(root)
199
+ self.year = year
200
+ self.url = DATASET_YEAR_DICT[year]['url']
201
+ self.filename = DATASET_YEAR_DICT[year]['filename']
202
+ self.md5 = DATASET_YEAR_DICT[year]['md5']
203
+ self.transform = transform
204
+ self.image_set = image_set
205
+ base_dir = DATASET_YEAR_DICT[year]['base_dir']
206
+ voc_root = os.path.join(self.root, base_dir)
207
+ image_dir = os.path.join(voc_root, 'JPEGImages')
208
+ mask_dir = os.path.join(voc_root, 'SegmentationClass')
209
+
210
+ if download:
211
+ download_extract(self.url, self.root, self.filename, self.md5)
212
+
213
+ if not os.path.isdir(voc_root):
214
+ raise RuntimeError('Dataset not found or corrupted.' +
215
+ ' You can use download=True to download it')
216
+
217
+ splits_dir = os.path.join(voc_root, 'ImageSets/Segmentation')
218
+
219
+ split_f = os.path.join(splits_dir, image_set.rstrip('\n') + '.txt')
220
+
221
+ if not os.path.exists(split_f):
222
+ raise ValueError(
223
+ 'Wrong image_set entered! Please use image_set="train" '
224
+ 'or image_set="trainval" or image_set="val"')
225
+
226
+ with open(os.path.join(split_f), "r") as f:
227
+ file_names = [x.strip() for x in f.readlines()]
228
+
229
+ self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names]
230
+ self.masks = [os.path.join(mask_dir, x + ".png") for x in file_names]
231
+ assert (len(self.images) == len(self.masks))
232
+
233
+ def __getitem__(self, index):
234
+ """
235
+ Args:
236
+ index (int): Index
237
+
238
+ Returns:
239
+ tuple: (image, target) where target is the image segmentation.
240
+ """
241
+ img = Image.open(self.images[index]).convert('RGB')
242
+ target = Image.open(self.masks[index])
243
+
244
+ # if self.transform is not None:
245
+ # img = self.transform(img)
246
+ if self.transform is not None:
247
+ img, target = self.transform(img, target)
248
+
249
+ visible_classes = np.unique(target)
250
+ labels = torch.zeros(self.CLASSES)
251
+ for id in visible_classes:
252
+ if id not in (0, 255):
253
+ labels[id - 1].fill_(1)
254
+
255
+ return img, labels
256
+
257
+ def __len__(self):
258
+ return len(self.images)
259
+
260
+
261
+ class VOCSBDClassification(data.Dataset):
262
+ """`Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Segmentation Dataset.
263
+
264
+ Args:
265
+ root (string): Root directory of the VOC Dataset.
266
+ year (string, optional): The dataset year, supports years 2007 to 2012.
267
+ image_set (string, optional): Select the image_set to use, ``train``, ``trainval`` or ``val``
268
+ download (bool, optional): If true, downloads the dataset from the internet and
269
+ puts it in root directory. If dataset is already downloaded, it is not
270
+ downloaded again.
271
+ transform (callable, optional): A function/transform that takes in an PIL image
272
+ and returns a transformed version. E.g, ``transforms.RandomCrop``
273
+ """
274
+ CLASSES = 20
275
+
276
+ def __init__(self,
277
+ root,
278
+ sbd_root,
279
+ year='2012',
280
+ image_set='train',
281
+ download=False,
282
+ transform=None):
283
+ self.root = os.path.expanduser(root)
284
+ self.sbd_root = os.path.expanduser(sbd_root)
285
+ self.year = year
286
+ self.url = DATASET_YEAR_DICT[year]['url']
287
+ self.filename = DATASET_YEAR_DICT[year]['filename']
288
+ self.md5 = DATASET_YEAR_DICT[year]['md5']
289
+ self.transform = transform
290
+ self.image_set = image_set
291
+ base_dir = DATASET_YEAR_DICT[year]['base_dir']
292
+ voc_root = os.path.join(self.root, base_dir)
293
+ image_dir = os.path.join(voc_root, 'JPEGImages')
294
+ mask_dir = os.path.join(voc_root, 'SegmentationClass')
295
+ sbd_image_dir = os.path.join(sbd_root, 'img')
296
+ sbd_mask_dir = os.path.join(sbd_root, 'cls')
297
+
298
+ if download:
299
+ download_extract(self.url, self.root, self.filename, self.md5)
300
+
301
+ if not os.path.isdir(voc_root):
302
+ raise RuntimeError('Dataset not found or corrupted.' +
303
+ ' You can use download=True to download it')
304
+
305
+ splits_dir = os.path.join(voc_root, 'ImageSets/Segmentation')
306
+
307
+ split_f = os.path.join(splits_dir, image_set.rstrip('\n') + '.txt')
308
+ sbd_split = os.path.join(sbd_root, 'train.txt')
309
+
310
+ if not os.path.exists(split_f):
311
+ raise ValueError(
312
+ 'Wrong image_set entered! Please use image_set="train" '
313
+ 'or image_set="trainval" or image_set="val"')
314
+
315
+ with open(os.path.join(split_f), "r") as f:
316
+ voc_file_names = [x.strip() for x in f.readlines()]
317
+
318
+ with open(os.path.join(sbd_split), "r") as f:
319
+ sbd_file_names = [x.strip() for x in f.readlines()]
320
+
321
+ self.images = [os.path.join(image_dir, x + ".jpg") for x in voc_file_names]
322
+ self.images += [os.path.join(sbd_image_dir, x + ".jpg") for x in sbd_file_names]
323
+ self.masks = [os.path.join(mask_dir, x + ".png") for x in voc_file_names]
324
+ self.masks += [os.path.join(sbd_mask_dir, x + ".mat") for x in sbd_file_names]
325
+ assert (len(self.images) == len(self.masks))
326
+
327
+ def __getitem__(self, index):
328
+ """
329
+ Args:
330
+ index (int): Index
331
+
332
+ Returns:
333
+ tuple: (image, target) where target is the image segmentation.
334
+ """
335
+ img = Image.open(self.images[index]).convert('RGB')
336
+ mask_path = self.masks[index]
337
+ if mask_path[-3:] == 'mat':
338
+ target = io.loadmat(mask_path, struct_as_record=False, squeeze_me=True)['GTcls'].Segmentation
339
+ target = Image.fromarray(target, mode='P')
340
+ else:
341
+ target = Image.open(self.masks[index])
342
+
343
+ if self.transform is not None:
344
+ img, target = self.transform(img, target)
345
+
346
+ visible_classes = np.unique(target)
347
+ labels = torch.zeros(self.CLASSES)
348
+ for id in visible_classes:
349
+ if id not in (0, 255):
350
+ labels[id - 1].fill_(1)
351
+
352
+ return img, labels
353
+
354
+ def __len__(self):
355
+ return len(self.images)
356
+
357
+
358
+ def download_extract(url, root, filename, md5):
359
+ download_url(url, root, filename, md5)
360
+ with tarfile.open(os.path.join(root, filename), "r") as tar:
361
+ tar.extractall(path=root)
362
+
363
+
364
+ class VOCResults(data.Dataset):
365
+ CLASSES = 20
366
+ CLASSES_NAMES = [
367
+ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
368
+ 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
369
+ 'motorbike', 'person', 'potted-plant', 'sheep', 'sofa', 'train',
370
+ 'tvmonitor', 'ambigious'
371
+ ]
372
+
373
+ def __init__(self, path):
374
+ super(VOCResults, self).__init__()
375
+
376
+ self.path = os.path.join(path, 'results.hdf5')
377
+ self.data = None
378
+
379
+ print('Reading dataset length...')
380
+ with h5py.File(self.path , 'r') as f:
381
+ self.data_length = len(f['/image'])
382
+
383
+ def __len__(self):
384
+ return self.data_length
385
+
386
+ def __getitem__(self, item):
387
+ if self.data is None:
388
+ self.data = h5py.File(self.path, 'r')
389
+
390
+ image = torch.tensor(self.data['image'][item])
391
+ vis = torch.tensor(self.data['vis'][item])
392
+ target = torch.tensor(self.data['target'][item])
393
+ class_pred = torch.tensor(self.data['class_pred'][item])
394
+
395
+ return image, vis, target, class_pred
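As a quick orientation for the dataset classes above, a minimal sketch of loading `VOCSegmentation` follows. The `./data` root and the transform choices are illustrative assumptions, not values taken from the repository.

```python
import torchvision.transforms as transforms
from PIL import Image

from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.data.VOC import VOCSegmentation

img_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
# Nearest-neighbour resize keeps the mask labels integral.
target_transform = transforms.Resize((224, 224), Image.NEAREST)

dataset = VOCSegmentation(
    root="./data",            # illustrative path to the extracted VOCdevkit
    year="2012",
    image_set="val",
    download=False,
    transform=img_transform,
    target_transform=target_transform,
    binary_class=True,        # collapse all foreground classes to a single label
)

image, target, present_classes = dataset[0]
print(image.shape, target.shape, present_classes)
```

Each item yields the transformed image, a long tensor mask (with 255 remapped to -1 for the ignore region), and the list of class names visible in that mask.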
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/data/__init__.py ADDED
File without changes
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/data/__pycache__/Imagenet.cpython-310.pyc ADDED
Binary file (5.25 kB). View file
 
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/data/__pycache__/VOC.cpython-310.pyc ADDED
Binary file (12.1 kB). View file
 
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/data/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (220 Bytes). View file
 
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/data/__pycache__/imagenet.cpython-310.pyc ADDED
Binary file (5.37 kB). View file
 
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/data/imagenet.py ADDED
@@ -0,0 +1,200 @@
1
+ import os
2
+ import torch
3
+ import torch.utils.data as data
4
+ import numpy as np
5
+ import cv2
6
+
7
+ from torchvision.datasets import ImageNet
8
+
9
+ from PIL import Image, ImageFilter
10
+ import h5py
11
+ from glob import glob
12
+
13
+
14
+ class ImageNet_blur(ImageNet):
15
+ def __getitem__(self, index):
16
+ """
17
+ Args:
18
+ index (int): Index
19
+
20
+ Returns:
21
+ tuple: (sample, target) where target is class_index of the target class.
22
+ """
23
+ path, target = self.samples[index]
24
+ sample = self.loader(path)
25
+
26
+ gauss_blur = ImageFilter.GaussianBlur(11)
27
+ median_blur = ImageFilter.MedianFilter(11)
28
+
29
+ blurred_img1 = sample.filter(gauss_blur)
30
+ blurred_img2 = sample.filter(median_blur)
31
+ blurred_img = Image.blend(blurred_img1, blurred_img2, 0.5)
32
+
33
+ if self.transform is not None:
34
+ sample = self.transform(sample)
35
+ blurred_img = self.transform(blurred_img)
36
+ if self.target_transform is not None:
37
+ target = self.target_transform(target)
38
+
39
+ return (sample, blurred_img), target
40
+
41
+
42
+ class Imagenet_Segmentation(data.Dataset):
43
+ CLASSES = 2
44
+
45
+ def __init__(self,
46
+ path,
47
+ transform=None,
48
+ target_transform=None):
49
+ self.path = path
50
+ self.transform = transform
51
+ self.target_transform = target_transform
52
+ # self.h5py = h5py.File(path, 'r+')
53
+ self.h5py = None
54
+ with h5py.File(path, 'r') as tmp:
55
+ self.data_length = len(tmp['/value/img'])
56
+
57
+ def __getitem__(self, index):
58
+
59
+ if self.h5py is None:
60
+ self.h5py = h5py.File(self.path, 'r')
61
+
62
+ img = np.array(self.h5py[self.h5py['/value/img'][index, 0]]).transpose((2, 1, 0))
63
+ target = np.array(self.h5py[self.h5py[self.h5py['/value/gt'][index, 0]][0, 0]]).transpose((1, 0))
64
+
65
+ img = Image.fromarray(img).convert('RGB')
66
+ target = Image.fromarray(target)
67
+
68
+ if self.transform is not None:
69
+ img = self.transform(img)
70
+
71
+ if self.target_transform is not None:
72
+ target = np.array(self.target_transform(target)).astype('int32')
73
+ target = torch.from_numpy(target).long()
74
+
75
+ return img, target
76
+
77
+ def __len__(self):
78
+ # return len(self.h5py['/value/img'])
79
+ return self.data_length
80
+
81
+
82
+ class Imagenet_Segmentation_Blur(data.Dataset):
83
+ CLASSES = 2
84
+
85
+ def __init__(self,
86
+ path,
87
+ transform=None,
88
+ target_transform=None):
89
+ self.path = path
90
+ self.transform = transform
91
+ self.target_transform = target_transform
92
+ # self.h5py = h5py.File(path, 'r+')
93
+ self.h5py = None
94
+ tmp = h5py.File(path, 'r')
95
+ self.data_length = len(tmp['/value/img'])
96
+ tmp.close()
97
+ del tmp
98
+
99
+ def __getitem__(self, index):
100
+
101
+ if self.h5py is None:
102
+ self.h5py = h5py.File(self.path, 'r')
103
+
104
+ img = np.array(self.h5py[self.h5py['/value/img'][index, 0]]).transpose((2, 1, 0))
105
+ target = np.array(self.h5py[self.h5py[self.h5py['/value/gt'][index, 0]][0, 0]]).transpose((1, 0))
106
+
107
+ img = Image.fromarray(img).convert('RGB')
108
+ target = Image.fromarray(target)
109
+
110
+ gauss_blur = ImageFilter.GaussianBlur(11)
111
+ median_blur = ImageFilter.MedianFilter(11)
112
+
113
+ blurred_img1 = img.filter(gauss_blur)
114
+ blurred_img2 = img.filter(median_blur)
115
+ blurred_img = Image.blend(blurred_img1, blurred_img2, 0.5)
116
+
117
+ # blurred_img1 = cv2.GaussianBlur(img, (11, 11), 5)
118
+ # blurred_img2 = np.float32(cv2.medianBlur(img, 11))
119
+ # blurred_img = (blurred_img1 + blurred_img2) / 2
120
+
121
+ if self.transform is not None:
122
+ img = self.transform(img)
123
+ blurred_img = self.transform(blurred_img)
124
+
125
+ if self.target_transform is not None:
126
+ target = np.array(self.target_transform(target)).astype('int32')
127
+ target = torch.from_numpy(target).long()
128
+
129
+ return (img, blurred_img), target
130
+
131
+ def __len__(self):
132
+ # return len(self.h5py['/value/img'])
133
+ return self.data_length
134
+
135
+
136
+ class Imagenet_Segmentation_eval_dir(data.Dataset):
137
+ CLASSES = 2
138
+
139
+ def __init__(self,
140
+ path,
141
+ eval_path,
142
+ transform=None,
143
+ target_transform=None):
144
+ self.transform = transform
145
+ self.target_transform = target_transform
146
+ self.h5py = h5py.File(path, 'r+')
147
+
148
+ # 500 each file
149
+ self.results = glob(os.path.join(eval_path, '*.npy'))
150
+
151
+ def __getitem__(self, index):
152
+
153
+ img = np.array(self.h5py[self.h5py['/value/img'][index, 0]]).transpose((2, 1, 0))
154
+ target = np.array(self.h5py[self.h5py[self.h5py['/value/gt'][index, 0]][0, 0]]).transpose((1, 0))
155
+ res = np.load(self.results[index])
156
+
157
+ img = Image.fromarray(img).convert('RGB')
158
+ target = Image.fromarray(target)
159
+
160
+ if self.transform is not None:
161
+ img = self.transform(img)
162
+
163
+ if self.target_transform is not None:
164
+ target = np.array(self.target_transform(target)).astype('int32')
165
+ target = torch.from_numpy(target).long()
166
+
167
+ return img, target
168
+
169
+ def __len__(self):
170
+ return len(self.h5py['/value/img'])
171
+
172
+
173
+ if __name__ == '__main__':
174
+ import torchvision.transforms as transforms
175
+ from tqdm import tqdm
176
+ from imageio import imsave
177
+ import scipy.io as sio
178
+
179
+ # meta = sio.loadmat('/home/shirgur/ext/Data/Datasets/temp/ILSVRC2012_devkit_t12/data/meta.mat', squeeze_me=True)['synsets']
180
+
181
+ # Data
182
+ normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
183
+ std=[0.229, 0.224, 0.225])
184
+ test_img_trans = transforms.Compose([
185
+ transforms.Resize((224, 224)),
186
+ transforms.ToTensor(),
187
+ normalize,
188
+ ])
189
+ test_lbl_trans = transforms.Compose([
190
+ transforms.Resize((224, 224), Image.NEAREST),
191
+ ])
192
+
193
+ ds = Imagenet_Segmentation('/home/shirgur/ext/Data/Datasets/imagenet-seg/other/gtsegs_ijcv.mat',
194
+ transform=test_img_trans, target_transform=test_lbl_trans)
195
+
196
+ for i, (img, tgt) in enumerate(tqdm(ds)):
197
+ tgt = (tgt.numpy() * 255).astype(np.uint8)
198
+ imsave('/home/shirgur/ext/Code/C2S/run/imagenet/gt/{}.png'.format(i), tgt)
199
+
200
+ print('here')
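The `__main__` block above is tied to the original author's local paths. A more generic sketch of wiring `Imagenet_Segmentation` into a `DataLoader` is shown below; the `gtsegs_ijcv.mat` path is an assumed local location, and the transforms mirror the ones used in the script above.

```python
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import DataLoader

from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.data.imagenet import (
    Imagenet_Segmentation,
)

img_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
target_transform = transforms.Resize((224, 224), Image.NEAREST)

dataset = Imagenet_Segmentation(
    "path/to/gtsegs_ijcv.mat",   # assumed local copy of the ImageNet-Segmentation annotations
    transform=img_transform,
    target_transform=target_transform,
)
# The HDF5 handle is opened lazily in __getitem__, so worker processes each get their own handle.
loader = DataLoader(dataset, batch_size=8, shuffle=False, num_workers=2)

images, targets = next(iter(loader))  # images: (8, 3, 224, 224), targets: (8, 224, 224)
```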
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/data/imagenet_utils.py ADDED
@@ -0,0 +1,1002 @@
1
+ CLS2IDX = {
2
+ 0: 'tench, Tinca tinca',
3
+ 1: 'goldfish, Carassius auratus',
4
+ 2: 'great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias',
5
+ 3: 'tiger shark, Galeocerdo cuvieri',
6
+ 4: 'hammerhead, hammerhead shark',
7
+ 5: 'electric ray, crampfish, numbfish, torpedo',
8
+ 6: 'stingray',
9
+ 7: 'cock',
10
+ 8: 'hen',
11
+ 9: 'ostrich, Struthio camelus',
12
+ 10: 'brambling, Fringilla montifringilla',
13
+ 11: 'goldfinch, Carduelis carduelis',
14
+ 12: 'house finch, linnet, Carpodacus mexicanus',
15
+ 13: 'junco, snowbird',
16
+ 14: 'indigo bunting, indigo finch, indigo bird, Passerina cyanea',
17
+ 15: 'robin, American robin, Turdus migratorius',
18
+ 16: 'bulbul',
19
+ 17: 'jay',
20
+ 18: 'magpie',
21
+ 19: 'chickadee',
22
+ 20: 'water ouzel, dipper',
23
+ 21: 'kite',
24
+ 22: 'bald eagle, American eagle, Haliaeetus leucocephalus',
25
+ 23: 'vulture',
26
+ 24: 'great grey owl, great gray owl, Strix nebulosa',
27
+ 25: 'European fire salamander, Salamandra salamandra',
28
+ 26: 'common newt, Triturus vulgaris',
29
+ 27: 'eft',
30
+ 28: 'spotted salamander, Ambystoma maculatum',
31
+ 29: 'axolotl, mud puppy, Ambystoma mexicanum',
32
+ 30: 'bullfrog, Rana catesbeiana',
33
+ 31: 'tree frog, tree-frog',
34
+ 32: 'tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui',
35
+ 33: 'loggerhead, loggerhead turtle, Caretta caretta',
36
+ 34: 'leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea',
37
+ 35: 'mud turtle',
38
+ 36: 'terrapin',
39
+ 37: 'box turtle, box tortoise',
40
+ 38: 'banded gecko',
41
+ 39: 'common iguana, iguana, Iguana iguana',
42
+ 40: 'American chameleon, anole, Anolis carolinensis',
43
+ 41: 'whiptail, whiptail lizard',
44
+ 42: 'agama',
45
+ 43: 'frilled lizard, Chlamydosaurus kingi',
46
+ 44: 'alligator lizard',
47
+ 45: 'Gila monster, Heloderma suspectum',
48
+ 46: 'green lizard, Lacerta viridis',
49
+ 47: 'African chameleon, Chamaeleo chamaeleon',
50
+ 48: 'Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis',
51
+ 49: 'African crocodile, Nile crocodile, Crocodylus niloticus',
52
+ 50: 'American alligator, Alligator mississipiensis',
53
+ 51: 'triceratops',
54
+ 52: 'thunder snake, worm snake, Carphophis amoenus',
55
+ 53: 'ringneck snake, ring-necked snake, ring snake',
56
+ 54: 'hognose snake, puff adder, sand viper',
57
+ 55: 'green snake, grass snake',
58
+ 56: 'king snake, kingsnake',
59
+ 57: 'garter snake, grass snake',
60
+ 58: 'water snake',
61
+ 59: 'vine snake',
62
+ 60: 'night snake, Hypsiglena torquata',
63
+ 61: 'boa constrictor, Constrictor constrictor',
64
+ 62: 'rock python, rock snake, Python sebae',
65
+ 63: 'Indian cobra, Naja naja',
66
+ 64: 'green mamba',
67
+ 65: 'sea snake',
68
+ 66: 'horned viper, cerastes, sand viper, horned asp, Cerastes cornutus',
69
+ 67: 'diamondback, diamondback rattlesnake, Crotalus adamanteus',
70
+ 68: 'sidewinder, horned rattlesnake, Crotalus cerastes',
71
+ 69: 'trilobite',
72
+ 70: 'harvestman, daddy longlegs, Phalangium opilio',
73
+ 71: 'scorpion',
74
+ 72: 'black and gold garden spider, Argiope aurantia',
75
+ 73: 'barn spider, Araneus cavaticus',
76
+ 74: 'garden spider, Aranea diademata',
77
+ 75: 'black widow, Latrodectus mactans',
78
+ 76: 'tarantula',
79
+ 77: 'wolf spider, hunting spider',
80
+ 78: 'tick',
81
+ 79: 'centipede',
82
+ 80: 'black grouse',
83
+ 81: 'ptarmigan',
84
+ 82: 'ruffed grouse, partridge, Bonasa umbellus',
85
+ 83: 'prairie chicken, prairie grouse, prairie fowl',
86
+ 84: 'peacock',
87
+ 85: 'quail',
88
+ 86: 'partridge',
89
+ 87: 'African grey, African gray, Psittacus erithacus',
90
+ 88: 'macaw',
91
+ 89: 'sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita',
92
+ 90: 'lorikeet',
93
+ 91: 'coucal',
94
+ 92: 'bee eater',
95
+ 93: 'hornbill',
96
+ 94: 'hummingbird',
97
+ 95: 'jacamar',
98
+ 96: 'toucan',
99
+ 97: 'drake',
100
+ 98: 'red-breasted merganser, Mergus serrator',
101
+ 99: 'goose',
102
+ 100: 'black swan, Cygnus atratus',
103
+ 101: 'tusker',
104
+ 102: 'echidna, spiny anteater, anteater',
105
+ 103: 'platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus',
106
+ 104: 'wallaby, brush kangaroo',
107
+ 105: 'koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus',
108
+ 106: 'wombat',
109
+ 107: 'jellyfish',
110
+ 108: 'sea anemone, anemone',
111
+ 109: 'brain coral',
112
+ 110: 'flatworm, platyhelminth',
113
+ 111: 'nematode, nematode worm, roundworm',
114
+ 112: 'conch',
115
+ 113: 'snail',
116
+ 114: 'slug',
117
+ 115: 'sea slug, nudibranch',
118
+ 116: 'chiton, coat-of-mail shell, sea cradle, polyplacophore',
119
+ 117: 'chambered nautilus, pearly nautilus, nautilus',
120
+ 118: 'Dungeness crab, Cancer magister',
121
+ 119: 'rock crab, Cancer irroratus',
122
+ 120: 'fiddler crab',
123
+ 121: 'king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica',
124
+ 122: 'American lobster, Northern lobster, Maine lobster, Homarus americanus',
125
+ 123: 'spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish',
126
+ 124: 'crayfish, crawfish, crawdad, crawdaddy',
127
+ 125: 'hermit crab',
128
+ 126: 'isopod',
129
+ 127: 'white stork, Ciconia ciconia',
130
+ 128: 'black stork, Ciconia nigra',
131
+ 129: 'spoonbill',
132
+ 130: 'flamingo',
133
+ 131: 'little blue heron, Egretta caerulea',
134
+ 132: 'American egret, great white heron, Egretta albus',
135
+ 133: 'bittern',
136
+ 134: 'crane',
137
+ 135: 'limpkin, Aramus pictus',
138
+ 136: 'European gallinule, Porphyrio porphyrio',
139
+ 137: 'American coot, marsh hen, mud hen, water hen, Fulica americana',
140
+ 138: 'bustard',
141
+ 139: 'ruddy turnstone, Arenaria interpres',
142
+ 140: 'red-backed sandpiper, dunlin, Erolia alpina',
143
+ 141: 'redshank, Tringa totanus',
144
+ 142: 'dowitcher',
145
+ 143: 'oystercatcher, oyster catcher',
146
+ 144: 'pelican',
147
+ 145: 'king penguin, Aptenodytes patagonica',
148
+ 146: 'albatross, mollymawk',
149
+ 147: 'grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus',
150
+ 148: 'killer whale, killer, orca, grampus, sea wolf, Orcinus orca',
151
+ 149: 'dugong, Dugong dugon',
152
+ 150: 'sea lion',
153
+ 151: 'Chihuahua',
154
+ 152: 'Japanese spaniel',
155
+ 153: 'Maltese dog, Maltese terrier, Maltese',
156
+ 154: 'Pekinese, Pekingese, Peke',
157
+ 155: 'Shih-Tzu',
158
+ 156: 'Blenheim spaniel',
159
+ 157: 'papillon',
160
+ 158: 'toy terrier',
161
+ 159: 'Rhodesian ridgeback',
162
+ 160: 'Afghan hound, Afghan',
163
+ 161: 'basset, basset hound',
164
+ 162: 'beagle',
165
+ 163: 'bloodhound, sleuthhound',
166
+ 164: 'bluetick',
167
+ 165: 'black-and-tan coonhound',
168
+ 166: 'Walker hound, Walker foxhound',
169
+ 167: 'English foxhound',
170
+ 168: 'redbone',
171
+ 169: 'borzoi, Russian wolfhound',
172
+ 170: 'Irish wolfhound',
173
+ 171: 'Italian greyhound',
174
+ 172: 'whippet',
175
+ 173: 'Ibizan hound, Ibizan Podenco',
176
+ 174: 'Norwegian elkhound, elkhound',
177
+ 175: 'otterhound, otter hound',
178
+ 176: 'Saluki, gazelle hound',
179
+ 177: 'Scottish deerhound, deerhound',
180
+ 178: 'Weimaraner',
181
+ 179: 'Staffordshire bullterrier, Staffordshire bull terrier',
182
+ 180: 'American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier',
183
+ 181: 'Bedlington terrier',
184
+ 182: 'Border terrier',
185
+ 183: 'Kerry blue terrier',
186
+ 184: 'Irish terrier',
187
+ 185: 'Norfolk terrier',
188
+ 186: 'Norwich terrier',
189
+ 187: 'Yorkshire terrier',
190
+ 188: 'wire-haired fox terrier',
191
+ 189: 'Lakeland terrier',
192
+ 190: 'Sealyham terrier, Sealyham',
193
+ 191: 'Airedale, Airedale terrier',
194
+ 192: 'cairn, cairn terrier',
195
+ 193: 'Australian terrier',
196
+ 194: 'Dandie Dinmont, Dandie Dinmont terrier',
197
+ 195: 'Boston bull, Boston terrier',
198
+ 196: 'miniature schnauzer',
199
+ 197: 'giant schnauzer',
200
+ 198: 'standard schnauzer',
201
+ 199: 'Scotch terrier, Scottish terrier, Scottie',
202
+ 200: 'Tibetan terrier, chrysanthemum dog',
203
+ 201: 'silky terrier, Sydney silky',
204
+ 202: 'soft-coated wheaten terrier',
205
+ 203: 'West Highland white terrier',
206
+ 204: 'Lhasa, Lhasa apso',
207
+ 205: 'flat-coated retriever',
208
+ 206: 'curly-coated retriever',
209
+ 207: 'golden retriever',
210
+ 208: 'Labrador retriever',
211
+ 209: 'Chesapeake Bay retriever',
212
+ 210: 'German short-haired pointer',
213
+ 211: 'vizsla, Hungarian pointer',
214
+ 212: 'English setter',
215
+ 213: 'Irish setter, red setter',
216
+ 214: 'Gordon setter',
217
+ 215: 'Brittany spaniel',
218
+ 216: 'clumber, clumber spaniel',
219
+ 217: 'English springer, English springer spaniel',
220
+ 218: 'Welsh springer spaniel',
221
+ 219: 'cocker spaniel, English cocker spaniel, cocker',
222
+ 220: 'Sussex spaniel',
223
+ 221: 'Irish water spaniel',
224
+ 222: 'kuvasz',
225
+ 223: 'schipperke',
226
+ 224: 'groenendael',
227
+ 225: 'malinois',
228
+ 226: 'briard',
229
+ 227: 'kelpie',
230
+ 228: 'komondor',
231
+ 229: 'Old English sheepdog, bobtail',
232
+ 230: 'Shetland sheepdog, Shetland sheep dog, Shetland',
233
+ 231: 'collie',
234
+ 232: 'Border collie',
235
+ 233: 'Bouvier des Flandres, Bouviers des Flandres',
236
+ 234: 'Rottweiler',
237
+ 235: 'German shepherd, German shepherd dog, German police dog, alsatian',
238
+ 236: 'Doberman, Doberman pinscher',
239
+ 237: 'miniature pinscher',
240
+ 238: 'Greater Swiss Mountain dog',
241
+ 239: 'Bernese mountain dog',
242
+ 240: 'Appenzeller',
243
+ 241: 'EntleBucher',
244
+ 242: 'boxer',
245
+ 243: 'bull mastiff',
246
+ 244: 'Tibetan mastiff',
247
+ 245: 'French bulldog',
248
+ 246: 'Great Dane',
249
+ 247: 'Saint Bernard, St Bernard',
250
+ 248: 'Eskimo dog, husky',
251
+ 249: 'malamute, malemute, Alaskan malamute',
252
+ 250: 'Siberian husky',
253
+ 251: 'dalmatian, coach dog, carriage dog',
254
+ 252: 'affenpinscher, monkey pinscher, monkey dog',
255
+ 253: 'basenji',
256
+ 254: 'pug, pug-dog',
257
+ 255: 'Leonberg',
258
+ 256: 'Newfoundland, Newfoundland dog',
259
+ 257: 'Great Pyrenees',
260
+ 258: 'Samoyed, Samoyede',
261
+ 259: 'Pomeranian',
262
+ 260: 'chow, chow chow',
263
+ 261: 'keeshond',
264
+ 262: 'Brabancon griffon',
265
+ 263: 'Pembroke, Pembroke Welsh corgi',
266
+ 264: 'Cardigan, Cardigan Welsh corgi',
267
+ 265: 'toy poodle',
268
+ 266: 'miniature poodle',
269
+ 267: 'standard poodle',
270
+ 268: 'Mexican hairless',
271
+ 269: 'timber wolf, grey wolf, gray wolf, Canis lupus',
272
+ 270: 'white wolf, Arctic wolf, Canis lupus tundrarum',
273
+ 271: 'red wolf, maned wolf, Canis rufus, Canis niger',
274
+ 272: 'coyote, prairie wolf, brush wolf, Canis latrans',
275
+ 273: 'dingo, warrigal, warragal, Canis dingo',
276
+ 274: 'dhole, Cuon alpinus',
277
+ 275: 'African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus',
278
+ 276: 'hyena, hyaena',
279
+ 277: 'red fox, Vulpes vulpes',
280
+ 278: 'kit fox, Vulpes macrotis',
281
+ 279: 'Arctic fox, white fox, Alopex lagopus',
282
+ 280: 'grey fox, gray fox, Urocyon cinereoargenteus',
283
+ 281: 'tabby, tabby cat',
284
+ 282: 'tiger cat',
285
+ 283: 'Persian cat',
286
+ 284: 'Siamese cat, Siamese',
287
+ 285: 'Egyptian cat',
288
+ 286: 'cougar, puma, catamount, mountain lion, painter, panther, Felis concolor',
289
+ 287: 'lynx, catamount',
290
+ 288: 'leopard, Panthera pardus',
291
+ 289: 'snow leopard, ounce, Panthera uncia',
292
+ 290: 'jaguar, panther, Panthera onca, Felis onca',
293
+ 291: 'lion, king of beasts, Panthera leo',
294
+ 292: 'tiger, Panthera tigris',
295
+ 293: 'cheetah, chetah, Acinonyx jubatus',
296
+ 294: 'brown bear, bruin, Ursus arctos',
297
+ 295: 'American black bear, black bear, Ursus americanus, Euarctos americanus',
298
+ 296: 'ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus',
299
+ 297: 'sloth bear, Melursus ursinus, Ursus ursinus',
300
+ 298: 'mongoose',
301
+ 299: 'meerkat, mierkat',
302
+ 300: 'tiger beetle',
303
+ 301: 'ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle',
304
+ 302: 'ground beetle, carabid beetle',
305
+ 303: 'long-horned beetle, longicorn, longicorn beetle',
306
+ 304: 'leaf beetle, chrysomelid',
307
+ 305: 'dung beetle',
308
+ 306: 'rhinoceros beetle',
309
+ 307: 'weevil',
310
+ 308: 'fly',
311
+ 309: 'bee',
312
+ 310: 'ant, emmet, pismire',
313
+ 311: 'grasshopper, hopper',
314
+ 312: 'cricket',
315
+ 313: 'walking stick, walkingstick, stick insect',
316
+ 314: 'cockroach, roach',
317
+ 315: 'mantis, mantid',
318
+ 316: 'cicada, cicala',
319
+ 317: 'leafhopper',
320
+ 318: 'lacewing, lacewing fly',
321
+ 319: "dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk",
322
+ 320: 'damselfly',
323
+ 321: 'admiral',
324
+ 322: 'ringlet, ringlet butterfly',
325
+ 323: 'monarch, monarch butterfly, milkweed butterfly, Danaus plexippus',
326
+ 324: 'cabbage butterfly',
327
+ 325: 'sulphur butterfly, sulfur butterfly',
328
+ 326: 'lycaenid, lycaenid butterfly',
329
+ 327: 'starfish, sea star',
330
+ 328: 'sea urchin',
331
+ 329: 'sea cucumber, holothurian',
332
+ 330: 'wood rabbit, cottontail, cottontail rabbit',
333
+ 331: 'hare',
334
+ 332: 'Angora, Angora rabbit',
335
+ 333: 'hamster',
336
+ 334: 'porcupine, hedgehog',
337
+ 335: 'fox squirrel, eastern fox squirrel, Sciurus niger',
338
+ 336: 'marmot',
339
+ 337: 'beaver',
340
+ 338: 'guinea pig, Cavia cobaya',
341
+ 339: 'sorrel',
342
+ 340: 'zebra',
343
+ 341: 'hog, pig, grunter, squealer, Sus scrofa',
344
+ 342: 'wild boar, boar, Sus scrofa',
345
+ 343: 'warthog',
346
+ 344: 'hippopotamus, hippo, river horse, Hippopotamus amphibius',
347
+ 345: 'ox',
348
+ 346: 'water buffalo, water ox, Asiatic buffalo, Bubalus bubalis',
349
+ 347: 'bison',
350
+ 348: 'ram, tup',
351
+ 349: 'bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis',
352
+ 350: 'ibex, Capra ibex',
353
+ 351: 'hartebeest',
354
+ 352: 'impala, Aepyceros melampus',
355
+ 353: 'gazelle',
356
+ 354: 'Arabian camel, dromedary, Camelus dromedarius',
357
+ 355: 'llama',
358
+ 356: 'weasel',
359
+ 357: 'mink',
360
+ 358: 'polecat, fitch, foulmart, foumart, Mustela putorius',
361
+ 359: 'black-footed ferret, ferret, Mustela nigripes',
362
+ 360: 'otter',
363
+ 361: 'skunk, polecat, wood pussy',
364
+ 362: 'badger',
365
+ 363: 'armadillo',
366
+ 364: 'three-toed sloth, ai, Bradypus tridactylus',
367
+ 365: 'orangutan, orang, orangutang, Pongo pygmaeus',
368
+ 366: 'gorilla, Gorilla gorilla',
369
+ 367: 'chimpanzee, chimp, Pan troglodytes',
370
+ 368: 'gibbon, Hylobates lar',
371
+ 369: 'siamang, Hylobates syndactylus, Symphalangus syndactylus',
372
+ 370: 'guenon, guenon monkey',
373
+ 371: 'patas, hussar monkey, Erythrocebus patas',
374
+ 372: 'baboon',
375
+ 373: 'macaque',
376
+ 374: 'langur',
377
+ 375: 'colobus, colobus monkey',
378
+ 376: 'proboscis monkey, Nasalis larvatus',
379
+ 377: 'marmoset',
380
+ 378: 'capuchin, ringtail, Cebus capucinus',
381
+ 379: 'howler monkey, howler',
382
+ 380: 'titi, titi monkey',
383
+ 381: 'spider monkey, Ateles geoffroyi',
384
+ 382: 'squirrel monkey, Saimiri sciureus',
385
+ 383: 'Madagascar cat, ring-tailed lemur, Lemur catta',
386
+ 384: 'indri, indris, Indri indri, Indri brevicaudatus',
387
+ 385: 'Indian elephant, Elephas maximus',
388
+ 386: 'African elephant, Loxodonta africana',
389
+ 387: 'lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens',
390
+ 388: 'giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca',
391
+ 389: 'barracouta, snoek',
392
+ 390: 'eel',
393
+ 391: 'coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch',
394
+ 392: 'rock beauty, Holocanthus tricolor',
395
+ 393: 'anemone fish',
396
+ 394: 'sturgeon',
397
+ 395: 'gar, garfish, garpike, billfish, Lepisosteus osseus',
398
+ 396: 'lionfish',
399
+ 397: 'puffer, pufferfish, blowfish, globefish',
400
+ 398: 'abacus',
401
+ 399: 'abaya',
402
+ 400: "academic gown, academic robe, judge's robe",
403
+ 401: 'accordion, piano accordion, squeeze box',
404
+ 402: 'acoustic guitar',
405
+ 403: 'aircraft carrier, carrier, flattop, attack aircraft carrier',
406
+ 404: 'airliner',
407
+ 405: 'airship, dirigible',
408
+ 406: 'altar',
409
+ 407: 'ambulance',
410
+ 408: 'amphibian, amphibious vehicle',
411
+ 409: 'analog clock',
412
+ 410: 'apiary, bee house',
413
+ 411: 'apron',
414
+ 412: 'ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin',
415
+ 413: 'assault rifle, assault gun',
416
+ 414: 'backpack, back pack, knapsack, packsack, rucksack, haversack',
417
+ 415: 'bakery, bakeshop, bakehouse',
418
+ 416: 'balance beam, beam',
419
+ 417: 'balloon',
420
+ 418: 'ballpoint, ballpoint pen, ballpen, Biro',
421
+ 419: 'Band Aid',
422
+ 420: 'banjo',
423
+ 421: 'bannister, banister, balustrade, balusters, handrail',
424
+ 422: 'barbell',
425
+ 423: 'barber chair',
426
+ 424: 'barbershop',
427
+ 425: 'barn',
428
+ 426: 'barometer',
429
+ 427: 'barrel, cask',
430
+ 428: 'barrow, garden cart, lawn cart, wheelbarrow',
431
+ 429: 'baseball',
432
+ 430: 'basketball',
433
+ 431: 'bassinet',
434
+ 432: 'bassoon',
435
+ 433: 'bathing cap, swimming cap',
436
+ 434: 'bath towel',
437
+ 435: 'bathtub, bathing tub, bath, tub',
438
+ 436: 'beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon',
439
+ 437: 'beacon, lighthouse, beacon light, pharos',
440
+ 438: 'beaker',
441
+ 439: 'bearskin, busby, shako',
442
+ 440: 'beer bottle',
443
+ 441: 'beer glass',
444
+ 442: 'bell cote, bell cot',
445
+ 443: 'bib',
446
+ 444: 'bicycle-built-for-two, tandem bicycle, tandem',
447
+ 445: 'bikini, two-piece',
448
+ 446: 'binder, ring-binder',
449
+ 447: 'binoculars, field glasses, opera glasses',
450
+ 448: 'birdhouse',
451
+ 449: 'boathouse',
452
+ 450: 'bobsled, bobsleigh, bob',
453
+ 451: 'bolo tie, bolo, bola tie, bola',
454
+ 452: 'bonnet, poke bonnet',
455
+ 453: 'bookcase',
456
+ 454: 'bookshop, bookstore, bookstall',
457
+ 455: 'bottlecap',
458
+ 456: 'bow',
459
+ 457: 'bow tie, bow-tie, bowtie',
460
+ 458: 'brass, memorial tablet, plaque',
461
+ 459: 'brassiere, bra, bandeau',
462
+ 460: 'breakwater, groin, groyne, mole, bulwark, seawall, jetty',
463
+ 461: 'breastplate, aegis, egis',
464
+ 462: 'broom',
465
+ 463: 'bucket, pail',
466
+ 464: 'buckle',
467
+ 465: 'bulletproof vest',
468
+ 466: 'bullet train, bullet',
469
+ 467: 'butcher shop, meat market',
470
+ 468: 'cab, hack, taxi, taxicab',
471
+ 469: 'caldron, cauldron',
472
+ 470: 'candle, taper, wax light',
473
+ 471: 'cannon',
474
+ 472: 'canoe',
475
+ 473: 'can opener, tin opener',
476
+ 474: 'cardigan',
477
+ 475: 'car mirror',
478
+ 476: 'carousel, carrousel, merry-go-round, roundabout, whirligig',
479
+ 477: "carpenter's kit, tool kit",
480
+ 478: 'carton',
481
+ 479: 'car wheel',
482
+ 480: 'cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM',
483
+ 481: 'cassette',
484
+ 482: 'cassette player',
485
+ 483: 'castle',
486
+ 484: 'catamaran',
487
+ 485: 'CD player',
488
+ 486: 'cello, violoncello',
489
+ 487: 'cellular telephone, cellular phone, cellphone, cell, mobile phone',
490
+ 488: 'chain',
491
+ 489: 'chainlink fence',
492
+ 490: 'chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour',
493
+ 491: 'chain saw, chainsaw',
494
+ 492: 'chest',
495
+ 493: 'chiffonier, commode',
496
+ 494: 'chime, bell, gong',
497
+ 495: 'china cabinet, china closet',
498
+ 496: 'Christmas stocking',
499
+ 497: 'church, church building',
500
+ 498: 'cinema, movie theater, movie theatre, movie house, picture palace',
501
+ 499: 'cleaver, meat cleaver, chopper',
502
+ 500: 'cliff dwelling',
503
+ 501: 'cloak',
504
+ 502: 'clog, geta, patten, sabot',
505
+ 503: 'cocktail shaker',
506
+ 504: 'coffee mug',
507
+ 505: 'coffeepot',
508
+ 506: 'coil, spiral, volute, whorl, helix',
509
+ 507: 'combination lock',
510
+ 508: 'computer keyboard, keypad',
511
+ 509: 'confectionery, confectionary, candy store',
512
+ 510: 'container ship, containership, container vessel',
513
+ 511: 'convertible',
514
+ 512: 'corkscrew, bottle screw',
515
+ 513: 'cornet, horn, trumpet, trump',
516
+ 514: 'cowboy boot',
517
+ 515: 'cowboy hat, ten-gallon hat',
518
+ 516: 'cradle',
519
+ 517: 'crane',
520
+ 518: 'crash helmet',
521
+ 519: 'crate',
522
+ 520: 'crib, cot',
523
+ 521: 'Crock Pot',
524
+ 522: 'croquet ball',
525
+ 523: 'crutch',
526
+ 524: 'cuirass',
527
+ 525: 'dam, dike, dyke',
528
+ 526: 'desk',
529
+ 527: 'desktop computer',
530
+ 528: 'dial telephone, dial phone',
531
+ 529: 'diaper, nappy, napkin',
532
+ 530: 'digital clock',
533
+ 531: 'digital watch',
534
+ 532: 'dining table, board',
535
+ 533: 'dishrag, dishcloth',
536
+ 534: 'dishwasher, dish washer, dishwashing machine',
537
+ 535: 'disk brake, disc brake',
538
+ 536: 'dock, dockage, docking facility',
539
+ 537: 'dogsled, dog sled, dog sleigh',
540
+ 538: 'dome',
541
+ 539: 'doormat, welcome mat',
542
+ 540: 'drilling platform, offshore rig',
543
+ 541: 'drum, membranophone, tympan',
544
+ 542: 'drumstick',
545
+ 543: 'dumbbell',
546
+ 544: 'Dutch oven',
547
+ 545: 'electric fan, blower',
548
+ 546: 'electric guitar',
549
+ 547: 'electric locomotive',
550
+ 548: 'entertainment center',
551
+ 549: 'envelope',
552
+ 550: 'espresso maker',
553
+ 551: 'face powder',
554
+ 552: 'feather boa, boa',
555
+ 553: 'file, file cabinet, filing cabinet',
556
+ 554: 'fireboat',
557
+ 555: 'fire engine, fire truck',
558
+ 556: 'fire screen, fireguard',
559
+ 557: 'flagpole, flagstaff',
560
+ 558: 'flute, transverse flute',
561
+ 559: 'folding chair',
562
+ 560: 'football helmet',
563
+ 561: 'forklift',
564
+ 562: 'fountain',
565
+ 563: 'fountain pen',
566
+ 564: 'four-poster',
567
+ 565: 'freight car',
568
+ 566: 'French horn, horn',
569
+ 567: 'frying pan, frypan, skillet',
570
+ 568: 'fur coat',
571
+ 569: 'garbage truck, dustcart',
572
+ 570: 'gasmask, respirator, gas helmet',
573
+ 571: 'gas pump, gasoline pump, petrol pump, island dispenser',
574
+ 572: 'goblet',
575
+ 573: 'go-kart',
576
+ 574: 'golf ball',
577
+ 575: 'golfcart, golf cart',
578
+ 576: 'gondola',
579
+ 577: 'gong, tam-tam',
580
+ 578: 'gown',
581
+ 579: 'grand piano, grand',
582
+ 580: 'greenhouse, nursery, glasshouse',
583
+ 581: 'grille, radiator grille',
584
+ 582: 'grocery store, grocery, food market, market',
585
+ 583: 'guillotine',
586
+ 584: 'hair slide',
587
+ 585: 'hair spray',
588
+ 586: 'half track',
589
+ 587: 'hammer',
590
+ 588: 'hamper',
591
+ 589: 'hand blower, blow dryer, blow drier, hair dryer, hair drier',
592
+ 590: 'hand-held computer, hand-held microcomputer',
593
+ 591: 'handkerchief, hankie, hanky, hankey',
594
+ 592: 'hard disc, hard disk, fixed disk',
595
+ 593: 'harmonica, mouth organ, harp, mouth harp',
596
+ 594: 'harp',
597
+ 595: 'harvester, reaper',
598
+ 596: 'hatchet',
599
+ 597: 'holster',
600
+ 598: 'home theater, home theatre',
601
+ 599: 'honeycomb',
602
+ 600: 'hook, claw',
603
+ 601: 'hoopskirt, crinoline',
604
+ 602: 'horizontal bar, high bar',
605
+ 603: 'horse cart, horse-cart',
606
+ 604: 'hourglass',
607
+ 605: 'iPod',
608
+ 606: 'iron, smoothing iron',
609
+ 607: "jack-o'-lantern",
610
+ 608: 'jean, blue jean, denim',
611
+ 609: 'jeep, landrover',
612
+ 610: 'jersey, T-shirt, tee shirt',
613
+ 611: 'jigsaw puzzle',
614
+ 612: 'jinrikisha, ricksha, rickshaw',
615
+ 613: 'joystick',
616
+ 614: 'kimono',
617
+ 615: 'knee pad',
618
+ 616: 'knot',
619
+ 617: 'lab coat, laboratory coat',
620
+ 618: 'ladle',
621
+ 619: 'lampshade, lamp shade',
622
+ 620: 'laptop, laptop computer',
623
+ 621: 'lawn mower, mower',
624
+ 622: 'lens cap, lens cover',
625
+ 623: 'letter opener, paper knife, paperknife',
626
+ 624: 'library',
627
+ 625: 'lifeboat',
628
+ 626: 'lighter, light, igniter, ignitor',
629
+ 627: 'limousine, limo',
630
+ 628: 'liner, ocean liner',
631
+ 629: 'lipstick, lip rouge',
632
+ 630: 'Loafer',
633
+ 631: 'lotion',
634
+ 632: 'loudspeaker, speaker, speaker unit, loudspeaker system, speaker system',
635
+ 633: "loupe, jeweler's loupe",
636
+ 634: 'lumbermill, sawmill',
637
+ 635: 'magnetic compass',
638
+ 636: 'mailbag, postbag',
639
+ 637: 'mailbox, letter box',
640
+ 638: 'maillot',
641
+ 639: 'maillot, tank suit',
642
+ 640: 'manhole cover',
643
+ 641: 'maraca',
644
+ 642: 'marimba, xylophone',
645
+ 643: 'mask',
646
+ 644: 'matchstick',
647
+ 645: 'maypole',
648
+ 646: 'maze, labyrinth',
649
+ 647: 'measuring cup',
650
+ 648: 'medicine chest, medicine cabinet',
651
+ 649: 'megalith, megalithic structure',
652
+ 650: 'microphone, mike',
653
+ 651: 'microwave, microwave oven',
654
+ 652: 'military uniform',
655
+ 653: 'milk can',
656
+ 654: 'minibus',
657
+ 655: 'miniskirt, mini',
658
+ 656: 'minivan',
659
+ 657: 'missile',
660
+ 658: 'mitten',
661
+ 659: 'mixing bowl',
662
+ 660: 'mobile home, manufactured home',
663
+ 661: 'Model T',
664
+ 662: 'modem',
665
+ 663: 'monastery',
666
+ 664: 'monitor',
667
+ 665: 'moped',
668
+ 666: 'mortar',
669
+ 667: 'mortarboard',
670
+ 668: 'mosque',
671
+ 669: 'mosquito net',
672
+ 670: 'motor scooter, scooter',
673
+ 671: 'mountain bike, all-terrain bike, off-roader',
674
+ 672: 'mountain tent',
675
+ 673: 'mouse, computer mouse',
676
+ 674: 'mousetrap',
677
+ 675: 'moving van',
678
+ 676: 'muzzle',
679
+ 677: 'nail',
680
+ 678: 'neck brace',
681
+ 679: 'necklace',
682
+ 680: 'nipple',
683
+ 681: 'notebook, notebook computer',
684
+ 682: 'obelisk',
685
+ 683: 'oboe, hautboy, hautbois',
686
+ 684: 'ocarina, sweet potato',
687
+ 685: 'odometer, hodometer, mileometer, milometer',
688
+ 686: 'oil filter',
689
+ 687: 'organ, pipe organ',
690
+ 688: 'oscilloscope, scope, cathode-ray oscilloscope, CRO',
691
+ 689: 'overskirt',
692
+ 690: 'oxcart',
693
+ 691: 'oxygen mask',
694
+ 692: 'packet',
695
+ 693: 'paddle, boat paddle',
696
+ 694: 'paddlewheel, paddle wheel',
697
+ 695: 'padlock',
698
+ 696: 'paintbrush',
699
+ 697: "pajama, pyjama, pj's, jammies",
700
+ 698: 'palace',
701
+ 699: 'panpipe, pandean pipe, syrinx',
702
+ 700: 'paper towel',
703
+ 701: 'parachute, chute',
704
+ 702: 'parallel bars, bars',
705
+ 703: 'park bench',
706
+ 704: 'parking meter',
707
+ 705: 'passenger car, coach, carriage',
708
+ 706: 'patio, terrace',
709
+ 707: 'pay-phone, pay-station',
710
+ 708: 'pedestal, plinth, footstall',
711
+ 709: 'pencil box, pencil case',
712
+ 710: 'pencil sharpener',
713
+ 711: 'perfume, essence',
714
+ 712: 'Petri dish',
715
+ 713: 'photocopier',
716
+ 714: 'pick, plectrum, plectron',
717
+ 715: 'pickelhaube',
718
+ 716: 'picket fence, paling',
719
+ 717: 'pickup, pickup truck',
720
+ 718: 'pier',
721
+ 719: 'piggy bank, penny bank',
722
+ 720: 'pill bottle',
723
+ 721: 'pillow',
724
+ 722: 'ping-pong ball',
725
+ 723: 'pinwheel',
726
+ 724: 'pirate, pirate ship',
727
+ 725: 'pitcher, ewer',
728
+ 726: "plane, carpenter's plane, woodworking plane",
729
+ 727: 'planetarium',
730
+ 728: 'plastic bag',
731
+ 729: 'plate rack',
732
+ 730: 'plow, plough',
733
+ 731: "plunger, plumber's helper",
734
+ 732: 'Polaroid camera, Polaroid Land camera',
735
+ 733: 'pole',
736
+ 734: 'police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria',
737
+ 735: 'poncho',
738
+ 736: 'pool table, billiard table, snooker table',
739
+ 737: 'pop bottle, soda bottle',
740
+ 738: 'pot, flowerpot',
741
+ 739: "potter's wheel",
742
+ 740: 'power drill',
743
+ 741: 'prayer rug, prayer mat',
744
+ 742: 'printer',
745
+ 743: 'prison, prison house',
746
+ 744: 'projectile, missile',
747
+ 745: 'projector',
748
+ 746: 'puck, hockey puck',
749
+ 747: 'punching bag, punch bag, punching ball, punchball',
750
+ 748: 'purse',
751
+ 749: 'quill, quill pen',
752
+ 750: 'quilt, comforter, comfort, puff',
753
+ 751: 'racer, race car, racing car',
754
+ 752: 'racket, racquet',
755
+ 753: 'radiator',
756
+ 754: 'radio, wireless',
757
+ 755: 'radio telescope, radio reflector',
758
+ 756: 'rain barrel',
759
+ 757: 'recreational vehicle, RV, R.V.',
760
+ 758: 'reel',
761
+ 759: 'reflex camera',
762
+ 760: 'refrigerator, icebox',
763
+ 761: 'remote control, remote',
764
+ 762: 'restaurant, eating house, eating place, eatery',
765
+ 763: 'revolver, six-gun, six-shooter',
766
+ 764: 'rifle',
767
+ 765: 'rocking chair, rocker',
768
+ 766: 'rotisserie',
769
+ 767: 'rubber eraser, rubber, pencil eraser',
770
+ 768: 'rugby ball',
771
+ 769: 'rule, ruler',
772
+ 770: 'running shoe',
773
+ 771: 'safe',
774
+ 772: 'safety pin',
775
+ 773: 'saltshaker, salt shaker',
776
+ 774: 'sandal',
777
+ 775: 'sarong',
778
+ 776: 'sax, saxophone',
779
+ 777: 'scabbard',
780
+ 778: 'scale, weighing machine',
781
+ 779: 'school bus',
782
+ 780: 'schooner',
783
+ 781: 'scoreboard',
784
+ 782: 'screen, CRT screen',
785
+ 783: 'screw',
786
+ 784: 'screwdriver',
787
+ 785: 'seat belt, seatbelt',
788
+ 786: 'sewing machine',
789
+ 787: 'shield, buckler',
790
+ 788: 'shoe shop, shoe-shop, shoe store',
791
+ 789: 'shoji',
792
+ 790: 'shopping basket',
793
+ 791: 'shopping cart',
794
+ 792: 'shovel',
795
+ 793: 'shower cap',
796
+ 794: 'shower curtain',
797
+ 795: 'ski',
798
+ 796: 'ski mask',
799
+ 797: 'sleeping bag',
800
+ 798: 'slide rule, slipstick',
801
+ 799: 'sliding door',
802
+ 800: 'slot, one-armed bandit',
803
+ 801: 'snorkel',
804
+ 802: 'snowmobile',
805
+ 803: 'snowplow, snowplough',
806
+ 804: 'soap dispenser',
807
+ 805: 'soccer ball',
808
+ 806: 'sock',
809
+ 807: 'solar dish, solar collector, solar furnace',
810
+ 808: 'sombrero',
811
+ 809: 'soup bowl',
812
+ 810: 'space bar',
813
+ 811: 'space heater',
814
+ 812: 'space shuttle',
815
+ 813: 'spatula',
816
+ 814: 'speedboat',
817
+ 815: "spider web, spider's web",
818
+ 816: 'spindle',
819
+ 817: 'sports car, sport car',
820
+ 818: 'spotlight, spot',
821
+ 819: 'stage',
822
+ 820: 'steam locomotive',
823
+ 821: 'steel arch bridge',
824
+ 822: 'steel drum',
825
+ 823: 'stethoscope',
826
+ 824: 'stole',
827
+ 825: 'stone wall',
828
+ 826: 'stopwatch, stop watch',
829
+ 827: 'stove',
830
+ 828: 'strainer',
831
+ 829: 'streetcar, tram, tramcar, trolley, trolley car',
832
+ 830: 'stretcher',
833
+ 831: 'studio couch, day bed',
834
+ 832: 'stupa, tope',
835
+ 833: 'submarine, pigboat, sub, U-boat',
836
+ 834: 'suit, suit of clothes',
837
+ 835: 'sundial',
838
+ 836: 'sunglass',
839
+ 837: 'sunglasses, dark glasses, shades',
840
+ 838: 'sunscreen, sunblock, sun blocker',
841
+ 839: 'suspension bridge',
842
+ 840: 'swab, swob, mop',
843
+ 841: 'sweatshirt',
844
+ 842: 'swimming trunks, bathing trunks',
845
+ 843: 'swing',
846
+ 844: 'switch, electric switch, electrical switch',
847
+ 845: 'syringe',
848
+ 846: 'table lamp',
849
+ 847: 'tank, army tank, armored combat vehicle, armoured combat vehicle',
850
+ 848: 'tape player',
851
+ 849: 'teapot',
852
+ 850: 'teddy, teddy bear',
853
+ 851: 'television, television system',
854
+ 852: 'tennis ball',
855
+ 853: 'thatch, thatched roof',
856
+ 854: 'theater curtain, theatre curtain',
857
+ 855: 'thimble',
858
+ 856: 'thresher, thrasher, threshing machine',
859
+ 857: 'throne',
860
+ 858: 'tile roof',
861
+ 859: 'toaster',
862
+ 860: 'tobacco shop, tobacconist shop, tobacconist',
863
+ 861: 'toilet seat',
864
+ 862: 'torch',
865
+ 863: 'totem pole',
866
+ 864: 'tow truck, tow car, wrecker',
867
+ 865: 'toyshop',
868
+ 866: 'tractor',
869
+ 867: 'trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi',
870
+ 868: 'tray',
871
+ 869: 'trench coat',
872
+ 870: 'tricycle, trike, velocipede',
873
+ 871: 'trimaran',
874
+ 872: 'tripod',
875
+ 873: 'triumphal arch',
876
+ 874: 'trolleybus, trolley coach, trackless trolley',
877
+ 875: 'trombone',
878
+ 876: 'tub, vat',
879
+ 877: 'turnstile',
880
+ 878: 'typewriter keyboard',
881
+ 879: 'umbrella',
882
+ 880: 'unicycle, monocycle',
883
+ 881: 'upright, upright piano',
884
+ 882: 'vacuum, vacuum cleaner',
885
+ 883: 'vase',
886
+ 884: 'vault',
887
+ 885: 'velvet',
888
+ 886: 'vending machine',
889
+ 887: 'vestment',
890
+ 888: 'viaduct',
891
+ 889: 'violin, fiddle',
892
+ 890: 'volleyball',
893
+ 891: 'waffle iron',
894
+ 892: 'wall clock',
895
+ 893: 'wallet, billfold, notecase, pocketbook',
896
+ 894: 'wardrobe, closet, press',
897
+ 895: 'warplane, military plane',
898
+ 896: 'washbasin, handbasin, washbowl, lavabo, wash-hand basin',
899
+ 897: 'washer, automatic washer, washing machine',
900
+ 898: 'water bottle',
901
+ 899: 'water jug',
902
+ 900: 'water tower',
903
+ 901: 'whiskey jug',
904
+ 902: 'whistle',
905
+ 903: 'wig',
906
+ 904: 'window screen',
907
+ 905: 'window shade',
908
+ 906: 'Windsor tie',
909
+ 907: 'wine bottle',
910
+ 908: 'wing',
911
+ 909: 'wok',
912
+ 910: 'wooden spoon',
913
+ 911: 'wool, woolen, woollen',
914
+ 912: 'worm fence, snake fence, snake-rail fence, Virginia fence',
915
+ 913: 'wreck',
916
+ 914: 'yawl',
917
+ 915: 'yurt',
918
+ 916: 'web site, website, internet site, site',
919
+ 917: 'comic book',
920
+ 918: 'crossword puzzle, crossword',
921
+ 919: 'street sign',
922
+ 920: 'traffic light, traffic signal, stoplight',
923
+ 921: 'book jacket, dust cover, dust jacket, dust wrapper',
924
+ 922: 'menu',
925
+ 923: 'plate',
926
+ 924: 'guacamole',
927
+ 925: 'consomme',
928
+ 926: 'hot pot, hotpot',
929
+ 927: 'trifle',
930
+ 928: 'ice cream, icecream',
931
+ 929: 'ice lolly, lolly, lollipop, popsicle',
932
+ 930: 'French loaf',
933
+ 931: 'bagel, beigel',
934
+ 932: 'pretzel',
935
+ 933: 'cheeseburger',
936
+ 934: 'hotdog, hot dog, red hot',
937
+ 935: 'mashed potato',
938
+ 936: 'head cabbage',
939
+ 937: 'broccoli',
940
+ 938: 'cauliflower',
941
+ 939: 'zucchini, courgette',
942
+ 940: 'spaghetti squash',
943
+ 941: 'acorn squash',
944
+ 942: 'butternut squash',
945
+ 943: 'cucumber, cuke',
946
+ 944: 'artichoke, globe artichoke',
947
+ 945: 'bell pepper',
948
+ 946: 'cardoon',
949
+ 947: 'mushroom',
950
+ 948: 'Granny Smith',
951
+ 949: 'strawberry',
952
+ 950: 'orange',
953
+ 951: 'lemon',
954
+ 952: 'fig',
955
+ 953: 'pineapple, ananas',
956
+ 954: 'banana',
957
+ 955: 'jackfruit, jak, jack',
958
+ 956: 'custard apple',
959
+ 957: 'pomegranate',
960
+ 958: 'hay',
961
+ 959: 'carbonara',
962
+ 960: 'chocolate sauce, chocolate syrup',
963
+ 961: 'dough',
964
+ 962: 'meat loaf, meatloaf',
965
+ 963: 'pizza, pizza pie',
966
+ 964: 'potpie',
967
+ 965: 'burrito',
968
+ 966: 'red wine',
969
+ 967: 'espresso',
970
+ 968: 'cup',
971
+ 969: 'eggnog',
972
+ 970: 'alp',
973
+ 971: 'bubble',
974
+ 972: 'cliff, drop, drop-off',
975
+ 973: 'coral reef',
976
+ 974: 'geyser',
977
+ 975: 'lakeside, lakeshore',
978
+ 976: 'promontory, headland, head, foreland',
979
+ 977: 'sandbar, sand bar',
980
+ 978: 'seashore, coast, seacoast, sea-coast',
981
+ 979: 'valley, vale',
982
+ 980: 'volcano',
983
+ 981: 'ballplayer, baseball player',
984
+ 982: 'groom, bridegroom',
985
+ 983: 'scuba diver',
986
+ 984: 'rapeseed',
987
+ 985: 'daisy',
988
+ 986: "yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum",
989
+ 987: 'corn',
990
+ 988: 'acorn',
991
+ 989: 'hip, rose hip, rosehip',
992
+ 990: 'buckeye, horse chestnut, conker',
993
+ 991: 'coral fungus',
994
+ 992: 'agaric',
995
+ 993: 'gyromitra',
996
+ 994: 'stinkhorn, carrion fungus',
997
+ 995: 'earthstar',
998
+ 996: 'hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa',
999
+ 997: 'bolete',
1000
+ 998: 'ear, spike, capitulum',
1001
+ 999: 'toilet tissue, toilet paper, bathroom tissue'
1002
+ }
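The dictionary above is the standard 1000-class ImageNet index-to-label table. A minimal usage sketch (not part of the commit), assuming the dictionary is bound to a name such as CLS2IDX earlier in this file (the actual variable name is defined above this excerpt):

```python
import torch

# CLS2IDX is a stand-in name for the index -> label dictionary defined above.
logits = torch.randn(1, 1000)             # stand-in for a classifier's output
top5 = logits.topk(5, dim=-1).indices[0]
for idx in top5.tolist():
    print(idx, CLS2IDX[idx])              # e.g. 609 -> 'jeep, landrover'
```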
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/data/transforms.py ADDED
@@ -0,0 +1,442 @@
1
+ from __future__ import division
2
+ import sys
3
+ import random
4
+ from PIL import Image
5
+
6
+ try:
7
+ import accimage
8
+ except ImportError:
9
+ accimage = None
10
+ import numbers
11
+ import collections
12
+
13
+ from torchvision.transforms import functional as F
14
+
15
+ if sys.version_info < (3, 3):
16
+ Sequence = collections.Sequence
17
+ Iterable = collections.Iterable
18
+ else:
19
+ Sequence = collections.abc.Sequence
20
+ Iterable = collections.abc.Iterable
21
+
22
+ _pil_interpolation_to_str = {
23
+ Image.NEAREST: 'PIL.Image.NEAREST',
24
+ Image.BILINEAR: 'PIL.Image.BILINEAR',
25
+ Image.BICUBIC: 'PIL.Image.BICUBIC',
26
+ Image.LANCZOS: 'PIL.Image.LANCZOS',
27
+ Image.HAMMING: 'PIL.Image.HAMMING',
28
+ Image.BOX: 'PIL.Image.BOX',
29
+ }
30
+
31
+
32
+ class Compose(object):
33
+ """Composes several transforms together.
34
+
35
+ Args:
36
+ transforms (list of ``Transform`` objects): list of transforms to compose.
37
+
38
+ Example:
39
+ >>> transforms.Compose([
40
+ >>> transforms.CenterCrop(10),
41
+ >>> transforms.ToTensor(),
42
+ >>> ])
43
+ """
44
+
45
+ def __init__(self, transforms):
46
+ self.transforms = transforms
47
+
48
+ def __call__(self, img, tgt):
49
+ for t in self.transforms:
50
+ img, tgt = t(img, tgt)
51
+ return img, tgt
52
+
53
+ def __repr__(self):
54
+ format_string = self.__class__.__name__ + '('
55
+ for t in self.transforms:
56
+ format_string += '\n'
57
+ format_string += ' {0}'.format(t)
58
+ format_string += '\n)'
59
+ return format_string
60
+
61
+
62
+ class Resize(object):
63
+ """Resize the input PIL Image to the given size.
64
+
65
+ Args:
66
+ size (sequence or int): Desired output size. If size is a sequence like
67
+ (h, w), output size will be matched to this. If size is an int,
68
+ smaller edge of the image will be matched to this number.
69
+ i.e., if height > width, then image will be rescaled to
70
+ (size * height / width, size)
71
+ interpolation (int, optional): Desired interpolation. Default is
72
+ ``PIL.Image.BILINEAR``
73
+ """
74
+
75
+ def __init__(self, size, interpolation=Image.BILINEAR):
76
+ assert isinstance(size, int) or (isinstance(size, Iterable) and len(size) == 2)
77
+ self.size = size
78
+ self.interpolation = interpolation
79
+
80
+ def __call__(self, img, tgt):
81
+ """
82
+ Args:
83
+ img (PIL Image): Image to be scaled.
84
+
85
+ Returns:
86
+ PIL Image: Rescaled image.
87
+ """
88
+ return F.resize(img, self.size, self.interpolation), F.resize(tgt, self.size, Image.NEAREST)
89
+
90
+ def __repr__(self):
91
+ interpolate_str = _pil_interpolation_to_str[self.interpolation]
92
+ return self.__class__.__name__ + '(size={0}, interpolation={1})'.format(self.size, interpolate_str)
93
+
94
+
95
+ class CenterCrop(object):
96
+ """Crops the given PIL Image at the center.
97
+
98
+ Args:
99
+ size (sequence or int): Desired output size of the crop. If size is an
100
+ int instead of sequence like (h, w), a square crop (size, size) is
101
+ made.
102
+ """
103
+
104
+ def __init__(self, size):
105
+ if isinstance(size, numbers.Number):
106
+ self.size = (int(size), int(size))
107
+ else:
108
+ self.size = size
109
+
110
+ def __call__(self, img, tgt):
111
+ """
112
+ Args:
113
+ img (PIL Image): Image to be cropped.
114
+
115
+ Returns:
116
+ PIL Image: Cropped image.
117
+ """
118
+ return F.center_crop(img, self.size), F.center_crop(tgt, self.size)
119
+
120
+ def __repr__(self):
121
+ return self.__class__.__name__ + '(size={0})'.format(self.size)
122
+
123
+
124
+ class RandomCrop(object):
125
+ """Crop the given PIL Image at a random location.
126
+
127
+ Args:
128
+ size (sequence or int): Desired output size of the crop. If size is an
129
+ int instead of sequence like (h, w), a square crop (size, size) is
130
+ made.
131
+ padding (int or sequence, optional): Optional padding on each border
132
+ of the image. Default is None, i.e. no padding. If a sequence of length
133
+ 4 is provided, it is used to pad left, top, right, bottom borders
134
+ respectively. If a sequence of length 2 is provided, it is used to
135
+ pad left/right, top/bottom borders, respectively.
136
+ pad_if_needed (boolean): It will pad the image if smaller than the
137
+ desired size to avoid raising an exception.
138
+ fill: Pixel fill value for constant fill. Default is 0. If a tuple of
139
+ length 3, it is used to fill R, G, B channels respectively.
140
+ This value is only used when the padding_mode is constant
141
+ padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant.
142
+
143
+ - constant: pads with a constant value, this value is specified with fill
144
+
145
+ - edge: pads with the last value on the edge of the image
146
+
147
+ - reflect: pads with reflection of image (without repeating the last value on the edge)
148
+
149
+ padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
150
+ will result in [3, 2, 1, 2, 3, 4, 3, 2]
151
+
152
+ - symmetric: pads with reflection of image (repeating the last value on the edge)
153
+
154
+ padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
155
+ will result in [2, 1, 1, 2, 3, 4, 4, 3]
156
+
157
+ """
158
+
159
+ def __init__(self, size, padding=None, pad_if_needed=False, fill=0, padding_mode='constant'):
160
+ if isinstance(size, numbers.Number):
161
+ self.size = (int(size), int(size))
162
+ else:
163
+ self.size = size
164
+ self.padding = padding
165
+ self.pad_if_needed = pad_if_needed
166
+ self.fill = fill
167
+ self.padding_mode = padding_mode
168
+
169
+ @staticmethod
170
+ def get_params(img, output_size):
171
+ """Get parameters for ``crop`` for a random crop.
172
+
173
+ Args:
174
+ img (PIL Image): Image to be cropped.
175
+ output_size (tuple): Expected output size of the crop.
176
+
177
+ Returns:
178
+ tuple: params (i, j, h, w) to be passed to ``crop`` for random crop.
179
+ """
180
+ w, h = img.size
181
+ th, tw = output_size
182
+ if w == tw and h == th:
183
+ return 0, 0, h, w
184
+
185
+ i = random.randint(0, h - th)
186
+ j = random.randint(0, w - tw)
187
+ return i, j, th, tw
188
+
189
+ def __call__(self, img, tgt):
190
+ """
191
+ Args:
192
+ img (PIL Image): Image to be cropped.
193
+
194
+ Returns:
195
+ PIL Image: Cropped image.
196
+ """
197
+ if self.padding is not None:
198
+ img = F.pad(img, self.padding, self.fill, self.padding_mode)
199
+ tgt = F.pad(tgt, self.padding, self.fill, self.padding_mode)
200
+
201
+ # pad the width if needed
202
+ if self.pad_if_needed and img.size[0] < self.size[1]:
203
+ img = F.pad(img, (self.size[1] - img.size[0], 0), self.fill, self.padding_mode)
204
+ tgt = F.pad(tgt, (self.size[1] - img.size[0], 0), self.fill, self.padding_mode)
205
+ # pad the height if needed
206
+ if self.pad_if_needed and img.size[1] < self.size[0]:
207
+ img = F.pad(img, (0, self.size[0] - img.size[1]), self.fill, self.padding_mode)
208
+ tgt = F.pad(tgt, (0, self.size[0] - img.size[1]), self.fill, self.padding_mode)
209
+
210
+ i, j, h, w = self.get_params(img, self.size)
211
+
212
+ return F.crop(img, i, j, h, w), F.crop(tgt, i, j, h, w)
213
+
214
+ def __repr__(self):
215
+ return self.__class__.__name__ + '(size={0}, padding={1})'.format(self.size, self.padding)
216
+
217
+
218
+ class RandomHorizontalFlip(object):
219
+ """Horizontally flip the given PIL Image randomly with a given probability.
220
+
221
+ Args:
222
+ p (float): probability of the image being flipped. Default value is 0.5
223
+ """
224
+
225
+ def __init__(self, p=0.5):
226
+ self.p = p
227
+
228
+ def __call__(self, img, tgt):
229
+ """
230
+ Args:
231
+ img (PIL Image): Image to be flipped.
232
+
233
+ Returns:
234
+ PIL Image: Randomly flipped image.
235
+ """
236
+ if random.random() < self.p:
237
+ return F.hflip(img), F.hflip(tgt)
238
+
239
+ return img, tgt
240
+
241
+ def __repr__(self):
242
+ return self.__class__.__name__ + '(p={})'.format(self.p)
243
+
244
+
245
+ class RandomVerticalFlip(object):
246
+ """Vertically flip the given PIL Image randomly with a given probability.
247
+
248
+ Args:
249
+ p (float): probability of the image being flipped. Default value is 0.5
250
+ """
251
+
252
+ def __init__(self, p=0.5):
253
+ self.p = p
254
+
255
+ def __call__(self, img, tgt):
256
+ """
257
+ Args:
258
+ img (PIL Image): Image to be flipped.
259
+
260
+ Returns:
261
+ PIL Image: Randomly flipped image.
262
+ """
263
+ if random.random() < self.p:
264
+ return F.vflip(img), F.vflip(tgt)
265
+ return img, tgt
266
+
267
+ def __repr__(self):
268
+ return self.__class__.__name__ + '(p={})'.format(self.p)
269
+
270
+
271
+ class Lambda(object):
272
+ """Apply a user-defined lambda as a transform.
273
+
274
+ Args:
275
+ lambd (function): Lambda/function to be used for transform.
276
+ """
277
+
278
+ def __init__(self, lambd):
279
+ assert callable(lambd), repr(type(lambd).__name__) + " object is not callable"
280
+ self.lambd = lambd
281
+
282
+ def __call__(self, img, tgt):
283
+ return self.lambd(img, tgt)
284
+
285
+ def __repr__(self):
286
+ return self.__class__.__name__ + '()'
287
+
288
+
289
+ class ColorJitter(object):
290
+ """Randomly change the brightness, contrast and saturation of an image.
291
+
292
+ Args:
293
+ brightness (float or tuple of float (min, max)): How much to jitter brightness.
294
+ brightness_factor is chosen uniformly from [max(0, 1 - brightness), 1 + brightness]
295
+ or the given [min, max]. Should be non negative numbers.
296
+ contrast (float or tuple of float (min, max)): How much to jitter contrast.
297
+ contrast_factor is chosen uniformly from [max(0, 1 - contrast), 1 + contrast]
298
+ or the given [min, max]. Should be non negative numbers.
299
+ saturation (float or tuple of float (min, max)): How much to jitter saturation.
300
+ saturation_factor is chosen uniformly from [max(0, 1 - saturation), 1 + saturation]
301
+ or the given [min, max]. Should be non negative numbers.
302
+ hue (float or tuple of float (min, max)): How much to jitter hue.
303
+ hue_factor is chosen uniformly from [-hue, hue] or the given [min, max].
304
+ Should have 0<= hue <= 0.5 or -0.5 <= min <= max <= 0.5.
305
+ """
306
+ def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
307
+ self.brightness = self._check_input(brightness, 'brightness')
308
+ self.contrast = self._check_input(contrast, 'contrast')
309
+ self.saturation = self._check_input(saturation, 'saturation')
310
+ self.hue = self._check_input(hue, 'hue', center=0, bound=(-0.5, 0.5),
311
+ clip_first_on_zero=False)
312
+
313
+ def _check_input(self, value, name, center=1, bound=(0, float('inf')), clip_first_on_zero=True):
314
+ if isinstance(value, numbers.Number):
315
+ if value < 0:
316
+ raise ValueError("If {} is a single number, it must be non negative.".format(name))
317
+ value = [center - value, center + value]
318
+ if clip_first_on_zero:
319
+ value[0] = max(value[0], 0)
320
+ elif isinstance(value, (tuple, list)) and len(value) == 2:
321
+ if not bound[0] <= value[0] <= value[1] <= bound[1]:
322
+ raise ValueError("{} values should be between {}".format(name, bound))
323
+ else:
324
+ raise TypeError("{} should be a single number or a list/tuple with length 2.".format(name))
325
+
326
+ # if value is 0 or (1., 1.) for brightness/contrast/saturation
327
+ # or (0., 0.) for hue, do nothing
328
+ if value[0] == value[1] == center:
329
+ value = None
330
+ return value
331
+
332
+ @staticmethod
333
+ def get_params(brightness, contrast, saturation, hue):
334
+ """Get a randomized transform to be applied on image.
335
+
336
+ Arguments are same as that of __init__.
337
+
338
+ Returns:
339
+ Transform which randomly adjusts brightness, contrast and
340
+ saturation in a random order.
341
+ """
342
+ transforms = []
343
+
344
+ if brightness is not None:
345
+ brightness_factor = random.uniform(brightness[0], brightness[1])
346
+ transforms.append(Lambda(lambda img, tgt: (F.adjust_brightness(img, brightness_factor), tgt)))
347
+
348
+ if contrast is not None:
349
+ contrast_factor = random.uniform(contrast[0], contrast[1])
350
+ transforms.append(Lambda(lambda img, tgt: (F.adjust_contrast(img, contrast_factor), tgt)))
351
+
352
+ if saturation is not None:
353
+ saturation_factor = random.uniform(saturation[0], saturation[1])
354
+ transforms.append(Lambda(lambda img, tgt: (F.adjust_saturation(img, saturation_factor), tgt)))
355
+
356
+ if hue is not None:
357
+ hue_factor = random.uniform(hue[0], hue[1])
358
+ transforms.append(Lambda(lambda img, tgt: (F.adjust_hue(img, hue_factor), tgt)))
359
+
360
+ random.shuffle(transforms)
361
+ transform = Compose(transforms)
362
+
363
+ return transform
364
+
365
+ def __call__(self, img, tgt):
366
+ """
367
+ Args:
368
+ img (PIL Image): Input image.
369
+
370
+ Returns:
371
+ PIL Image: Color jittered image.
372
+ """
373
+ transform = self.get_params(self.brightness, self.contrast,
374
+ self.saturation, self.hue)
375
+ return transform(img, tgt)
376
+
377
+ def __repr__(self):
378
+ format_string = self.__class__.__name__ + '('
379
+ format_string += 'brightness={0}'.format(self.brightness)
380
+ format_string += ', contrast={0}'.format(self.contrast)
381
+ format_string += ', saturation={0}'.format(self.saturation)
382
+ format_string += ', hue={0})'.format(self.hue)
383
+ return format_string
384
+
385
+
386
+ class Normalize(object):
387
+ """Normalize a tensor image with mean and standard deviation.
388
+ Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels, this transform
389
+ will normalize each channel of the input ``torch.*Tensor`` i.e.
390
+ ``input[channel] = (input[channel] - mean[channel]) / std[channel]``
391
+
392
+ .. note::
393
+ This transform acts out of place, i.e., it does not mutate the input tensor.
394
+
395
+ Args:
396
+ mean (sequence): Sequence of means for each channel.
397
+ std (sequence): Sequence of standard deviations for each channel.
398
+ """
399
+
400
+ def __init__(self, mean, std, inplace=False):
401
+ self.mean = mean
402
+ self.std = std
403
+ self.inplace = inplace
404
+
405
+ def __call__(self, img, tgt):
406
+ """
407
+ Args:
408
+ tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
409
+
410
+ Returns:
411
+ Tensor: Normalized Tensor image.
412
+ """
413
+ # return F.normalize(img, self.mean, self.std, self.inplace), tgt
414
+ return F.normalize(img, self.mean, self.std), tgt
415
+
416
+ def __repr__(self):
417
+ return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)
418
+
419
+
420
+ class ToTensor(object):
421
+ """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.
422
+
423
+ Converts a PIL Image or numpy.ndarray (H x W x C) in the range
424
+ [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]
425
+ if the PIL Image belongs to one of the modes (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1)
426
+ or if the numpy.ndarray has dtype = np.uint8
427
+
428
+ In the other cases, tensors are returned without scaling.
429
+ """
430
+
431
+ def __call__(self, img, tgt):
432
+ """
433
+ Args:
434
+ pic (PIL Image or numpy.ndarray): Image to be converted to tensor.
435
+
436
+ Returns:
437
+ Tensor: Converted image.
438
+ """
439
+ return F.to_tensor(img), tgt
440
+
441
+ def __repr__(self):
442
+ return self.__class__.__name__ + '()'
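A minimal usage sketch for the paired transforms above (not part of the committed file; the import path is assumed). Every op takes and returns an (image, target) pair, so geometric changes stay aligned between the image and its segmentation mask while photometric ops touch only the image:

```python
from PIL import Image

import transforms as joint_transforms  # assumed import path for the module above

pipeline = joint_transforms.Compose([
    joint_transforms.Resize(256),
    joint_transforms.CenterCrop(224),
    joint_transforms.RandomHorizontalFlip(p=0.5),
    joint_transforms.ToTensor(),
    joint_transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

img = Image.new('RGB', (320, 480))  # stand-ins for a real image / mask pair
tgt = Image.new('L', (320, 480))
img_t, tgt_t = pipeline(img, tgt)   # img_t: normalized tensor; tgt_t: cropped/flipped PIL mask
```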
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/generate_visualizations.py ADDED
@@ -0,0 +1,208 @@
1
+ import os
2
+ from tqdm import tqdm
3
+ import h5py
4
+
5
+ import argparse
6
+
7
+ # Import saliency methods and models
8
+ from misc_functions import *
9
+
10
+ from ViT_explanation_generator import Baselines, LRP
11
+ from ViT_new import vit_base_patch16_224
12
+ from ViT_LRP import vit_base_patch16_224 as vit_LRP
13
+ from ViT_orig_LRP import vit_base_patch16_224 as vit_orig_LRP
14
+
15
+ from torchvision.datasets import ImageNet
16
+
17
+
18
+ def normalize(tensor,
19
+ mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]):
20
+ dtype = tensor.dtype
21
+ mean = torch.as_tensor(mean, dtype=dtype, device=tensor.device)
22
+ std = torch.as_tensor(std, dtype=dtype, device=tensor.device)
23
+ tensor.sub_(mean[None, :, None, None]).div_(std[None, :, None, None])
24
+ return tensor
25
+
26
+
27
+ def compute_saliency_and_save(args):
28
+ first = True
29
+ with h5py.File(os.path.join(args.method_dir, 'results.hdf5'), 'a') as f:
30
+ data_cam = f.create_dataset('vis',
31
+ (1, 1, 224, 224),
32
+ maxshape=(None, 1, 224, 224),
33
+ dtype=np.float32,
34
+ compression="gzip")
35
+ data_image = f.create_dataset('image',
36
+ (1, 3, 224, 224),
37
+ maxshape=(None, 3, 224, 224),
38
+ dtype=np.float32,
39
+ compression="gzip")
40
+ data_target = f.create_dataset('target',
41
+ (1,),
42
+ maxshape=(None,),
43
+ dtype=np.int32,
44
+ compression="gzip")
45
+ for batch_idx, (data, target) in enumerate(tqdm(sample_loader)):
46
+ if first:
47
+ first = False
48
+ data_cam.resize(data_cam.shape[0] + data.shape[0] - 1, axis=0)
49
+ data_image.resize(data_image.shape[0] + data.shape[0] - 1, axis=0)
50
+ data_target.resize(data_target.shape[0] + data.shape[0] - 1, axis=0)
51
+ else:
52
+ data_cam.resize(data_cam.shape[0] + data.shape[0], axis=0)
53
+ data_image.resize(data_image.shape[0] + data.shape[0], axis=0)
54
+ data_target.resize(data_target.shape[0] + data.shape[0], axis=0)
55
+
56
+ # Add data
57
+ data_image[-data.shape[0]:] = data.data.cpu().numpy()
58
+ data_target[-data.shape[0]:] = target.data.cpu().numpy()
59
+
60
+ target = target.to(device)
61
+
62
+ data = normalize(data)
63
+ data = data.to(device)
64
+ data.requires_grad_()
65
+
66
+ index = None
67
+ if args.vis_class == 'target':
68
+ index = target
69
+
70
+ if args.method == 'rollout':
71
+ Res = baselines.generate_rollout(data, start_layer=1).reshape(data.shape[0], 1, 14, 14)
72
+ # Res = Res - Res.mean()
73
+
74
+ elif args.method == 'lrp':
75
+ Res = lrp.generate_LRP(data, start_layer=1, index=index).reshape(data.shape[0], 1, 14, 14)
76
+ # Res = Res - Res.mean()
77
+
78
+ elif args.method == 'transformer_attribution':
79
+ Res = lrp.generate_LRP(data, start_layer=1, method="grad", index=index).reshape(data.shape[0], 1, 14, 14)
80
+ # Res = Res - Res.mean()
81
+
82
+ elif args.method == 'full_lrp':
83
+ Res = orig_lrp.generate_LRP(data, method="full", index=index).reshape(data.shape[0], 1, 224, 224)
84
+ # Res = Res - Res.mean()
85
+
86
+ elif args.method == 'lrp_last_layer':
87
+ Res = orig_lrp.generate_LRP(data, method="last_layer", is_ablation=args.is_ablation, index=index) \
88
+ .reshape(data.shape[0], 1, 14, 14)
89
+ # Res = Res - Res.mean()
90
+
91
+ elif args.method == 'attn_last_layer':
92
+ Res = lrp.generate_LRP(data, method="last_layer_attn", is_ablation=args.is_ablation) \
93
+ .reshape(data.shape[0], 1, 14, 14)
94
+
95
+ elif args.method == 'attn_gradcam':
96
+ Res = baselines.generate_cam_attn(data, index=index).reshape(data.shape[0], 1, 14, 14)
97
+
98
+ if args.method != 'full_lrp' and args.method != 'input_grads':
99
+ Res = torch.nn.functional.interpolate(Res, scale_factor=16, mode='bilinear').cuda()
100
+ Res = (Res - Res.min()) / (Res.max() - Res.min())
101
+
102
+ data_cam[-data.shape[0]:] = Res.data.cpu().numpy()
103
+
104
+
105
+ if __name__ == "__main__":
106
+ parser = argparse.ArgumentParser(description='Generate ViT attribution visualizations')
107
+ parser.add_argument('--batch-size', type=int,
108
+ default=1,
109
+ help='')
110
+ parser.add_argument('--method', type=str,
111
+ default='grad_rollout',
112
+ choices=['rollout', 'lrp', 'transformer_attribution', 'full_lrp', 'lrp_last_layer',
113
+ 'attn_last_layer', 'attn_gradcam'],
114
+ help='')
115
+ parser.add_argument('--lmd', type=float,
116
+ default=10,
117
+ help='')
118
+ parser.add_argument('--vis-class', type=str,
119
+ default='top',
120
+ choices=['top', 'target', 'index'],
121
+ help='')
122
+ parser.add_argument('--class-id', type=int,
123
+ default=0,
124
+ help='')
125
+ parser.add_argument('--cls-agn', action='store_true',
126
+ default=False,
127
+ help='')
128
+ parser.add_argument('--no-ia', action='store_true',
129
+ default=False,
130
+ help='')
131
+ parser.add_argument('--no-fx', action='store_true',
132
+ default=False,
133
+ help='')
134
+ parser.add_argument('--no-fgx', action='store_true',
135
+ default=False,
136
+ help='')
137
+ parser.add_argument('--no-m', action='store_true',
138
+ default=False,
139
+ help='')
140
+ parser.add_argument('--no-reg', action='store_true',
141
+ default=False,
142
+ help='')
143
+ parser.add_argument('--is-ablation', type=bool,
144
+ default=False,
145
+ help='')
146
+ parser.add_argument('--imagenet-validation-path', type=str,
147
+ required=True,
148
+ help='')
149
+ args = parser.parse_args()
150
+
151
+ # PATH variables
152
+ PATH = os.path.dirname(os.path.abspath(__file__)) + '/'
153
+ os.makedirs(os.path.join(PATH, 'visualizations'), exist_ok=True)
154
+
155
+ try:
156
+ os.remove(os.path.join(PATH, 'visualizations/{}/{}/results.hdf5'.format(args.method,
157
+ args.vis_class)))
158
+ except OSError:
159
+ pass
160
+
161
+
162
+ os.makedirs(os.path.join(PATH, 'visualizations/{}'.format(args.method)), exist_ok=True)
163
+ if args.vis_class == 'index':
164
+ os.makedirs(os.path.join(PATH, 'visualizations/{}/{}_{}'.format(args.method,
165
+ args.vis_class,
166
+ args.class_id)), exist_ok=True)
167
+ args.method_dir = os.path.join(PATH, 'visualizations/{}/{}_{}'.format(args.method,
168
+ args.vis_class,
169
+ args.class_id))
170
+ else:
171
+ ablation_fold = 'ablation' if args.is_ablation else 'not_ablation'
172
+ os.makedirs(os.path.join(PATH, 'visualizations/{}/{}/{}'.format(args.method,
173
+ args.vis_class, ablation_fold)), exist_ok=True)
174
+ args.method_dir = os.path.join(PATH, 'visualizations/{}/{}/{}'.format(args.method,
175
+ args.vis_class, ablation_fold))
176
+
177
+ cuda = torch.cuda.is_available()
178
+ device = torch.device("cuda" if cuda else "cpu")
179
+
180
+ # Model
181
+ model = vit_base_patch16_224(pretrained=True).cuda()
182
+ baselines = Baselines(model)
183
+
184
+ # LRP
185
+ model_LRP = vit_LRP(pretrained=True).cuda()
186
+ model_LRP.eval()
187
+ lrp = LRP(model_LRP)
188
+
189
+ # orig LRP
190
+ model_orig_LRP = vit_orig_LRP(pretrained=True).cuda()
191
+ model_orig_LRP.eval()
192
+ orig_lrp = LRP(model_orig_LRP)
193
+
194
+ # Dataset loader for sample images
195
+ transform = transforms.Compose([
196
+ transforms.Resize((224, 224)),
197
+ transforms.ToTensor(),
198
+ ])
199
+
200
+ imagenet_ds = ImageNet(args.imagenet_validation_path, split='val', download=False, transform=transform)
201
+ sample_loader = torch.utils.data.DataLoader(
202
+ imagenet_ds,
203
+ batch_size=args.batch_size,
204
+ shuffle=False,
205
+ num_workers=4
206
+ )
207
+
208
+ compute_saliency_and_save(args)
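An example invocation of the script above (paths are illustrative). With these flags it resolves `method_dir` to `visualizations/transformer_attribution/target/not_ablation/` and writes the saliency maps, input images, and targets into `results.hdf5` there:

```
python generate_visualizations.py \
    --method transformer_attribution \
    --vis-class target \
    --imagenet-validation-path /path/to/imagenet \
    --batch-size 1
```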
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/helpers.py ADDED
@@ -0,0 +1,295 @@
1
+ """ Model creation / weight loading / state_dict helpers
2
+
3
+ Hacked together by / Copyright 2020 Ross Wightman
4
+ """
5
+ import logging
6
+ import os
7
+ import math
8
+ from collections import OrderedDict
9
+ from copy import deepcopy
10
+ from typing import Callable
11
+
12
+ import torch
13
+ import torch.nn as nn
14
+ import torch.utils.model_zoo as model_zoo
15
+
16
+ _logger = logging.getLogger(__name__)
17
+
18
+
19
+ def load_state_dict(checkpoint_path, use_ema=False):
20
+ if checkpoint_path and os.path.isfile(checkpoint_path):
21
+ checkpoint = torch.load(checkpoint_path, map_location='cpu')
22
+ state_dict_key = 'state_dict'
23
+ if isinstance(checkpoint, dict):
24
+ if use_ema and 'state_dict_ema' in checkpoint:
25
+ state_dict_key = 'state_dict_ema'
26
+ if state_dict_key and state_dict_key in checkpoint:
27
+ new_state_dict = OrderedDict()
28
+ for k, v in checkpoint[state_dict_key].items():
29
+ # strip `module.` prefix
30
+ name = k[7:] if k.startswith('module') else k
31
+ new_state_dict[name] = v
32
+ state_dict = new_state_dict
33
+ else:
34
+ state_dict = checkpoint
35
+ _logger.info("Loaded {} from checkpoint '{}'".format(state_dict_key, checkpoint_path))
36
+ return state_dict
37
+ else:
38
+ _logger.error("No checkpoint found at '{}'".format(checkpoint_path))
39
+ raise FileNotFoundError()
40
+
41
+
42
+ def load_checkpoint(model, checkpoint_path, use_ema=False, strict=True):
43
+ state_dict = load_state_dict(checkpoint_path, use_ema)
44
+ model.load_state_dict(state_dict, strict=strict)
45
+
46
+
47
+ def resume_checkpoint(model, checkpoint_path, optimizer=None, loss_scaler=None, log_info=True):
48
+ resume_epoch = None
49
+ if os.path.isfile(checkpoint_path):
50
+ checkpoint = torch.load(checkpoint_path, map_location='cpu')
51
+ if isinstance(checkpoint, dict) and 'state_dict' in checkpoint:
52
+ if log_info:
53
+ _logger.info('Restoring model state from checkpoint...')
54
+ new_state_dict = OrderedDict()
55
+ for k, v in checkpoint['state_dict'].items():
56
+ name = k[7:] if k.startswith('module') else k
57
+ new_state_dict[name] = v
58
+ model.load_state_dict(new_state_dict)
59
+
60
+ if optimizer is not None and 'optimizer' in checkpoint:
61
+ if log_info:
62
+ _logger.info('Restoring optimizer state from checkpoint...')
63
+ optimizer.load_state_dict(checkpoint['optimizer'])
64
+
65
+ if loss_scaler is not None and loss_scaler.state_dict_key in checkpoint:
66
+ if log_info:
67
+ _logger.info('Restoring AMP loss scaler state from checkpoint...')
68
+ loss_scaler.load_state_dict(checkpoint[loss_scaler.state_dict_key])
69
+
70
+ if 'epoch' in checkpoint:
71
+ resume_epoch = checkpoint['epoch']
72
+ if 'version' in checkpoint and checkpoint['version'] > 1:
73
+ resume_epoch += 1 # start at the next epoch, old checkpoints incremented before save
74
+
75
+ if log_info:
76
+ _logger.info("Loaded checkpoint '{}' (epoch {})".format(checkpoint_path, checkpoint['epoch']))
77
+ else:
78
+ model.load_state_dict(checkpoint)
79
+ if log_info:
80
+ _logger.info("Loaded checkpoint '{}'".format(checkpoint_path))
81
+ return resume_epoch
82
+ else:
83
+ _logger.error("No checkpoint found at '{}'".format(checkpoint_path))
84
+ raise FileNotFoundError()
85
+
86
+
87
+ def load_pretrained(model, cfg=None, num_classes=1000, in_chans=3, filter_fn=None, strict=True):
88
+ if cfg is None:
89
+ cfg = getattr(model, 'default_cfg')
90
+ if cfg is None or 'url' not in cfg or not cfg['url']:
91
+ _logger.warning("Pretrained model URL is invalid, using random initialization.")
92
+ return
93
+
94
+ state_dict = model_zoo.load_url(cfg['url'], progress=False, map_location='cpu')
95
+
96
+ if filter_fn is not None:
97
+ state_dict = filter_fn(state_dict)
98
+
99
+ if in_chans == 1:
100
+ conv1_name = cfg['first_conv']
101
+ _logger.info('Converting first conv (%s) pretrained weights from 3 to 1 channel' % conv1_name)
102
+ conv1_weight = state_dict[conv1_name + '.weight']
103
+ # Some weights are in torch.half, ensure it's float for sum on CPU
104
+ conv1_type = conv1_weight.dtype
105
+ conv1_weight = conv1_weight.float()
106
+ O, I, J, K = conv1_weight.shape
107
+ if I > 3:
108
+ assert conv1_weight.shape[1] % 3 == 0
109
+ # For models with space2depth stems
110
+ conv1_weight = conv1_weight.reshape(O, I // 3, 3, J, K)
111
+ conv1_weight = conv1_weight.sum(dim=2, keepdim=False)
112
+ else:
113
+ conv1_weight = conv1_weight.sum(dim=1, keepdim=True)
114
+ conv1_weight = conv1_weight.to(conv1_type)
115
+ state_dict[conv1_name + '.weight'] = conv1_weight
116
+ elif in_chans != 3:
117
+ conv1_name = cfg['first_conv']
118
+ conv1_weight = state_dict[conv1_name + '.weight']
119
+ conv1_type = conv1_weight.dtype
120
+ conv1_weight = conv1_weight.float()
121
+ O, I, J, K = conv1_weight.shape
122
+ if I != 3:
123
+ _logger.warning('Deleting first conv (%s) from pretrained weights.' % conv1_name)
124
+ del state_dict[conv1_name + '.weight']
125
+ strict = False
126
+ else:
127
+ # NOTE this strategy should be better than random init, but there could be other combinations of
128
+ # the original RGB input layer weights that'd work better for specific cases.
129
+ _logger.info('Repeating first conv (%s) weights in channel dim.' % conv1_name)
130
+ repeat = int(math.ceil(in_chans / 3))
131
+ conv1_weight = conv1_weight.repeat(1, repeat, 1, 1)[:, :in_chans, :, :]
132
+ conv1_weight *= (3 / float(in_chans))
133
+ conv1_weight = conv1_weight.to(conv1_type)
134
+ state_dict[conv1_name + '.weight'] = conv1_weight
135
+
136
+ classifier_name = cfg['classifier']
137
+ if num_classes == 1000 and cfg['num_classes'] == 1001:
138
+ # special case for imagenet trained models with extra background class in pretrained weights
139
+ classifier_weight = state_dict[classifier_name + '.weight']
140
+ state_dict[classifier_name + '.weight'] = classifier_weight[1:]
141
+ classifier_bias = state_dict[classifier_name + '.bias']
142
+ state_dict[classifier_name + '.bias'] = classifier_bias[1:]
143
+ elif num_classes != cfg['num_classes']:
144
+ # completely discard fully connected for all other differences between pretrained and created model
145
+ del state_dict[classifier_name + '.weight']
146
+ del state_dict[classifier_name + '.bias']
147
+ strict = False
148
+
149
+ model.load_state_dict(state_dict, strict=strict)
150
+
151
+
152
+ def extract_layer(model, layer):
153
+ layer = layer.split('.')
154
+ module = model
155
+ if hasattr(model, 'module') and layer[0] != 'module':
156
+ module = model.module
157
+ if not hasattr(model, 'module') and layer[0] == 'module':
158
+ layer = layer[1:]
159
+ for l in layer:
160
+ if hasattr(module, l):
161
+ if not l.isdigit():
162
+ module = getattr(module, l)
163
+ else:
164
+ module = module[int(l)]
165
+ else:
166
+ return module
167
+ return module
168
+
169
+
170
+ def set_layer(model, layer, val):
171
+ layer = layer.split('.')
172
+ module = model
173
+ if hasattr(model, 'module') and layer[0] != 'module':
174
+ module = model.module
175
+ lst_index = 0
176
+ module2 = module
177
+ for l in layer:
178
+ if hasattr(module2, l):
179
+ if not l.isdigit():
180
+ module2 = getattr(module2, l)
181
+ else:
182
+ module2 = module2[int(l)]
183
+ lst_index += 1
184
+ lst_index -= 1
185
+ for l in layer[:lst_index]:
186
+ if not l.isdigit():
187
+ module = getattr(module, l)
188
+ else:
189
+ module = module[int(l)]
190
+ l = layer[lst_index]
191
+ setattr(module, l, val)
192
+
193
+
194
+ def adapt_model_from_string(parent_module, model_string):
195
+ separator = '***'
196
+ state_dict = {}
197
+ lst_shape = model_string.split(separator)
198
+ for k in lst_shape:
199
+ k = k.split(':')
200
+ key = k[0]
201
+ shape = k[1][1:-1].split(',')
202
+ if shape[0] != '':
203
+ state_dict[key] = [int(i) for i in shape]
204
+
205
+ new_module = deepcopy(parent_module)
206
+ for n, m in parent_module.named_modules():
207
+ old_module = extract_layer(parent_module, n)
208
+ if isinstance(old_module, nn.Conv2d) or isinstance(old_module, Conv2dSame):
209
+ if isinstance(old_module, Conv2dSame):
210
+ conv = Conv2dSame
211
+ else:
212
+ conv = nn.Conv2d
213
+ s = state_dict[n + '.weight']
214
+ in_channels = s[1]
215
+ out_channels = s[0]
216
+ g = 1
217
+ if old_module.groups > 1:
218
+ in_channels = out_channels
219
+ g = in_channels
220
+ new_conv = conv(
221
+ in_channels=in_channels, out_channels=out_channels, kernel_size=old_module.kernel_size,
222
+ bias=old_module.bias is not None, padding=old_module.padding, dilation=old_module.dilation,
223
+ groups=g, stride=old_module.stride)
224
+ set_layer(new_module, n, new_conv)
225
+ if isinstance(old_module, nn.BatchNorm2d):
226
+ new_bn = nn.BatchNorm2d(
227
+ num_features=state_dict[n + '.weight'][0], eps=old_module.eps, momentum=old_module.momentum,
228
+ affine=old_module.affine, track_running_stats=True)
229
+ set_layer(new_module, n, new_bn)
230
+ if isinstance(old_module, nn.Linear):
231
+ # FIXME extra checks to ensure this is actually the FC classifier layer and not a diff Linear layer?
232
+ num_features = state_dict[n + '.weight'][1]
233
+ new_fc = nn.Linear(
234
+ in_features=num_features, out_features=old_module.out_features, bias=old_module.bias is not None)
235
+ set_layer(new_module, n, new_fc)
236
+ if hasattr(new_module, 'num_features'):
237
+ new_module.num_features = num_features
238
+ new_module.eval()
239
+ parent_module.eval()
240
+
241
+ return new_module
242
+
243
+
244
+ def adapt_model_from_file(parent_module, model_variant):
245
+ adapt_file = os.path.join(os.path.dirname(__file__), 'pruned', model_variant + '.txt')
246
+ with open(adapt_file, 'r') as f:
247
+ return adapt_model_from_string(parent_module, f.read().strip())
248
+
249
+
250
+ def build_model_with_cfg(
251
+ model_cls: Callable,
252
+ variant: str,
253
+ pretrained: bool,
254
+ default_cfg: dict,
255
+ model_cfg: dict = None,
256
+ feature_cfg: dict = None,
257
+ pretrained_strict: bool = True,
258
+ pretrained_filter_fn: Callable = None,
259
+ **kwargs):
260
+ pruned = kwargs.pop('pruned', False)
261
+ features = False
262
+ feature_cfg = feature_cfg or {}
263
+
264
+ if kwargs.pop('features_only', False):
265
+ features = True
266
+ feature_cfg.setdefault('out_indices', (0, 1, 2, 3, 4))
267
+ if 'out_indices' in kwargs:
268
+ feature_cfg['out_indices'] = kwargs.pop('out_indices')
269
+
270
+ model = model_cls(**kwargs) if model_cfg is None else model_cls(cfg=model_cfg, **kwargs)
271
+ model.default_cfg = deepcopy(default_cfg)
272
+
273
+ if pruned:
274
+ model = adapt_model_from_file(model, variant)
275
+
276
+ if pretrained:
277
+ load_pretrained(
278
+ model,
279
+ num_classes=kwargs.get('num_classes', 0),
280
+ in_chans=kwargs.get('in_chans', 3),
281
+ filter_fn=pretrained_filter_fn, strict=pretrained_strict)
282
+
283
+ if features:
284
+ feature_cls = FeatureListNet
285
+ if 'feature_cls' in feature_cfg:
286
+ feature_cls = feature_cfg.pop('feature_cls')
287
+ if isinstance(feature_cls, str):
288
+ feature_cls = feature_cls.lower()
289
+ if 'hook' in feature_cls:
290
+ feature_cls = FeatureHookNet
291
+ else:
292
+ assert False, f'Unknown feature class {feature_cls}'
293
+ model = feature_cls(model, **feature_cfg)
294
+
295
+ return model
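A hypothetical sketch of how `build_model_with_cfg` is driven; every name below is made up for illustration. With `pretrained=False` the `load_pretrained()` branch, and therefore the `url` / `first_conv` / `classifier` entries of the config, are never exercised:

```python
import torch.nn as nn

from helpers import build_model_with_cfg  # assumed import path for the module above

_cfg = dict(url='', num_classes=10, first_conv='conv', classifier='fc')


class TinyNet(nn.Module):
    def __init__(self, num_classes=10, in_chans=3):
        super().__init__()
        self.conv = nn.Conv2d(in_chans, 8, kernel_size=3, padding=1)
        self.fc = nn.Linear(8, num_classes)

    def forward(self, x):
        x = self.conv(x).mean(dim=(2, 3))  # global average pool
        return self.fc(x)


# pretrained=False skips load_pretrained(), so no checkpoint URL is required.
model = build_model_with_cfg(TinyNet, 'tiny_net', pretrained=False,
                             default_cfg=_cfg, num_classes=10)
```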
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/layer_helpers.py ADDED
@@ -0,0 +1,21 @@
+ """ Layer/Module Helpers
+ Hacked together by / Copyright 2020 Ross Wightman
+ """
+ from itertools import repeat
+ import collections.abc
+
+
+ # From PyTorch internals
+ def _ntuple(n):
+     def parse(x):
+         if isinstance(x, collections.abc.Iterable):
+             return x
+         return tuple(repeat(x, n))
+     return parse
+
+
+ to_1tuple = _ntuple(1)
+ to_2tuple = _ntuple(2)
+ to_3tuple = _ntuple(3)
+ to_4tuple = _ntuple(4)
+ to_ntuple = _ntuple
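For reference, a quick illustration of what the `_ntuple` helpers above return (import path assumed):

```python
from layer_helpers import to_2tuple  # assumed import path for the module above

print(to_2tuple(7))       # (7, 7)  -- a scalar is repeated n times
print(to_2tuple((3, 5)))  # (3, 5)  -- iterables pass through unchanged
```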
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/misc_functions.py ADDED
@@ -0,0 +1,68 @@
+ #
+ # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/
+ # Written by Suraj Srinivas <[email protected]>
+ #
+
+ """ Misc helper functions """
+
+ import cv2
+ import numpy as np
+ import subprocess
+
+ import torch
+ import torchvision.transforms as transforms
+
+
+ class NormalizeInverse(transforms.Normalize):
+     # Undo normalization on images
+
+     def __init__(self, mean, std):
+         mean = torch.as_tensor(mean)
+         std = torch.as_tensor(std)
+         std_inv = 1 / (std + 1e-7)
+         mean_inv = -mean * std_inv
+         super(NormalizeInverse, self).__init__(mean=mean_inv, std=std_inv)
+
+     def __call__(self, tensor):
+         return super(NormalizeInverse, self).__call__(tensor.clone())
+
+
+ def create_folder(folder_name):
+     try:
+         subprocess.call(['mkdir', '-p', folder_name])
+     except OSError:
+         None
+
+
+ def save_saliency_map(image, saliency_map, filename):
+     """
+     Save saliency map on image.
+
+     Args:
+         image: Tensor of size (3,H,W)
+         saliency_map: Tensor of size (1,H,W)
+         filename: string with complete path and file extension
+
+     """
+
+     image = image.data.cpu().numpy()
+     saliency_map = saliency_map.data.cpu().numpy()
+
+     saliency_map = saliency_map - saliency_map.min()
+     saliency_map = saliency_map / saliency_map.max()
+     saliency_map = saliency_map.clip(0, 1)
+
+     saliency_map = np.uint8(saliency_map * 255).transpose(1, 2, 0)
+     saliency_map = cv2.resize(saliency_map, (224, 224))
+
+     image = np.uint8(image * 255).transpose(1, 2, 0)
+     image = cv2.resize(image, (224, 224))
+
+     # Apply JET colormap
+     color_heatmap = cv2.applyColorMap(saliency_map, cv2.COLORMAP_JET)
+
+     # Combine image with heatmap
+     img_with_heatmap = np.float32(color_heatmap) + np.float32(image)
+     img_with_heatmap = img_with_heatmap / np.max(img_with_heatmap)
+
+     cv2.imwrite(filename, np.uint8(255 * img_with_heatmap))
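A small self-contained sketch (import path assumed) of the intended round trip: `NormalizeInverse` undoes the training-time normalization so that `save_saliency_map` can overlay an attribution map on an image with natural colors:

```python
import torch
import torchvision.transforms as transforms

from misc_functions import NormalizeInverse, save_saliency_map  # assumed import path

norm = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
denorm = NormalizeInverse(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

img = torch.rand(3, 224, 224)        # stand-in for a preprocessed input image
restored = denorm(norm(img))         # recovers img (up to the 1e-7 epsilon)

saliency = torch.rand(1, 224, 224)   # stand-in for a real attribution map
save_saliency_map(restored, saliency, 'saliency_overlay.png')
```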
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/modules/__init__.py ADDED
File without changes
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/modules/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (223 Bytes). View file
 
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/modules/__pycache__/layers_lrp.cpython-310.pyc ADDED
Binary file (9.31 kB). View file
 
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/modules/__pycache__/layers_ours.cpython-310.pyc ADDED
Binary file (9.75 kB).
 
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/modules/layers_lrp.py ADDED
@@ -0,0 +1,261 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+ __all__ = ['forward_hook', 'Clone', 'Add', 'Cat', 'ReLU', 'GELU', 'Dropout', 'BatchNorm2d', 'Linear', 'MaxPool2d',
6
+ 'AdaptiveAvgPool2d', 'AvgPool2d', 'Conv2d', 'Sequential', 'safe_divide', 'einsum', 'Softmax', 'IndexSelect',
7
+ 'LayerNorm', 'AddEye']
8
+
9
+
10
+ def safe_divide(a, b):
11
+ den = b.clamp(min=1e-9) + b.clamp(max=1e-9)
12
+ den = den + den.eq(0).type(den.type()) * 1e-9
13
+ return a / den * b.ne(0).type(b.type())
14
+
15
+
16
+ def forward_hook(self, input, output):
17
+ if type(input[0]) in (list, tuple):
18
+ self.X = []
19
+ for i in input[0]:
20
+ x = i.detach()
21
+ x.requires_grad = True
22
+ self.X.append(x)
23
+ else:
24
+ self.X = input[0].detach()
25
+ self.X.requires_grad = True
26
+
27
+ self.Y = output
28
+
29
+
30
+ def backward_hook(self, grad_input, grad_output):
31
+ self.grad_input = grad_input
32
+ self.grad_output = grad_output
33
+
34
+
35
+ class RelProp(nn.Module):
36
+ def __init__(self):
37
+ super(RelProp, self).__init__()
38
+ # if not self.training:
39
+ self.register_forward_hook(forward_hook)
40
+
41
+ def gradprop(self, Z, X, S):
42
+ C = torch.autograd.grad(Z, X, S, retain_graph=True)
43
+ return C
44
+
45
+ def relprop(self, R, alpha):
46
+ return R
47
+
48
+
49
+ class RelPropSimple(RelProp):
50
+ def relprop(self, R, alpha):
51
+ Z = self.forward(self.X)
52
+ S = safe_divide(R, Z)
53
+ C = self.gradprop(Z, self.X, S)
54
+
55
+ if torch.is_tensor(self.X) == False:
56
+ outputs = []
57
+ outputs.append(self.X[0] * C[0])
58
+ outputs.append(self.X[1] * C[1])
59
+ else:
60
+ outputs = self.X * (C[0])
61
+ return outputs
62
+
63
+ class AddEye(RelPropSimple):
64
+ # input of shape B, C, seq_len, seq_len
65
+ def forward(self, input):
66
+ return input + torch.eye(input.shape[2]).expand_as(input).to(input.device)
67
+
68
+ class ReLU(nn.ReLU, RelProp):
69
+ pass
70
+
71
+ class GELU(nn.GELU, RelProp):
72
+ pass
73
+
74
+ class Softmax(nn.Softmax, RelProp):
75
+ pass
76
+
77
+ class LayerNorm(nn.LayerNorm, RelProp):
78
+ pass
79
+
80
+ class Dropout(nn.Dropout, RelProp):
81
+ pass
82
+
83
+
84
+ class MaxPool2d(nn.MaxPool2d, RelPropSimple):
85
+ pass
86
+
87
+ class LayerNorm(nn.LayerNorm, RelProp):
88
+ pass
89
+
90
+ class AdaptiveAvgPool2d(nn.AdaptiveAvgPool2d, RelPropSimple):
91
+ pass
92
+
93
+
94
+ class AvgPool2d(nn.AvgPool2d, RelPropSimple):
95
+ pass
96
+
97
+
98
+ class Add(RelPropSimple):
99
+ def forward(self, inputs):
100
+ return torch.add(*inputs)
101
+
102
+ class einsum(RelPropSimple):
103
+ def __init__(self, equation):
104
+ super().__init__()
105
+ self.equation = equation
106
+ def forward(self, *operands):
107
+ return torch.einsum(self.equation, *operands)
108
+
109
+ class IndexSelect(RelProp):
110
+ def forward(self, inputs, dim, indices):
111
+ self.__setattr__('dim', dim)
112
+ self.__setattr__('indices', indices)
113
+
114
+ return torch.index_select(inputs, dim, indices)
115
+
116
+ def relprop(self, R, alpha):
117
+ Z = self.forward(self.X, self.dim, self.indices)
118
+ S = safe_divide(R, Z)
119
+ C = self.gradprop(Z, self.X, S)
120
+
121
+ if torch.is_tensor(self.X) == False:
122
+ outputs = []
123
+ outputs.append(self.X[0] * C[0])
124
+ outputs.append(self.X[1] * C[1])
125
+ else:
126
+ outputs = self.X * (C[0])
127
+ return outputs
128
+
129
+
130
+
131
+ class Clone(RelProp):
132
+ def forward(self, input, num):
133
+ self.__setattr__('num', num)
134
+ outputs = []
135
+ for _ in range(num):
136
+ outputs.append(input)
137
+
138
+ return outputs
139
+
140
+ def relprop(self, R, alpha):
141
+ Z = []
142
+ for _ in range(self.num):
143
+ Z.append(self.X)
144
+ S = [safe_divide(r, z) for r, z in zip(R, Z)]
145
+ C = self.gradprop(Z, self.X, S)[0]
146
+
147
+ R = self.X * C
148
+
149
+ return R
150
+
151
+ class Cat(RelProp):
152
+ def forward(self, inputs, dim):
153
+ self.__setattr__('dim', dim)
154
+ return torch.cat(inputs, dim)
155
+
156
+ def relprop(self, R, alpha):
157
+ Z = self.forward(self.X, self.dim)
158
+ S = safe_divide(R, Z)
159
+ C = self.gradprop(Z, self.X, S)
160
+
161
+ outputs = []
162
+ for x, c in zip(self.X, C):
163
+ outputs.append(x * c)
164
+
165
+ return outputs
166
+
167
+
168
+ class Sequential(nn.Sequential):
169
+ def relprop(self, R, alpha):
170
+ for m in reversed(self._modules.values()):
171
+ R = m.relprop(R, alpha)
172
+ return R
173
+
174
+
175
+ class BatchNorm2d(nn.BatchNorm2d, RelProp):
176
+ def relprop(self, R, alpha):
177
+ X = self.X
178
+ beta = 1 - alpha
179
+ weight = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3) / (
180
+ (self.running_var.unsqueeze(0).unsqueeze(2).unsqueeze(3).pow(2) + self.eps).pow(0.5))
181
+ Z = X * weight + 1e-9
182
+ S = R / Z
183
+ Ca = S * weight
184
+ R = self.X * (Ca)
185
+ return R
186
+
187
+
188
+ class Linear(nn.Linear, RelProp):
189
+ def relprop(self, R, alpha):
190
+ beta = alpha - 1
191
+ pw = torch.clamp(self.weight, min=0)
192
+ nw = torch.clamp(self.weight, max=0)
193
+ px = torch.clamp(self.X, min=0)
194
+ nx = torch.clamp(self.X, max=0)
195
+
196
+ def f(w1, w2, x1, x2):
197
+ Z1 = F.linear(x1, w1)
198
+ Z2 = F.linear(x2, w2)
199
+ S1 = safe_divide(R, Z1)
200
+ S2 = safe_divide(R, Z2)
201
+ C1 = x1 * torch.autograd.grad(Z1, x1, S1)[0]
202
+ C2 = x2 * torch.autograd.grad(Z2, x2, S2)[0]
203
+
204
+ return C1 + C2
205
+
206
+ activator_relevances = f(pw, nw, px, nx)
207
+ inhibitor_relevances = f(nw, pw, px, nx)
208
+
209
+ R = alpha * activator_relevances - beta * inhibitor_relevances
210
+
211
+ return R
212
+
213
+
214
+ class Conv2d(nn.Conv2d, RelProp):
215
+ def gradprop2(self, DY, weight):
216
+ Z = self.forward(self.X)
217
+
218
+ output_padding = self.X.size()[2] - (
219
+ (Z.size()[2] - 1) * self.stride[0] - 2 * self.padding[0] + self.kernel_size[0])
220
+
221
+ return F.conv_transpose2d(DY, weight, stride=self.stride, padding=self.padding, output_padding=output_padding)
222
+
223
+ def relprop(self, R, alpha):
224
+ if self.X.shape[1] == 3:
225
+ pw = torch.clamp(self.weight, min=0)
226
+ nw = torch.clamp(self.weight, max=0)
227
+ X = self.X
228
+ L = self.X * 0 + \
229
+ torch.min(torch.min(torch.min(self.X, dim=1, keepdim=True)[0], dim=2, keepdim=True)[0], dim=3,
230
+ keepdim=True)[0]
231
+ H = self.X * 0 + \
232
+ torch.max(torch.max(torch.max(self.X, dim=1, keepdim=True)[0], dim=2, keepdim=True)[0], dim=3,
233
+ keepdim=True)[0]
234
+ Za = torch.conv2d(X, self.weight, bias=None, stride=self.stride, padding=self.padding) - \
235
+ torch.conv2d(L, pw, bias=None, stride=self.stride, padding=self.padding) - \
236
+ torch.conv2d(H, nw, bias=None, stride=self.stride, padding=self.padding) + 1e-9
237
+
238
+ S = R / Za
239
+ C = X * self.gradprop2(S, self.weight) - L * self.gradprop2(S, pw) - H * self.gradprop2(S, nw)
240
+ R = C
241
+ else:
242
+ beta = alpha - 1
243
+ pw = torch.clamp(self.weight, min=0)
244
+ nw = torch.clamp(self.weight, max=0)
245
+ px = torch.clamp(self.X, min=0)
246
+ nx = torch.clamp(self.X, max=0)
247
+
248
+ def f(w1, w2, x1, x2):
249
+ Z1 = F.conv2d(x1, w1, bias=None, stride=self.stride, padding=self.padding)
250
+ Z2 = F.conv2d(x2, w2, bias=None, stride=self.stride, padding=self.padding)
251
+ S1 = safe_divide(R, Z1)
252
+ S2 = safe_divide(R, Z2)
253
+ C1 = x1 * self.gradprop(Z1, x1, S1)[0]
254
+ C2 = x2 * self.gradprop(Z2, x2, S2)[0]
255
+ return C1 + C2
256
+
257
+ activator_relevances = f(pw, nw, px, nx)
258
+ inhibitor_relevances = f(nw, pw, px, nx)
259
+
260
+ R = alpha * activator_relevances - beta * inhibitor_relevances
261
+ return R
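Each class above pairs a standard layer with a relprop method, so relevance can be walked backwards through a network after one forward pass (the forward hook caches every layer's input as .X). A minimal sketch with a toy network; the shapes and the alpha value are illustrative, and this is not the full ViT attribution pipeline.

import torch
from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.modules.layers_lrp import (
    Linear, ReLU, Sequential,
)

model = Sequential(Linear(8, 16), ReLU(), Linear(16, 4))
x = torch.rand(2, 8)
out = model(x)                   # forward hooks cache each layer's input

R = out.clone()                  # start relevance from the raw outputs
R = model.relprop(R, alpha=1)    # alpha=1 / beta=0 rule, applied layer by layer in reverse
print(R.shape)                   # torch.Size([2, 8]): relevance per input feature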
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/modules/layers_ours.py ADDED
@@ -0,0 +1,280 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+ __all__ = ['forward_hook', 'Clone', 'Add', 'Cat', 'ReLU', 'GELU', 'Dropout', 'BatchNorm2d', 'Linear', 'MaxPool2d',
6
+ 'AdaptiveAvgPool2d', 'AvgPool2d', 'Conv2d', 'Sequential', 'safe_divide', 'einsum', 'Softmax', 'IndexSelect',
7
+ 'LayerNorm', 'AddEye']
8
+
9
+
10
+ def safe_divide(a, b):
11
+ den = b.clamp(min=1e-9) + b.clamp(max=1e-9)
12
+ den = den + den.eq(0).type(den.type()) * 1e-9
13
+ return a / den * b.ne(0).type(b.type())
14
+
15
+
16
+ def forward_hook(self, input, output):
17
+ if type(input[0]) in (list, tuple):
18
+ self.X = []
19
+ for i in input[0]:
20
+ x = i.detach()
21
+ x.requires_grad = True
22
+ self.X.append(x)
23
+ else:
24
+ self.X = input[0].detach()
25
+ self.X.requires_grad = True
26
+
27
+ self.Y = output
28
+
29
+
30
+ def backward_hook(self, grad_input, grad_output):
31
+ self.grad_input = grad_input
32
+ self.grad_output = grad_output
33
+
34
+
35
+ class RelProp(nn.Module):
36
+ def __init__(self):
37
+ super(RelProp, self).__init__()
38
+ # if not self.training:
39
+ self.register_forward_hook(forward_hook)
40
+
41
+ def gradprop(self, Z, X, S):
42
+ C = torch.autograd.grad(Z, X, S, retain_graph=True)
43
+ return C
44
+
45
+ def relprop(self, R, alpha):
46
+ return R
47
+
48
+ class RelPropSimple(RelProp):
49
+ def relprop(self, R, alpha):
50
+ Z = self.forward(self.X)
51
+ S = safe_divide(R, Z)
52
+ C = self.gradprop(Z, self.X, S)
53
+
54
+ if torch.is_tensor(self.X) == False:
55
+ outputs = []
56
+ outputs.append(self.X[0] * C[0])
57
+ outputs.append(self.X[1] * C[1])
58
+ else:
59
+ outputs = self.X * (C[0])
60
+ return outputs
61
+
62
+ class AddEye(RelPropSimple):
63
+ # input of shape B, C, seq_len, seq_len
64
+ def forward(self, input):
65
+ return input + torch.eye(input.shape[2]).expand_as(input).to(input.device)
66
+
67
+ class ReLU(nn.ReLU, RelProp):
68
+ pass
69
+
70
+ class GELU(nn.GELU, RelProp):
71
+ pass
72
+
73
+ class Softmax(nn.Softmax, RelProp):
74
+ pass
75
+
76
+ class LayerNorm(nn.LayerNorm, RelProp):
77
+ pass
78
+
79
+ class Dropout(nn.Dropout, RelProp):
80
+ pass
81
+
82
+
83
+ class MaxPool2d(nn.MaxPool2d, RelPropSimple):
84
+ pass
85
+
86
+ class LayerNorm(nn.LayerNorm, RelProp):
87
+ pass
88
+
89
+ class AdaptiveAvgPool2d(nn.AdaptiveAvgPool2d, RelPropSimple):
90
+ pass
91
+
92
+
93
+ class AvgPool2d(nn.AvgPool2d, RelPropSimple):
94
+ pass
95
+
96
+
97
+ class Add(RelPropSimple):
98
+ def forward(self, inputs):
99
+ return torch.add(*inputs)
100
+
101
+ def relprop(self, R, alpha):
102
+ Z = self.forward(self.X)
103
+ S = safe_divide(R, Z)
104
+ C = self.gradprop(Z, self.X, S)
105
+
106
+ a = self.X[0] * C[0]
107
+ b = self.X[1] * C[1]
108
+
109
+ a_sum = a.sum()
110
+ b_sum = b.sum()
111
+
112
+ a_fact = safe_divide(a_sum.abs(), a_sum.abs() + b_sum.abs()) * R.sum()
113
+ b_fact = safe_divide(b_sum.abs(), a_sum.abs() + b_sum.abs()) * R.sum()
114
+
115
+ a = a * safe_divide(a_fact, a.sum())
116
+ b = b * safe_divide(b_fact, b.sum())
117
+
118
+ outputs = [a, b]
119
+
120
+ return outputs
121
+
122
+ class einsum(RelPropSimple):
123
+ def __init__(self, equation):
124
+ super().__init__()
125
+ self.equation = equation
126
+ def forward(self, *operands):
127
+ return torch.einsum(self.equation, *operands)
128
+
129
+ class IndexSelect(RelProp):
130
+ def forward(self, inputs, dim, indices):
131
+ self.__setattr__('dim', dim)
132
+ self.__setattr__('indices', indices)
133
+
134
+ return torch.index_select(inputs, dim, indices)
135
+
136
+ def relprop(self, R, alpha):
137
+ Z = self.forward(self.X, self.dim, self.indices)
138
+ S = safe_divide(R, Z)
139
+ C = self.gradprop(Z, self.X, S)
140
+
141
+ if torch.is_tensor(self.X) == False:
142
+ outputs = []
143
+ outputs.append(self.X[0] * C[0])
144
+ outputs.append(self.X[1] * C[1])
145
+ else:
146
+ outputs = self.X * (C[0])
147
+ return outputs
148
+
149
+
150
+
151
+ class Clone(RelProp):
152
+ def forward(self, input, num):
153
+ self.__setattr__('num', num)
154
+ outputs = []
155
+ for _ in range(num):
156
+ outputs.append(input)
157
+
158
+ return outputs
159
+
160
+ def relprop(self, R, alpha):
161
+ Z = []
162
+ for _ in range(self.num):
163
+ Z.append(self.X)
164
+ S = [safe_divide(r, z) for r, z in zip(R, Z)]
165
+ C = self.gradprop(Z, self.X, S)[0]
166
+
167
+ R = self.X * C
168
+
169
+ return R
170
+
171
+ class Cat(RelProp):
172
+ def forward(self, inputs, dim):
173
+ self.__setattr__('dim', dim)
174
+ return torch.cat(inputs, dim)
175
+
176
+ def relprop(self, R, alpha):
177
+ Z = self.forward(self.X, self.dim)
178
+ S = safe_divide(R, Z)
179
+ C = self.gradprop(Z, self.X, S)
180
+
181
+ outputs = []
182
+ for x, c in zip(self.X, C):
183
+ outputs.append(x * c)
184
+
185
+ return outputs
186
+
187
+
188
+ class Sequential(nn.Sequential):
189
+ def relprop(self, R, alpha):
190
+ for m in reversed(self._modules.values()):
191
+ R = m.relprop(R, alpha)
192
+ return R
193
+
194
+ class BatchNorm2d(nn.BatchNorm2d, RelProp):
195
+ def relprop(self, R, alpha):
196
+ X = self.X
197
+ beta = 1 - alpha
198
+ weight = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3) / (
199
+ (self.running_var.unsqueeze(0).unsqueeze(2).unsqueeze(3).pow(2) + self.eps).pow(0.5))
200
+ Z = X * weight + 1e-9
201
+ S = R / Z
202
+ Ca = S * weight
203
+ R = self.X * (Ca)
204
+ return R
205
+
206
+
207
+ class Linear(nn.Linear, RelProp):
208
+ def relprop(self, R, alpha):
209
+ beta = alpha - 1
210
+ pw = torch.clamp(self.weight, min=0)
211
+ nw = torch.clamp(self.weight, max=0)
212
+ px = torch.clamp(self.X, min=0)
213
+ nx = torch.clamp(self.X, max=0)
214
+
215
+ def f(w1, w2, x1, x2):
216
+ Z1 = F.linear(x1, w1)
217
+ Z2 = F.linear(x2, w2)
218
+ S1 = safe_divide(R, Z1 + Z2)
219
+ S2 = safe_divide(R, Z1 + Z2)
220
+ C1 = x1 * torch.autograd.grad(Z1, x1, S1)[0]
221
+ C2 = x2 * torch.autograd.grad(Z2, x2, S2)[0]
222
+
223
+ return C1 + C2
224
+
225
+ activator_relevances = f(pw, nw, px, nx)
226
+ inhibitor_relevances = f(nw, pw, px, nx)
227
+
228
+ R = alpha * activator_relevances - beta * inhibitor_relevances
229
+
230
+ return R
231
+
232
+
233
+ class Conv2d(nn.Conv2d, RelProp):
234
+ def gradprop2(self, DY, weight):
235
+ Z = self.forward(self.X)
236
+
237
+ output_padding = self.X.size()[2] - (
238
+ (Z.size()[2] - 1) * self.stride[0] - 2 * self.padding[0] + self.kernel_size[0])
239
+
240
+ return F.conv_transpose2d(DY, weight, stride=self.stride, padding=self.padding, output_padding=output_padding)
241
+
242
+ def relprop(self, R, alpha):
243
+ if self.X.shape[1] == 3:
244
+ pw = torch.clamp(self.weight, min=0)
245
+ nw = torch.clamp(self.weight, max=0)
246
+ X = self.X
247
+ L = self.X * 0 + \
248
+ torch.min(torch.min(torch.min(self.X, dim=1, keepdim=True)[0], dim=2, keepdim=True)[0], dim=3,
249
+ keepdim=True)[0]
250
+ H = self.X * 0 + \
251
+ torch.max(torch.max(torch.max(self.X, dim=1, keepdim=True)[0], dim=2, keepdim=True)[0], dim=3,
252
+ keepdim=True)[0]
253
+ Za = torch.conv2d(X, self.weight, bias=None, stride=self.stride, padding=self.padding) - \
254
+ torch.conv2d(L, pw, bias=None, stride=self.stride, padding=self.padding) - \
255
+ torch.conv2d(H, nw, bias=None, stride=self.stride, padding=self.padding) + 1e-9
256
+
257
+ S = R / Za
258
+ C = X * self.gradprop2(S, self.weight) - L * self.gradprop2(S, pw) - H * self.gradprop2(S, nw)
259
+ R = C
260
+ else:
261
+ beta = alpha - 1
262
+ pw = torch.clamp(self.weight, min=0)
263
+ nw = torch.clamp(self.weight, max=0)
264
+ px = torch.clamp(self.X, min=0)
265
+ nx = torch.clamp(self.X, max=0)
266
+
267
+ def f(w1, w2, x1, x2):
268
+ Z1 = F.conv2d(x1, w1, bias=None, stride=self.stride, padding=self.padding)
269
+ Z2 = F.conv2d(x2, w2, bias=None, stride=self.stride, padding=self.padding)
270
+ S1 = safe_divide(R, Z1)
271
+ S2 = safe_divide(R, Z2)
272
+ C1 = x1 * self.gradprop(Z1, x1, S1)[0]
273
+ C2 = x2 * self.gradprop(Z2, x2, S2)[0]
274
+ return C1 + C2
275
+
276
+ activator_relevances = f(pw, nw, px, nx)
277
+ inhibitor_relevances = f(nw, pw, px, nx)
278
+
279
+ R = alpha * activator_relevances - beta * inhibitor_relevances
280
+ return R
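This file mirrors layers_lrp.py but changes how some layers redistribute relevance; Add, for example, rescales the two branch relevances so that their total matches the incoming relevance, which matters for residual-style additions. A small illustrative check with arbitrary tensors (alpha is accepted but unused by this rule):

import torch
from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.modules.layers_ours import Add

add = Add()
a, b = torch.rand(2, 5), torch.rand(2, 5)
out = add([a, b])                  # forward hook caches both inputs

R = torch.ones_like(out)           # pretend each output unit carries relevance 1
Ra, Rb = add.relprop(R, alpha=1)   # split and renormalize across the two branches
print(R.sum().item(), (Ra.sum() + Rb.sum()).item())   # approximately equal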
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/pertubation_eval_from_hdf5.py ADDED
@@ -0,0 +1,232 @@
1
+ import torch
2
+ import os
3
+ from tqdm import tqdm
4
+ import numpy as np
5
+ import argparse
6
+
7
+ # Import saliency methods and models
8
+ from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.ViT_explanation_generator import Baselines
9
+ from concept_attention.binary_segmentation_baselines.chefer_vit_explainability.ViT_new import vit_base_patch16_224
10
+ # from models.vgg import vgg19
11
+ import glob
12
+
13
+ from dataset.expl_hdf5 import ImagenetResults
14
+
15
+
16
+ def normalize(tensor,
17
+ mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]):
18
+ dtype = tensor.dtype
19
+ mean = torch.as_tensor(mean, dtype=dtype, device=tensor.device)
20
+ std = torch.as_tensor(std, dtype=dtype, device=tensor.device)
21
+ tensor.sub_(mean[None, :, None, None]).div_(std[None, :, None, None])
22
+ return tensor
23
+
24
+
25
+ def eval(args):
26
+ num_samples = 0
27
+ num_correct_model = np.zeros((len(imagenet_ds,)))
28
+ dissimilarity_model = np.zeros((len(imagenet_ds,)))
29
+ model_index = 0
30
+
31
+ if args.scale == 'per':
32
+ base_size = 224 * 224
33
+ perturbation_steps = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
34
+ elif args.scale == '100':
35
+ base_size = 100
36
+ perturbation_steps = [5, 10, 15, 20, 25, 30, 35, 40, 45]
37
+ else:
38
+ raise Exception('scale not valid')
39
+
40
+ num_correct_pertub = np.zeros((9, len(imagenet_ds)))
41
+ dissimilarity_pertub = np.zeros((9, len(imagenet_ds)))
42
+ logit_diff_pertub = np.zeros((9, len(imagenet_ds)))
43
+ prob_diff_pertub = np.zeros((9, len(imagenet_ds)))
44
+ perturb_index = 0
45
+
46
+ for batch_idx, (data, vis, target) in enumerate(tqdm(sample_loader)):
47
+ # Update the number of samples
48
+ num_samples += len(data)
49
+
50
+ data = data.to(device)
51
+ vis = vis.to(device)
52
+ target = target.to(device)
53
+ norm_data = normalize(data.clone())
54
+
55
+ # Compute model accuracy
56
+ pred = model(norm_data)
57
+ pred_probabilities = torch.softmax(pred, dim=1)
58
+ pred_org_logit = pred.data.max(1, keepdim=True)[0].squeeze(1)
59
+ pred_org_prob = pred_probabilities.data.max(1, keepdim=True)[0].squeeze(1)
60
+ pred_class = pred.data.max(1, keepdim=True)[1].squeeze(1)
61
+ tgt_pred = (target == pred_class).type(target.type()).data.cpu().numpy()
62
+ num_correct_model[model_index:model_index+len(tgt_pred)] = tgt_pred
63
+
64
+ probs = torch.softmax(pred, dim=1)
65
+ target_probs = torch.gather(probs, 1, target[:, None])[:, 0]
66
+ second_probs = probs.data.topk(2, dim=1)[0][:, 1]
67
+ temp = torch.log(target_probs / second_probs).data.cpu().numpy()
68
+ dissimilarity_model[model_index:model_index+len(temp)] = temp
69
+
70
+ if args.wrong:
71
+ wid = np.argwhere(tgt_pred == 0).flatten()
72
+ if len(wid) == 0:
73
+ continue
74
+ wid = torch.from_numpy(wid).to(vis.device)
75
+ vis = vis.index_select(0, wid)
76
+ data = data.index_select(0, wid)
77
+ target = target.index_select(0, wid)
78
+
79
+ # Save original shape
80
+ org_shape = data.shape
81
+
82
+ if args.neg:
83
+ vis = -vis
84
+
85
+ vis = vis.reshape(org_shape[0], -1)
86
+
87
+ for i in range(len(perturbation_steps)):
88
+ _data = data.clone()
89
+
90
+ _, idx = torch.topk(vis, int(base_size * perturbation_steps[i]), dim=-1)
91
+ idx = idx.unsqueeze(1).repeat(1, org_shape[1], 1)
92
+ _data = _data.reshape(org_shape[0], org_shape[1], -1)
93
+ _data = _data.scatter_(-1, idx, 0)
94
+ _data = _data.reshape(*org_shape)
95
+
96
+ _norm_data = normalize(_data)
97
+
98
+ out = model(_norm_data)
99
+
100
+ pred_probabilities = torch.softmax(out, dim=1)
101
+ pred_prob = pred_probabilities.data.max(1, keepdim=True)[0].squeeze(1)
102
+ diff = (pred_prob - pred_org_prob).data.cpu().numpy()
103
+ prob_diff_pertub[i, perturb_index:perturb_index+len(diff)] = diff
104
+
105
+ pred_logit = out.data.max(1, keepdim=True)[0].squeeze(1)
106
+ diff = (pred_logit - pred_org_logit).data.cpu().numpy()
107
+ logit_diff_pertub[i, perturb_index:perturb_index+len(diff)] = diff
108
+
109
+ target_class = out.data.max(1, keepdim=True)[1].squeeze(1)
110
+ temp = (target == target_class).type(target.type()).data.cpu().numpy()
111
+ num_correct_pertub[i, perturb_index:perturb_index+len(temp)] = temp
112
+
113
+ probs_pertub = torch.softmax(out, dim=1)
114
+ target_probs = torch.gather(probs_pertub, 1, target[:, None])[:, 0]
115
+ second_probs = probs_pertub.data.topk(2, dim=1)[0][:, 1]
116
+ temp = torch.log(target_probs / second_probs).data.cpu().numpy()
117
+ dissimilarity_pertub[i, perturb_index:perturb_index+len(temp)] = temp
118
+
119
+ model_index += len(target)
120
+ perturb_index += len(target)
121
+
122
+ np.save(os.path.join(args.experiment_dir, 'model_hits.npy'), num_correct_model)
123
+ np.save(os.path.join(args.experiment_dir, 'model_dissimilarities.npy'), dissimilarity_model)
124
+ np.save(os.path.join(args.experiment_dir, 'perturbations_hits.npy'), num_correct_pertub[:, :perturb_index])
125
+ np.save(os.path.join(args.experiment_dir, 'perturbations_dissimilarities.npy'), dissimilarity_pertub[:, :perturb_index])
126
+ np.save(os.path.join(args.experiment_dir, 'perturbations_logit_diff.npy'), logit_diff_pertub[:, :perturb_index])
127
+ np.save(os.path.join(args.experiment_dir, 'perturbations_prob_diff.npy'), prob_diff_pertub[:, :perturb_index])
128
+
129
+ print(np.mean(num_correct_model), np.std(num_correct_model))
130
+ print(np.mean(dissimilarity_model), np.std(dissimilarity_model))
131
+ print(perturbation_steps)
132
+ print(np.mean(num_correct_pertub, axis=1), np.std(num_correct_pertub, axis=1))
133
+ print(np.mean(dissimilarity_pertub, axis=1), np.std(dissimilarity_pertub, axis=1))
134
+
135
+
136
+ if __name__ == "__main__":
137
+ parser = argparse.ArgumentParser(description='Perturbation evaluation from HDF5 visualizations')
138
+ parser.add_argument('--batch-size', type=int,
139
+ default=16,
140
+ help='')
141
+ parser.add_argument('--neg', type=bool,
142
+ default=True,
143
+ help='')
144
+ parser.add_argument('--value', action='store_true',
145
+ default=False,
146
+ help='')
147
+ parser.add_argument('--scale', type=str,
148
+ default='per',
149
+ choices=['per', '100'],
150
+ help='')
151
+ parser.add_argument('--method', type=str,
152
+ default='grad_rollout',
153
+ choices=['rollout', 'lrp', 'transformer_attribution', 'full_lrp', 'v_gradcam', 'lrp_last_layer',
154
+ 'lrp_second_layer', 'gradcam',
155
+ 'attn_last_layer', 'attn_gradcam', 'input_grads'],
156
+ help='')
157
+ parser.add_argument('--vis-class', type=str,
158
+ default='top',
159
+ choices=['top', 'target', 'index'],
160
+ help='')
161
+ parser.add_argument('--wrong', action='store_true',
162
+ default=False,
163
+ help='')
164
+ parser.add_argument('--class-id', type=int,
165
+ default=0,
166
+ help='')
167
+ parser.add_argument('--is-ablation', type=bool,
168
+ default=False,
169
+ help='')
170
+ args = parser.parse_args()
171
+
172
+ torch.multiprocessing.set_start_method('spawn')
173
+
174
+ # PATH variables
175
+ PATH = os.path.dirname(os.path.abspath(__file__)) + '/'
176
+ dataset = PATH + 'dataset/'
177
+ os.makedirs(os.path.join(PATH, 'experiments'), exist_ok=True)
178
+ os.makedirs(os.path.join(PATH, 'experiments/perturbations'), exist_ok=True)
179
+
180
+ exp_name = args.method
181
+ exp_name += '_neg' if args.neg else '_pos'
182
+ print(exp_name)
183
+
184
+ if args.vis_class == 'index':
185
+ args.runs_dir = os.path.join(PATH, 'experiments/perturbations/{}/{}_{}'.format(exp_name,
186
+ args.vis_class,
187
+ args.class_id))
188
+ else:
189
+ ablation_fold = 'ablation' if args.is_ablation else 'not_ablation'
190
+ args.runs_dir = os.path.join(PATH, 'experiments/perturbations/{}/{}/{}'.format(exp_name,
191
+ args.vis_class, ablation_fold))
192
+ # args.runs_dir = os.path.join(PATH, 'experiments/perturbations/{}/{}'.format(exp_name,
193
+ # args.vis_class))
194
+
195
+ if args.wrong:
196
+ args.runs_dir += '_wrong'
197
+
198
+ experiments = sorted(glob.glob(os.path.join(args.runs_dir, 'experiment_*')))
199
+ experiment_id = int(experiments[-1].split('_')[-1]) + 1 if experiments else 0
200
+ args.experiment_dir = os.path.join(args.runs_dir, 'experiment_{}'.format(str(experiment_id)))
201
+ os.makedirs(args.experiment_dir, exist_ok=True)
202
+
203
+ cuda = torch.cuda.is_available()
204
+ device = torch.device("cuda" if cuda else "cpu")
205
+
206
+ if args.vis_class == 'index':
207
+ vis_method_dir = os.path.join(PATH,'visualizations/{}/{}_{}'.format(args.method,
208
+ args.vis_class,
209
+ args.class_id))
210
+ else:
211
+ ablation_fold = 'ablation' if args.is_ablation else 'not_ablation'
212
+ vis_method_dir = os.path.join(PATH,'visualizations/{}/{}/{}'.format(args.method,
213
+ args.vis_class, ablation_fold))
214
+ # vis_method_dir = os.path.join(PATH, 'visualizations/{}/{}'.format(args.method,
215
+ # args.vis_class))
216
+
217
+ # imagenet_ds = ImagenetResults('visualizations/{}'.format(args.method))
218
+ imagenet_ds = ImagenetResults(vis_method_dir)
219
+
220
+ # Model
221
+ model = vit_base_patch16_224(pretrained=True).to(device)
222
+ model.eval()
223
+
224
+ save_path = PATH + 'results/'
225
+
226
+ sample_loader = torch.utils.data.DataLoader(
227
+ imagenet_ds,
228
+ batch_size=args.batch_size,
229
+ num_workers=2,
230
+ shuffle=False)
231
+
232
+ eval(args)
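The core of the evaluation loop above is the top-k masking step: for each perturbation ratio, the pixels with the highest relevance (after the optional sign flip controlled by --neg) are zeroed out before the image is re-classified. A standalone sketch of just that step, with an illustrative batch size and a 10% ratio:

import torch

data = torch.rand(4, 3, 224, 224)        # batch of images
vis = torch.rand(4, 224 * 224)           # one flattened relevance map per image

k = int(224 * 224 * 0.1)                 # perturb 10% of the pixels
_, idx = torch.topk(vis, k, dim=-1)      # indices of the most relevant pixels
idx = idx.unsqueeze(1).repeat(1, 3, 1)   # reuse the same indices for every channel

flat = data.reshape(4, 3, -1)
flat.scatter_(-1, idx, 0)                # zero the selected pixels in place
perturbed = flat.reshape(4, 3, 224, 224)
print(perturbed.shape)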
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/utils/__init__.py ADDED
File without changes
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/utils/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (221 Bytes).
 
concept_attention/binary_segmentation_baselines/chefer_vit_explainability/utils/__pycache__/confusionmatrix.cpython-310.pyc ADDED
Binary file (3.55 kB).