helblazer811 committed
Commit 5f0abcc · 1 parent: 4b30dce

Working app locally.
app.py CHANGED
@@ -4,6 +4,9 @@ import io
 import spaces
 import gradio as gr
 from PIL import Image
+import requests
+import numpy as np
+import PIL
 
 from concept_attention import ConceptAttentionFluxPipeline
 
@@ -17,15 +20,28 @@ concept_attention_default_args = {
 }
 IMG_SIZE = 250
 
+def download_image(url):
+    return Image.open(io.BytesIO(requests.get(url).content))
+
 EXAMPLES = [
     [
-        "A fluffy cat sitting on a windowsill",  # prompt
-        "cat.jpg",  # image
-        "fur, whiskers, eyes",  # words
+        "A dog by a tree",  # prompt
+        download_image("https://github.com/helblazer811/ConceptAttention/blob/master/images/dog_by_tree.png?raw=true"),
+        "tree, dog, grass, background",  # words
+        42,  # seed
+    ],
+    [
+        "A dragon",  # prompt
+        download_image("https://github.com/helblazer811/ConceptAttention/blob/master/images/dragon_image.png?raw=true"),
+        "dragon, sky, rock, cloud",  # words
         42,  # seed
     ],
-    # ["Mountain landscape with lake", "cat.jpg", "sky, trees, water", 123],
-    # ["Portrait of a young woman", "monkey.png", "face, hair, eyes", 456],
+    [
+        "A hot air balloon",  # prompt
+        download_image("https://github.com/helblazer811/ConceptAttention/blob/master/images/hot_air_balloon.png?raw=true"),
+        "balloon, sky, water, tree",  # words
+        42,  # seed
+    ]
 ]
 
 pipeline = ConceptAttentionFluxPipeline(model_name="flux-schnell", device="cuda")
@@ -40,9 +56,15 @@ def process_inputs(prompt, input_image, word_list, seed):
     concepts = [w.strip() for w in word_list.split(",")]
 
     if input_image is not None:
-        input_image = Image.fromarray(input_image)
-        input_image = input_image.convert("RGB")
-        input_image = input_image.resize((1024, 1024))
+        if isinstance(input_image, np.ndarray):
+            input_image = Image.fromarray(input_image)
+            input_image = input_image.convert("RGB")
+            input_image = input_image.resize((1024, 1024))
+        elif isinstance(input_image, PIL.Image.Image):
+            input_image = input_image.convert("RGB")
+            input_image = input_image.resize((1024, 1024))
+
+        print(input_image.size)
 
     pipeline_output = pipeline.encode_image(
         image=input_image,
@@ -128,7 +150,7 @@ with gr.Blocks(
     gr.Examples(examples=EXAMPLES, inputs=[prompt, image_input, words, seed], outputs=[output_image, saliency_display], fn=process_inputs, cache_examples=False)
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(max_threads=1)
     # share=True,
     # server_name="0.0.0.0",
     # inbrowser=True,
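
A note on the `process_inputs` change: Gradio can hand the callback either a NumPy array or a PIL image depending on how the `gr.Image` component is configured, and with `download_image(...)` baked into EXAMPLES the example rows now arrive as PIL images. A minimal sketch of the same normalization in isolation (the helper name `normalize_input_image` is mine, not from this commit):

```python
import numpy as np
import PIL.Image
from PIL import Image


def normalize_input_image(input_image, size=(1024, 1024)):
    """Coerce a Gradio image input (ndarray or PIL) into a fixed-size RGB PIL image."""
    if isinstance(input_image, np.ndarray):
        # gr.Image(type="numpy") delivers an HxWxC uint8 array
        input_image = Image.fromarray(input_image)
    elif not isinstance(input_image, PIL.Image.Image):
        raise TypeError(f"unsupported image type: {type(input_image)}")
    # Both branches in the commit end with the same convert/resize steps
    return input_image.convert("RGB").resize(size)
```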
concept_attention/binary_segmentation_baselines/__pycache__/raw_cross_attention.cpython-310.pyc CHANGED
Binary files a/concept_attention/binary_segmentation_baselines/__pycache__/raw_cross_attention.cpython-310.pyc and b/concept_attention/binary_segmentation_baselines/__pycache__/raw_cross_attention.cpython-310.pyc differ
 
concept_attention/binary_segmentation_baselines/__pycache__/raw_output_space.cpython-310.pyc CHANGED
Binary files a/concept_attention/binary_segmentation_baselines/__pycache__/raw_output_space.cpython-310.pyc and b/concept_attention/binary_segmentation_baselines/__pycache__/raw_output_space.cpython-310.pyc differ
 
concept_attention/concept_attention_pipeline.py CHANGED
@@ -28,7 +28,7 @@ class ConceptAttentionFluxPipeline():
         device="cuda:0"
     ):
         self.model_name = model_name
-        self.offload_model = False
+        self.offload_model = offload_model
         # Load the generator
         self.flux_generator = FluxGenerator(
             model_name=model_name,
@@ -139,7 +139,7 @@ class ConceptAttentionFluxPipeline():
             height=height,
             width=width
         )
-        concept_heatmaps = concept_heatmaps.detach().cpu().numpy()
+        concept_heatmaps = concept_heatmaps.detach().cpu().numpy().squeeze()
 
         # Convert the torch heatmaps to PIL images.
         if return_pil_heatmaps:
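
The added `.squeeze()` drops singleton axes (typically a batch axis of 1) so that the downstream PIL conversion sees one 2-D map per concept. A self-contained illustration of the shape change; the shapes are illustrative, not taken from the pipeline:

```python
import torch

# e.g. (batch=1, concepts=4, H=64, W=64) -> (4, 64, 64) after squeeze
concept_heatmaps = torch.rand(1, 4, 64, 64)
squeezed = concept_heatmaps.detach().cpu().numpy().squeeze()
print(squeezed.shape)  # (4, 64, 64)
```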
concept_attention/flux/src/flux/util.py CHANGED
@@ -136,6 +136,7 @@ class T5Embedder(nn.Module):
         self.hf_module = hf_module
         self.tokenizer = tokenizer
 
+    @torch.no_grad()
     def forward(self, text: list[str]) -> torch.Tensor:
         batch_encoding = self.tokenizer(
             text,
@@ -181,7 +182,7 @@ def load_t5(device: str | torch.device = "cuda", max_length: int = 512) -> HFEmbedder:
         tokenizer,
         max_length=max_length,
         output_key="last_hidden_state"
-    ).to(device)
+    ).to(device).to(torch.bfloat16)
     # max length 64, 128, 256 and 512 should work (if your sequence is short enough)
     # Load the safe tensors model
     # ckpt_path = hf_hub_download(configs["name"].repo_id, configs["name"].repo_flow)
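
Both edits here trade gradients and precision for memory: `@torch.no_grad()` keeps autograd from recording the T5 forward pass, and casting the loaded module with `.to(torch.bfloat16)` halves its weight footprint relative to float32. A minimal sketch of the same pattern on a stand-in module (not the actual `HFEmbedder`):

```python
import torch
import torch.nn as nn


class TextEncoder(nn.Module):
    """Stand-in for an HF text-encoder wrapper."""

    def __init__(self):
        super().__init__()
        self.proj = nn.Linear(512, 512)

    @torch.no_grad()  # no autograd graph is recorded, cutting inference memory
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.proj(x)


encoder = TextEncoder().to(torch.bfloat16)  # weights stored in bfloat16
out = encoder(torch.randn(1, 512).to(torch.bfloat16))
print(out.dtype)  # torch.bfloat16
```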
concept_attention/image_generator.py CHANGED
@@ -58,8 +58,9 @@ def get_models(
     clip = load_clip(device)
     model = load_flow_model(name, device="cpu" if offload else device, attention_block_class=attention_block_class, dit_class=dit_class)
     ae = load_ae(name, device="cpu" if offload else device)
-    # nsfw_classifier = pipeline("image-classification", model="Falconsai/nsfw_image_detection", device=device)
-    return model, ae, t5, clip, None
+    nsfw_classifier = pipeline("image-classification", model="Falconsai/nsfw_image_detection", device=device)
+
+    return model, ae, t5, clip, nsfw_classifier
 
 class FluxGenerator():
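
`pipeline` in `get_models` is the `transformers.pipeline` factory, so the re-enabled classifier returns a list of label/score dicts per image. A hedged usage sketch; the label names and threshold are assumptions about the Falconsai checkpoint, not taken from this commit:

```python
from PIL import Image
from transformers import pipeline

# Same checkpoint get_models() now loads
nsfw_classifier = pipeline("image-classification", model="Falconsai/nsfw_image_detection")

image = Image.new("RGB", (512, 512))  # placeholder input
results = nsfw_classifier(image)  # e.g. [{"label": "normal", "score": 0.99}, ...]
scores = {r["label"]: r["score"] for r in results}
is_flagged = scores.get("nsfw", 0.0) > 0.85  # threshold is illustrative
```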