Spaces: Runtime error
Commit: attempt 53

app.py CHANGED
@@ -8,6 +8,10 @@ from transformer_flux import FluxTransformer2DModel
 from pipeline_flux_cnet import FluxControlNetInpaintingPipeline
 from PIL import Image, ImageDraw
 import numpy as np
+import subprocess
+
+subprocess.run("rm -rf /data-nvme/zerogpu-offload/*", env={}, shell=True)
+
 
 HF_TOKEN = os.getenv("HF_TOKEN")
 # Ensure that the minimal version of diffusers is installed
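Note: the new subprocess.run call shells out at import time to wipe the ZeroGPU offload cache (/data-nvme/zerogpu-offload/*) before any model weights are loaded. For reference, a pure-Python equivalent might look like the sketch below; the shutil/pathlib error handling is an assumption, not part of the Space:

    import shutil
    from pathlib import Path

    # Pure-Python equivalent of: rm -rf /data-nvme/zerogpu-offload/*
    offload_dir = Path("/data-nvme/zerogpu-offload")
    if offload_dir.is_dir():
        for entry in offload_dir.iterdir():
            if entry.is_dir():
                shutil.rmtree(entry, ignore_errors=True)  # remove subdirectories
            else:
                entry.unlink(missing_ok=True)             # remove plain files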
@@ -52,57 +56,83 @@ def create_mask_from_editor(editor_value):
     mask_image = Image.fromarray(mask_array)
     return mask_image
 
-def create_diptych_image(image, mask):
-
+def create_mask_on_image(image, xyxy):
+    """
+    Create a white mask on the image given xyxy coordinates.
+    Args:
+        image: PIL Image
+        xyxy: List of [x1, y1, x2, y2] coordinates
+    Returns:
+        PIL Image with white mask
+    """
+
+    # Convert to numpy array
+    img_array = np.array(image)
+
+    # Create mask
+    mask = Image.new('RGB', image.size, (0, 0, 0))
+    draw = ImageDraw.Draw(mask)
+
+    # Draw white rectangle
+    draw.rectangle(xyxy, fill=(255, 255, 255))
+
+    # Convert mask to array
+    mask_array = np.array(mask)
+
+    # Apply mask to image
+    masked_array = np.where(mask_array == 255, 255, img_array)
+
+    return Image.fromarray(mask_array), Image.fromarray(masked_array)
+
+def create_diptych_image(image):
+    # Create a diptych image with original on left and black on right
     width, height = image.size
     diptych = Image.new('RGB', (width * 2, height), 'black')
     diptych.paste(image, (0, 0))
-    diptych.paste(mask, (width, 0))
     return diptych
 
 @spaces.GPU()
 def inpaint_image(image, prompt, editor_value):
-    # …
-
-
-
-
-    mask = …
-
-    # Create diptych image
-    diptych_image = create_diptych_image(image, mask)
-
-    # Preprocess prompt and image for the pipeline
-    prompt = pipe.tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).input_ids.to("cuda")
-    image_tensor = pipe.feature_extractor(images=image, return_tensors="pt").pixel_values.to("cuda")
-    mask_tensor = pipe.feature_extractor(images=mask, return_tensors="pt").pixel_values.to("cuda")
-    control_image_tensor = pipe.feature_extractor(images=diptych_image, return_tensors="pt").pixel_values.to("cuda")
-
+    # Load image and mask
+    size = (1536, 768)
+    image = load_image(image).convert("RGB").resize((768, 768))
+    diptych_image = create_diptych_image(image)
+    # mask = load_image(mask_path).convert("RGB").resize(size)
+    # mask, mask_image = create_mask_on_image(image, [250, 275, 500, 400])
+    mask, mask_image = create_mask_on_image(diptych_image, [768, 0, 1536, 768])
     generator = torch.Generator(device="cuda").manual_seed(24)
+    # Load and preprocess image
 
     # Calculate attention scale mask
     attn_scale_factor = 1.5
-    size…
-    H, W = size[1]…
+    # Create a tensor of ones with same size as diptych image
+    H, W = size[1]//16, size[0]//16
     attn_scale_mask = torch.zeros(size[1], size[0])
-    attn_scale_mask[:, 768:] = 1.0
+    attn_scale_mask[:, 768:] = 1.0 # height, width
     attn_scale_mask = torch.nn.functional.interpolate(attn_scale_mask[None, None, :, :], (H, W), mode='nearest-exact').flatten()
     attn_scale_mask = attn_scale_mask[None, None, :, None].repeat(1, 24, 1, H*W)
+    # Get inverted attention mask by subtracting from 1.0
     transposed_inverted_attn_scale_mask = (1.0 - attn_scale_mask).transpose(-1, -2)
+
     cross_attn_region = torch.logical_and(attn_scale_mask, transposed_inverted_attn_scale_mask)
+
     cross_attn_region = cross_attn_region * attn_scale_factor
     cross_attn_region[cross_attn_region < 1.0] = 1.0
+
     full_attn_scale_mask = torch.ones(1, 24, 512+H*W, 512+H*W)
+
     full_attn_scale_mask[:, :, 512:, 512:] = cross_attn_region
+    # Convert to bfloat16 to match model dtype
     full_attn_scale_mask = full_attn_scale_mask.to(device=pipe.transformer.device, dtype=torch.bfloat16)
 
+
     # Inpaint
     result = pipe(
         prompt=prompt,
         height=size[1],
         width=size[0],
-        control_image=…
-        control_mask=…
+        control_image=diptych_image,
+        control_mask=mask,
         num_inference_steps=20,
         generator=generator,
         controlnet_conditioning_scale=0.95,
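Note: the attention-scale block above boosts cross-attention between the two halves of the diptych. After the logical_and, cross_attn_region marks image-token queries in the generated right half attending to keys in the reference left half; those entries are scaled by attn_scale_factor (1.5) while every other position stays at 1.0. A minimal shape check, assuming the values hard-coded in the diff (1536x768 diptych, /16 latent grid, 512 text tokens, 24 heads):

    import torch

    size = (1536, 768)                   # (width, height) of the diptych
    H, W = size[1] // 16, size[0] // 16  # 48 x 96 latent grid
    attn_scale_mask = torch.zeros(size[1], size[0])
    attn_scale_mask[:, 768:] = 1.0       # mark the right (generated) half
    attn_scale_mask = torch.nn.functional.interpolate(
        attn_scale_mask[None, None, :, :], (H, W), mode='nearest-exact'
    ).flatten()
    assert attn_scale_mask.shape == (H * W,)  # 4608 image tokens

    full_attn_scale_mask = torch.ones(1, 24, 512 + H * W, 512 + H * W)
    assert full_attn_scale_mask.shape == (1, 24, 5120, 5120)  # 512 text + 4608 image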
@@ -117,8 +147,8 @@ def inpaint_image(image, prompt, editor_value):
 iface = gr.Interface(
     fn=inpaint_image,
     inputs=[
-        gr.Image(type="…
-        gr.Textbox(lines=…
+        gr.Image(type="filepath", label="Upload Image"),
+        gr.Textbox(lines=2, placeholder="Enter your prompt here (e.g., 'wearing a christmas hat, in a busy street')", label="Prompt"),
         gr.ImageEditor(type="pil", label="Image with Mask", sources="upload", interactive=True)
     ],
     outputs=[
@@ -130,4 +160,4 @@ iface = gr.Interface(
 )
 
 # Launch the app
-iface.launch()
+iface.launch(share=True)
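Note: taken together, the changes switch the Space to diptych-based reference inpainting: the 768x768 input becomes the left half of a 1536x768 canvas, the right half is masked white, and the pipeline regenerates only that half from the prompt plus the boosted cross-attention to the reference. The two new helpers can be exercised standalone to inspect what the pipeline receives; a sketch, with input.png as a hypothetical local file:

    from PIL import Image

    # Reference on the left, black placeholder on the right.
    image = Image.open("input.png").convert("RGB").resize((768, 768))
    diptych_image = create_diptych_image(image)  # 1536x768 canvas

    # White-out the right half; only this region is inpainted.
    mask, masked_preview = create_mask_on_image(diptych_image, [768, 0, 1536, 768])
    mask.save("mask.png")               # passed to pipe(control_mask=...)
    masked_preview.save("preview.png")  # diptych with the right half whited out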
|
| 8 |
from pipeline_flux_cnet import FluxControlNetInpaintingPipeline
|
| 9 |
from PIL import Image, ImageDraw
|
| 10 |
import numpy as np
|
| 11 |
+
import subprocess
|
| 12 |
+
|
| 13 |
+
subprocess.run("rm -rf /data-nvme/zerogpu-offload/*", env={}, shell=True)
|
| 14 |
+
|
| 15 |
|
| 16 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 17 |
# Ensure that the minimal version of diffusers is installed
|
|
|
|
| 56 |
mask_image = Image.fromarray(mask_array)
|
| 57 |
return mask_image
|
| 58 |
|
| 59 |
+
def create_mask_on_image(image, xyxy):
|
| 60 |
+
"""
|
| 61 |
+
Create a white mask on the image given xyxy coordinates.
|
| 62 |
+
Args:
|
| 63 |
+
image: PIL Image
|
| 64 |
+
xyxy: List of [x1, y1, x2, y2] coordinates
|
| 65 |
+
Returns:
|
| 66 |
+
PIL Image with white mask
|
| 67 |
+
"""
|
| 68 |
+
|
| 69 |
+
# Convert to numpy array
|
| 70 |
+
img_array = np.array(image)
|
| 71 |
+
|
| 72 |
+
# Create mask
|
| 73 |
+
mask = Image.new('RGB', image.size, (0, 0, 0))
|
| 74 |
+
draw = ImageDraw.Draw(mask)
|
| 75 |
+
|
| 76 |
+
# Draw white rectangle
|
| 77 |
+
draw.rectangle(xyxy, fill=(255, 255, 255))
|
| 78 |
+
|
| 79 |
+
# Convert mask to array
|
| 80 |
+
mask_array = np.array(mask)
|
| 81 |
+
|
| 82 |
+
# Apply mask to image
|
| 83 |
+
masked_array = np.where(mask_array == 255, 255, img_array)
|
| 84 |
+
|
| 85 |
+
return Image.fromarray(mask_array), Image.fromarray(masked_array)
|
| 86 |
+
|
| 87 |
+
def create_diptych_image(image):
|
| 88 |
+
# Create a diptych image with original on left and black on right
|
| 89 |
width, height = image.size
|
| 90 |
diptych = Image.new('RGB', (width * 2, height), 'black')
|
| 91 |
diptych.paste(image, (0, 0))
|
|
|
|
| 92 |
return diptych
|
| 93 |
|
| 94 |
@spaces.GPU()
|
| 95 |
def inpaint_image(image, prompt, editor_value):
|
| 96 |
+
# Load image and mask
|
| 97 |
+
size = (1536, 768)
|
| 98 |
+
image = load_image(image).convert("RGB").resize((768, 768))
|
| 99 |
+
diptych_image = create_diptych_image(image)
|
| 100 |
+
# mask = load_image(mask_path).convert("RGB").resize(size)
|
| 101 |
+
# mask, mask_image = create_mask_on_image(image, [250, 275, 500, 400])
|
| 102 |
+
mask, mask_image = create_mask_on_image(diptych_image, [768, 0, 1536, 768])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
generator = torch.Generator(device="cuda").manual_seed(24)
|
| 104 |
+
# Load and preprocess image
|
| 105 |
|
| 106 |
# Calculate attention scale mask
|
| 107 |
attn_scale_factor = 1.5
|
| 108 |
+
# Create a tensor of ones with same size as diptych image
|
| 109 |
+
H, W = size[1]//16, size[0]//16
|
| 110 |
attn_scale_mask = torch.zeros(size[1], size[0])
|
| 111 |
+
attn_scale_mask[:, 768:] = 1.0 # height, width
|
| 112 |
attn_scale_mask = torch.nn.functional.interpolate(attn_scale_mask[None, None, :, :], (H, W), mode='nearest-exact').flatten()
|
| 113 |
attn_scale_mask = attn_scale_mask[None, None, :, None].repeat(1, 24, 1, H*W)
|
| 114 |
+
# Get inverted attention mask by subtracting from 1.0
|
| 115 |
transposed_inverted_attn_scale_mask = (1.0 - attn_scale_mask).transpose(-1, -2)
|
| 116 |
+
|
| 117 |
cross_attn_region = torch.logical_and(attn_scale_mask, transposed_inverted_attn_scale_mask)
|
| 118 |
+
|
| 119 |
cross_attn_region = cross_attn_region * attn_scale_factor
|
| 120 |
cross_attn_region[cross_attn_region < 1.0] = 1.0
|
| 121 |
+
|
| 122 |
full_attn_scale_mask = torch.ones(1, 24, 512+H*W, 512+H*W)
|
| 123 |
+
|
| 124 |
full_attn_scale_mask[:, :, 512:, 512:] = cross_attn_region
|
| 125 |
+
# Convert to bfloat16 to match model dtype
|
| 126 |
full_attn_scale_mask = full_attn_scale_mask.to(device=pipe.transformer.device, dtype=torch.bfloat16)
|
| 127 |
|
| 128 |
+
|
| 129 |
# Inpaint
|
| 130 |
result = pipe(
|
| 131 |
prompt=prompt,
|
| 132 |
height=size[1],
|
| 133 |
width=size[0],
|
| 134 |
+
control_image=diptych_image,
|
| 135 |
+
control_mask=mask,
|
| 136 |
num_inference_steps=20,
|
| 137 |
generator=generator,
|
| 138 |
controlnet_conditioning_scale=0.95,
|
|
|
|
| 147 |
iface = gr.Interface(
|
| 148 |
fn=inpaint_image,
|
| 149 |
inputs=[
|
| 150 |
+
gr.Image(type="filepath", label="Upload Image"),
|
| 151 |
+
gr.Textbox(lines=2, placeholder="Enter your prompt here (e.g., 'wearing a christmas hat, in a busy street')", label="Prompt"),
|
| 152 |
gr.ImageEditor(type="pil", label="Image with Mask", sources="upload", interactive=True)
|
| 153 |
],
|
| 154 |
outputs=[
|
|
|
|
| 160 |
)
|
| 161 |
|
| 162 |
# Launch the app
|
| 163 |
+
iface.launch(share=True)
|