rkv1990/FLUX.1-Fill-dev-outpainting

FLUX.1 Fill [dev] is a 12 billion parameter rectified flow transformer capable of filling areas in existing images based on a text description.

The idea is to unlock the full outpainting potential of the FLUX.1-Fill-dev model. The original model parameters have not been finetuned or modified. Rather, this simple hack unlocks the full potential of the FLUX.1-Fill-dev model.

This is based on the FLUX.1-Fill-dev model, so the FLUX.1-dev Non-Commercial License (https://github.com/black-forest-labs/flux/blob/main/model_licenses/LICENSE-FLUX1-dev) is applicable.

Diffusers

To use FLUX.1 Fill [dev] with the 🧨 diffusers python library, first install or upgrade diffusers:

pip install -U diffusers

Then you can use FluxFillPipeline to run the model. Here is a code snippet showing how to use it:

import numpy as np
import cv2
from PIL import Image
import torch
from diffusers import FluxFillPipeline
from diffusers.utils import load_image
from typing import Union

def prepare_masked_image(
    foreground: Union[Image.Image, np.ndarray],
    mask: Union[Image.Image, np.ndarray],
    alpha: float = 0.001,
    blur: bool = True
) -> Image.Image:
    """
    Combine the foreground and mask into a masked image whose background is noise.

    Pixels where the mask is set keep the original foreground values. Every
    other pixel is replaced by random grayscale noise that is blended (with
    weight ``alpha``) with a seamless-cloned copy of the foreground on black.

    Args:
        foreground (PIL.Image.Image or np.ndarray): The input image containing
            the object to keep. It is passed to ``cv2.seamlessClone``, which
            expects an 8-bit 3-channel image.
        mask (PIL.Image.Image or np.ndarray): Mask of the foreground region,
            with values in [0, 255] or in [0, 1]; non-zero marks pixels to
            keep. Must have the same height and width as ``foreground``.
            3-channel arrays are converted to grayscale.
        alpha (float): Weight of the cloned foreground in the background
            blend. Lower alpha → more noise in the background.
        blur (bool): Whether to smooth the noise with a 5×5 Gaussian blur.

    Returns:
        PIL.Image.Image: The foreground on top of a noise-filled background.

    Raises:
        ValueError: If ``mask`` and ``foreground`` differ in height or width.
    """

    # Ensure foreground is an ndarray
    if isinstance(foreground, Image.Image):
        foreground_np = np.array(foreground)
    else:
        foreground_np = foreground  # assume already a NumPy array

    # Ensure mask is a NumPy array and single-channel
    if isinstance(mask, Image.Image):
        mask_np = np.array(mask.convert("L"))  # convert to grayscale
    else:
        mask_np = mask
        if mask_np.ndim == 3:
            mask_np = cv2.cvtColor(mask_np, cv2.COLOR_BGR2GRAY)

    h, w = foreground_np.shape[:2]  # height, width
    if mask_np.shape[:2] != (h, w):
        raise ValueError(
            f"mask size {mask_np.shape[:2]} does not match foreground size {(h, w)}"
        )

    # Normalize mask to [0,1] float if it's in [0,255]
    if mask_np.max() <= 1:
        mask_norm = mask_np.astype(np.float32)
    else:
        mask_norm = (mask_np / 255.0).astype(np.float32)

    # The clone step needs an 8-bit mask; rebuilding it from the normalized
    # mask makes [0,1] and boolean masks behave like [0,255] ones.
    clone_mask = np.rint(mask_norm * 255).astype(np.uint8)

    # Generate uniform random noise (optionally smoothed with a Gaussian blur)
    noise = (np.random.rand(h, w) * 255).astype(np.uint8)
    if blur:
        noise = cv2.GaussianBlur(noise, (5, 5), 0)
    # Stack to 3 channels
    noise_rgb = np.stack([noise, noise, noise], axis=-1)

    # Prepare a black background
    black_bg = np.zeros_like(foreground_np, dtype=np.uint8)

    # Dilate the mask with a 3×3 kernel to get smoother boundaries for seamlessClone
    kernel = np.ones((3, 3), np.uint8)
    dilated_mask = cv2.dilate(clone_mask, kernel, iterations=10)

    # seamlessClone places the mask's bounding-box patch centred on `p`, so
    # `p` must be the centre of that bounding box (not of the image) for the
    # foreground to stay where it is. The outermost pixel ring is cleared
    # first because seamlessClone ignores it when locating the mask.
    interior = dilated_mask.copy()
    interior[[0, -1], :] = 0
    interior[:, [0, -1]] = 0
    box_x, box_y, box_w, box_h = cv2.boundingRect(interior)

    if box_w == 0 or box_h == 0:
        # Empty mask: there is nothing to clone onto the black background.
        cloned = black_bg
    else:
        center = (box_x + box_w // 2, box_y + box_h // 2)
        # Use mixed clone to merge the foreground onto a black background, using the dilated mask
        cloned = cv2.seamlessClone(
            src=foreground_np,
            dst=black_bg,
            mask=dilated_mask,
            p=center,
            flags=cv2.MIXED_CLONE
        )

    # Blend cloned result (mostly black except where mask is) with noise
    noisy_bg = (alpha * cloned + (1 - alpha) * noise_rgb).astype(np.uint8)

    # Expand mask to 3 channels if needed
    if mask_norm.ndim == 2:
        mask_norm = np.stack([mask_norm] * 3, axis=-1)

    # Combine: keep original pixels where mask=1, use noisy_bg where mask=0
    combined = ((1 - mask_norm) * noisy_bg + mask_norm * foreground_np).astype(np.uint8)

    return Image.fromarray(combined)


def main():
    """Run the FLUX.1 Fill [dev] outpainting example from download to saved file.

    Loads the example image and its foreground mask, builds the noise-filled
    masked image with ``prepare_masked_image``, runs ``FluxFillPipeline`` on
    CUDA in bfloat16 and writes the first generated image to
    ``flux-fill-dev.png``.
    """
    # Fetch the foreground mask (single channel) and the source picture (RGB).
    foreground_mask = load_image(
        "https://huggingface.co/rkv1990/FLUX.1-Fill-dev-outpainting/resolve/main/beauty-products-mask.png"
    ).convert("L")
    source_image = load_image(
        "https://huggingface.co/rkv1990/FLUX.1-Fill-dev-outpainting/resolve/main/beauty-products.png"
    ).convert("RGB")

    # The pipeline mask is the inverse of the foreground mask: everything
    # that is NOT foreground is handed over as the area to fill.
    fill_mask = 255 - np.array(foreground_mask)

    # PIL reports size as (width, height).
    width, height = source_image.size
    conditioned = prepare_masked_image(foreground=source_image, mask=foreground_mask)

    # Load the unmodified FLUX.1 Fill [dev] weights and move them to the GPU.
    pipeline = FluxFillPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-Fill-dev",
        torch_dtype=torch.bfloat16,
    )
    pipeline = pipeline.to("cuda")

    # Fixed CPU seed so repeated runs start from the same random state.
    seeded_generator = torch.Generator(device="cpu").manual_seed(0)

    result = pipeline(
        prompt="A mist-covered forest at dawn, with pale golden light filtering through ancient, twisted trees. Soft fog swirls around delicate wildflowers glowing faintly with bioluminescence.",
        image=conditioned,
        mask_image=fill_mask,
        height=height,
        width=width,
        guidance_scale=30,
        num_inference_steps=50,
        max_sequence_length=512,
        generator=seeded_generator,
    )
    generated = result.images[0]

    # Persist the generated picture next to the script.
    generated.save("flux-fill-dev.png")
    print("Saved output to flux-fill-dev.png")


# Run the example only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

To learn more, check out the diffusers documentation.

rkv1990
/

FLUX.1-Fill-dev-outpainting

Diffusers

Model tree for rkv1990/FLUX.1-Fill-dev-outpainting