import math

import cv2
import numpy as np
import torch

from ..models import FluxFillPipeline


def get_smooth_mask(general_mask, ksize=(120, 120)):
    r"""Generate a smooth mask from the general mask using morphological dilation.

    Args:
        general_mask (np.ndarray): The input mask to be smoothed, expected to be a binary
            mask with shape [H, W] and dtype uint8 (values 0 or 1).
        ksize (tuple): The size of the rectangular structuring element used for dilation,
            specified as (width, height) following OpenCV's convention. Default is (120, 120).

    Returns:
        np.ndarray: The smoothed (dilated) mask, with the same shape as the input mask,
        where the values are either 0 or 1 (uint8).
    """
    # Ensure kernel size is a tuple of integers
    ksize = (int(ksize[0]), int(ksize[1]))
    # Create a rectangular structuring element for dilation
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, ksize)
    # Apply dilation to expand the mask region, e.g. a [1024, 2048] uint8 mask
    mask_array = cv2.dilate(general_mask.astype(np.uint8), kernel)
    # Convert back to a binary (0/1) mask
    mask_array = (mask_array > 0).astype(np.uint8)
    return mask_array
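
# NOTE: Illustrative usage sketch, not part of the original module. It shows how
# get_smooth_mask dilates a small synthetic mask; the toy array below is an
# assumption made purely for demonstration.
#
#   toy_mask = np.zeros((256, 256), dtype=np.uint8)
#   toy_mask[100:140, 100:140] = 1                       # 40x40 square object
#   dilated = get_smooth_mask(toy_mask, ksize=(31, 31))  # grows the square ~15 px per side
#   assert dilated.sum() > toy_mask.sum()
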
def build_inpaint_model(model_path, lora_path, subfolder, device=0):
    r"""Build the inpainting model pipeline.

    Args:
        model_path (str): The path to the pre-trained FLUX fill model.
        lora_path (str): The path (or repo id) containing the LoRA weights.
        subfolder (str): The subfolder inside lora_path that holds the LoRA weights.
        device (int): The CUDA device ID to load the model onto (default: 0).

    Returns:
        pipe: The inpainting pipeline object.
    """
    # Initialize the pipeline with bfloat16 precision for memory efficiency
    pipe = FluxFillPipeline.from_pretrained(
        model_path, torch_dtype=torch.bfloat16).to(f"cuda:{device}")
    pipe.load_lora_weights(
        lora_path,
        subfolder=subfolder,
        weight_name="lora.safetensors",  # default weight name
        torch_dtype=torch.bfloat16,
    )
    pipe.enable_model_cpu_offload()  # save some VRAM by offloading idle sub-models to CPU
    pipe.device_id = device  # record the target GPU id for downstream use
    return pipe
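
# NOTE: Illustrative call sketch, not part of the original module. The checkpoint
# and LoRA locations below are placeholders (assumptions), not paths shipped with
# this repo. The returned pipe is then invoked like a standard FluxFillPipeline,
# passing an image, a mask image, and a prompt.
#
#   pipe = build_inpaint_model(
#       model_path="path/to/FLUX.1-Fill-dev",  # hypothetical local checkpoint dir
#       lora_path="path/to/lora_repo",         # hypothetical LoRA location
#       subfolder="inpaint_lora",              # hypothetical subfolder name
#       device=0,
#   )
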
def get_adaptive_smooth_mask_ksize_ctrl(general_masks, mask_infos, basek=100, threshold=10000, r=1):
    r"""Generate a smooth mask with adaptive kernel size control based on mask area.

    Args:
        general_masks (np.ndarray): The input mask array, expected to be a 2D array of
            shape [H, W] where each pixel value corresponds to a mask ID (0 for background,
            i + 1 for the i-th object).
        mask_infos (list): A list of dictionaries with information about each mask; each
            entry must provide the mask's pixel area under the "area" key.
        basek (int): The base kernel size for smoothing, default is 100.
        threshold (int): The area threshold used to scale the kernel size; areas at or
            above this value use the full base kernel. Default is 10000.
        r (int): A multiplicative scaling factor for the kernel size, default is 1.

    Returns:
        np.ndarray: The smoothed mask array, with the same shape as the input mask,
        where the values are either 0 or 1 (uint8).
    """
    # Initialize the combined output mask
    mask_array = np.zeros_like(general_masks).astype(np.bool_)
    # Process each mask region individually
    for i in range(len(mask_infos)):
        mask_info = mask_infos[i]
        area = mask_info["area"]
        # Scale the kernel with the square root of the (clamped) area ratio,
        # so smaller objects get proportionally smaller dilation kernels
        ratio = area / threshold
        ratio = math.sqrt(min(ratio, 1.0))
        # Extract the current object's binary mask (mask IDs are 1-based)
        mask = (general_masks == i + 1).astype(np.uint8)
        # Dilate it with an area-adaptive rectangular kernel
        mask = get_smooth_mask(
            mask, ksize=(int(basek * ratio) * r, int((basek + 10) * ratio) * r)
        ).astype(np.bool_)
        # Merge into the combined mask
        mask_array = np.logical_or(mask_array, mask)
    return mask_array.astype(np.uint8)
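
# NOTE: Illustrative usage sketch, not part of the original module. It assumes an
# ID-coded mask where object i occupies pixel value i + 1, plus a matching
# mask_infos list; both toy inputs below are assumptions for demonstration.
#
#   id_mask = np.zeros((512, 512), dtype=np.uint8)
#   id_mask[50:150, 50:150] = 1       # object 1, area 10000 -> full base kernel
#   id_mask[300:320, 300:320] = 2     # object 2, area 400   -> much smaller kernel
#   infos = [{"area": 10000}, {"area": 400}]
#   smooth = get_adaptive_smooth_mask_ksize_ctrl(id_mask, infos, basek=100, threshold=10000)
#   # smooth is a single [512, 512] uint8 mask covering both dilated objects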