Spaces:

mooki0
/

HunyuanWorld-Demo

Build error

App Files Files Community

HunyuanWorld-Demo / hy3dworld /utils /inpaint_utils.py

mooki0

Initial commit of Gradio app

57276d4 verified 15 days ago

raw

history blame contribute delete

3.77 kB

	import torch
	import numpy as np
	import cv2
	import math
	from ..models import FluxFillPipeline


	def get_smooth_mask(general_mask, ksize=(120, 120)):
	r"""Generate a smooth mask from the general mask using morphological dilation.
	Args:
	general_mask (np.ndarray): The input mask to be smoothed, expected to be a binary mask
	with shape [H, W] and dtype uint8 (0 or 1).
	ksize (tuple): The size of the structuring element used for dilation, specified as
	(height, width). Default is (120, 120).
	Returns:
	np.ndarray: The smoothed mask, with the same shape as the input mask, where
	the values are either 0 or 1 (uint8).
	"""
	# Ensure kernel size is a tuple of integers
	ksize = (int(ksize[0]), int(ksize[1]))

	# Create rectangular structuring element for dilation
	kernel = cv2.getStructuringElement(cv2.MORPH_RECT, ksize)

	# Apply dilation to expand mask regions
	mask_array = cv2.dilate(general_mask.astype(
	np.uint8), kernel) # [1024, 2048] uint8 1

	# Convert back to binary mask
	mask_array = (mask_array > 0).astype(np.uint8)

	return mask_array


	def build_inpaint_model(model_path, lora_path, subfolder, device=0):
	r"""Build the inpainting model pipeline.
	Args:
	model_path (str): The path to the pre-trained model.
	lora_path (str): The path to the LoRA weights.
	device (int): The device ID to load the model onto (default: 0).
	Returns:
	pipe: The inpainting pipeline object.
	"""
	# Initialize pipeline with bfloat16 precision for memory efficiency
	pipe = FluxFillPipeline.from_pretrained(
	model_path, torch_dtype=torch.bfloat16).to(f"cuda:{device}")
	pipe.load_lora_weights(
	lora_path,
	subfolder=subfolder,
	weight_name="lora.safetensors", # default weight name
	torch_dtype=torch.bfloat16
	)
	pipe.enable_model_cpu_offload() # save some VRAM by offloading the model to CPU
	pipe.device_id = device
	return pipe


	def get_adaptive_smooth_mask_ksize_ctrl(general_masks, mask_infos, basek=100, threshold=10000, r=1):
	r"""Generate a smooth mask with adaptive kernel size control based on mask area.
	Args:
	general_masks (np.ndarray): The input mask array, expected to be a 2D array of shape [H, W]
	where each pixel value corresponds to a mask ID.
	mask_infos (list): A list of dictionaries containing information about each mask, including
	the area and label of the mask.
	basek (int): The base kernel size for smoothing, default is 100.
	threshold (int): The area threshold to determine the scaling factor for the kernel size,
	default is 10000.
	r (int): A scaling factor for the kernel size, default is 1.
	Returns:
	np.ndarray: The smoothed mask array, with the same shape as the input mask,
	where the values are either 0 or 1 (uint8).
	"""
	# Initialize output mask
	mask_array = np.zeros_like(general_masks).astype(np.bool_)

	# Process each mask region individually
	for i in range(len(mask_infos)):
	mask_info = mask_infos[i]
	area = mask_info["area"]

	# Calculate size ratio with threshold clamping
	ratio = area / threshold
	ratio = math.sqrt(min(ratio, 1.0))

	# Extract current object mask
	mask = (general_masks == i + 1).astype(np.uint8)

	# Default kernel for other objects
	mask = get_smooth_mask(mask, ksize=(
	int(basekratio)r, int((basek+10)ratio)r)).astype(np.bool_)

	# Combine with existing masks
	mask_array = np.logical_or(mask_array, mask)

	return mask_array.astype(np.uint8)