LIA-X-fast

Paused

Julian Bilcke

bcafc05 3 months ago

9.04 kB

	import tempfile
	import time
	import gradio as gr
	import torch
	import torchvision
	from PIL import Image
	import numpy as np
	import imageio
	import spaces
	from einops import rearrange

	# Labels: display names of the editable controls, in slider order.
	# Entries 0-5 feed the "Head" tab, 6-9 the "Mouth" tab, 10-13 the "Eyes" tab.
	labels_k = [
	    # head
	    'yaw1', 'yaw2', 'pitch',
	    'roll1', 'roll2', 'neck',
	    # mouth
	    'pout', 'open->close', '"O" Mouth', 'smile',
	    # eyes
	    'close->open', 'eyebrows', 'eyeballs1', 'eyeballs2',
	]

	# Integer ids handed to `gen.edit_img` together with the slider values; the
	# list has one id per entry of `labels_k`.
	# Presumably indices of motion directions inside the generator -- TODO confirm.
	labels_v = [
	    37, 39, 28, 15, 33, 31,  # head
	    6, 25, 16, 19,           # mouth
	    13, 24, 17, 26,          # eyes
	]


	@torch.compiler.allow_in_graph
	def load_image(img, size):
	    """Read an image and return it as a channel-first array scaled to [0, 1].

	    Args:
	        img: Image source handed straight to ``PIL.Image.open``.
	        size: Edge length of the square the image is resized to.

	    Returns:
	        Tuple ``(pixels, w, h)``: ``pixels`` is a ``3 x size x size`` float
	        array with values in [0, 1]; ``w`` and ``h`` are the width and height
	        of the image before resizing.
	    """
	    picture = Image.open(img).convert('RGB')
	    orig_w, orig_h = picture.size

	    # np.array always copies, so the pixel buffer is writable (the copy is
	    # kept to avoid torch.compile issues with read-only arrays).
	    pixels = np.array(picture.resize((size, size)))
	    channel_first = np.transpose(pixels, (2, 0, 1))  # HWC -> CHW

	    return channel_first / 255.0, orig_w, orig_h


	@torch.compiler.allow_in_graph
	def img_preprocessing(img_path, size):
	    """Load an image and convert it to a normalized, batched float tensor.

	    Args:
	        img_path: Image source forwarded to ``load_image``.
	        size: Edge length of the square the image is resized to.

	    Returns:
	        Tuple ``(tensor, w, h)``: ``tensor`` is a float32 tensor with a leading
	        batch dimension of 1 and values in [-1, 1]; ``w`` and ``h`` are the
	        original image dimensions reported by ``load_image``.
	    """
	    pixels, orig_w, orig_h = load_image(img_path, size)  # values in [0, 1]
	    batch = torch.from_numpy(pixels).unsqueeze(0).float()
	    normalized = (batch - 0.5) * 2.0  # [0, 1] -> [-1, 1]
	    return normalized, orig_w, orig_h


	# Resize transforms that have already been built, keyed by target size.
	resize_transform_cache = {}


	def get_resize_transform(size):
	    """Return the bilinear, antialiased ``Resize`` for ``size``.

	    The transform is constructed on the first request for a given ``size``
	    and stored in ``resize_transform_cache``; later requests reuse it.
	    """
	    try:
	        return resize_transform_cache[size]
	    except KeyError:
	        pass

	    transform = torchvision.transforms.Resize(
	        size,
	        interpolation=torchvision.transforms.InterpolationMode.BILINEAR,
	        antialias=True,
	    )
	    resize_transform_cache[size] = transform
	    return transform


	def resize(img, size):
	    """Resize ``img`` to a ``size`` x ``size`` square via the cached transform."""
	    square = (size, size)
	    return get_resize_transform(square)(img)


	def resize_back(img, w, h):
	    """Resize ``img`` to height ``h`` and width ``w`` via the cached transform."""
	    target_hw = (h, w)  # the transform is keyed as (height, width)
	    return get_resize_transform(target_hw)(img)

	def img_denorm(img):
	    """Clamp ``img`` to [-1, 1], move it to the CPU and min-max rescale to [0, 1].

	    The rescaling uses the minimum and maximum of the clamped tensor, so the
	    smallest value maps to 0 and the largest to 1. A constant tensor (zero
	    value range) maps to all zeros instead of NaN.

	    Args:
	        img: Tensor of any shape.

	    Returns:
	        CPU tensor of the same shape as ``img``.
	    """
	    img = img.clamp(-1, 1).cpu()
	    lo = img.min()
	    span = img.max() - lo
	    # Guard the division: a flat image has span == 0, which would yield NaN.
	    span = torch.where(span > 0, span, torch.ones_like(span))

	    return (img - lo) / span


	def img_postprocessing(img, w, h):
	    """Turn a model output tensor into a channel-last uint8 NumPy image.

	    Steps: resize to ``h`` x ``w``, clamp to [-1, 1], min-max rescale to
	    [0, 1], drop the leading dimension, move channels last, then scale to
	    0-255 on the CPU. A constant image (zero value range) becomes all zeros
	    instead of NaN.

	    Args:
	        img: Tensor, presumably ``1 x 3 x H x W`` in [-1, 1] -- TODO confirm
	            against the generator output.
	        w: Target width.
	        h: Target height.

	    Returns:
	        ``numpy.ndarray`` of dtype ``uint8``; Gradio accepts it directly.
	    """
	    # Resize before leaving the source device (uses the cached transform).
	    img = resize_back(img, w, h)

	    # Denormalize on the same device to avoid an early CPU transfer.
	    img = img.clamp(-1, 1)
	    lo = img.min()
	    span = img.max() - lo
	    # Guard the division: a flat image has span == 0, which would yield NaN
	    # and then garbage after the uint8 cast.
	    span = torch.where(span > 0, span, torch.ones_like(span))
	    img = (img - lo) / span

	    # Make the channel-last layout contiguous, then do one CPU transfer.
	    img = img.squeeze(0).permute(1, 2, 0).contiguous()
	    img_output = (img.cpu().numpy() * 255).astype(np.uint8)

	    # Return the NumPy array directly, since Gradio supports it.
	    return img_output

	def img_edit(gen, device):
	    """Build the "Image Editing" Gradio tab and wire it to the generator.

	    Creates the input/output image widgets, one slider per entry of
	    ``labels_k``, and the Edit/Clear buttons. Every slider change and every
	    click on Edit runs the compiled model on the current image.

	    Args:
	        gen: Model object exposing ``edit_img(image_tensor, labels, values)``.
	        device: Torch device the input tensors are moved to.
	    """

	    @torch.compile
	    def compiled_inference(image_tensor, selected_s):
	        """Compiled version of just the model inference."""
	        return gen.edit_img(image_tensor, labels_v, selected_s)

	    def _warmup_model():
	        """Run the compiled model once on dummy data to reduce first-run latency.

	        Best effort: any failure is reported and compilation is deferred to
	        the first real request.
	        """
	        print("[img_edit] Pre-warming model compilation...")
	        try:
	            # Tensor creation sits inside the try so that an unavailable
	            # device cannot abort UI construction.
	            dummy_image = torch.randn(1, 3, 512, 512, device=device)
	            # A tuple, like the *selected_s the handler forwards at runtime.
	            dummy_selected_s = (0.0,) * len(labels_v)
	            with torch.inference_mode():
	                _ = compiled_inference(dummy_image, dummy_selected_s)
	            print("[img_edit] Model pre-warming completed successfully")
	        except Exception as e:
	            print(f"[img_edit] Model pre-warming failed (will compile on first use): {e}")

	    # Pre-warm the model
	    _warmup_model()

	    @spaces.GPU
	    @torch.inference_mode()
	    def edit_img(image, *selected_s):
	        """Edit ``image`` (a file path) with the slider values ``selected_s``.

	        Returns the edited image as a uint8 NumPy array, or ``None`` when no
	        image has been provided yet.
	        """
	        if image is None:
	            # Sliders can fire before any image has been loaded.
	            return None

	        # NOTE(review): device work may complete asynchronously, so the
	        # per-stage timings below are approximate -- confirm before relying
	        # on them.
	        start_time = time.perf_counter()
	        print("[edit_img] Starting image editing...")

	        # Image preprocessing timing
	        preprocess_start = time.perf_counter()
	        image_tensor, w, h = img_preprocessing(image, 512)
	        image_tensor = image_tensor.to(device)
	        preprocess_end = time.perf_counter()
	        print(f"[edit_img] Preprocessing took: {(preprocess_end - preprocess_start) * 1000:.2f} ms")

	        # Model inference timing (compile only the core computation)
	        inference_start = time.perf_counter()
	        edited_image_tensor = compiled_inference(image_tensor, selected_s)
	        inference_end = time.perf_counter()
	        print(f"[edit_img] Model inference took: {(inference_end - inference_start) * 1000:.2f} ms")

	        # Post-processing timing
	        postprocess_start = time.perf_counter()
	        edited_image = img_postprocessing(edited_image_tensor, w, h)
	        postprocess_end = time.perf_counter()
	        print(f"[edit_img] Post-processing took: {(postprocess_end - postprocess_start) * 1000:.2f} ms")

	        # Total time
	        end_time = time.perf_counter()
	        total_time_ms = (end_time - start_time) * 1000
	        print(f"[edit_img] Total execution time: {total_time_ms:.2f} ms")
	        print("[edit_img] ----------------------------------------")
	        return edited_image

	    def clear_media():
	        """Reset the output image and set every slider back to 0."""
	        # One value per output component: the image, then each slider.
	        return (None, *([0] * len(labels_k)))

	    with gr.Tab("Image Editing"):

	        inputs_s = []

	        def _slider_row(names, minimum, maximum):
	            """Add one row of zero-initialised sliders and register them as inputs."""
	            with gr.Row():
	                for name in names:
	                    inputs_s.append(
	                        gr.Slider(minimum=minimum, maximum=maximum, value=0, label=name)
	                    )

	        # NOTE(review): the nesting of the layout containers below (in
	        # particular the Control Panel living in the right-hand column) should
	        # be confirmed against the rendered UI.
	        with gr.Row():
	            # Left column: source image, examples and action buttons.
	            with gr.Column(scale=1):
	                with gr.Row():
	                    with gr.Accordion(open=True, label="Image"):
	                        image_input = gr.Image(type="filepath", width=512)  # , height=550)
	                        gr.Examples(
	                            examples=[
	                                ["./data/source/macron.png"],
	                                ["./data/source/einstein.png"],
	                                ["./data/source/taylor.png"],
	                                ["./data/source/portrait1.png"],
	                                ["./data/source/portrait2.png"],
	                                ["./data/source/portrait3.png"],
	                            ],
	                            inputs=[image_input],
	                            #cache_mode="lazy",
	                            visible=True,
	                        )

	                with gr.Row():
	                    with gr.Column(scale=1):
	                        with gr.Row():  # Buttons now within a single Row
	                            edit_btn = gr.Button("Edit")
	                            clear_btn = gr.Button("Clear")
	                        #with gr.Row():
	                        #    animate_btn = gr.Button("Generate")

	            # Right column: edited image and the slider control panel.
	            with gr.Column(scale=1):

	                with gr.Row():
	                    with gr.Accordion(open=True, label="Edited Image"):
	                        image_output = gr.Image(label="Output Image", type='numpy', interactive=False, width=512)

	                with gr.Accordion("Control Panel", open=True):
	                    with gr.Tab("Head"):
	                        _slider_row(labels_k[:3], -1.0, 0.5)
	                        _slider_row(labels_k[3:6], -0.5, 0.5)

	                    with gr.Tab("Mouth"):
	                        _slider_row(labels_k[6:8], -0.4, 0.4)
	                        _slider_row(labels_k[8:10], -0.4, 0.4)

	                    with gr.Tab("Eyes"):
	                        _slider_row(labels_k[10:12], -0.4, 0.4)
	                        _slider_row(labels_k[12:14], -0.2, 0.2)

	        # Re-run the edit whenever any slider moves.
	        for slider in inputs_s:
	            slider.change(
	                fn=edit_img,
	                inputs=[image_input] + inputs_s,
	                outputs=[image_output],

	                show_progress='hidden',

	                trigger_mode='always_last',

	                # currently we have a latency around 450ms
	                stream_every=0.5
	            )

	        edit_btn.click(
	            fn=edit_img,
	            inputs=[image_input] + inputs_s,
	            outputs=[image_output],
	            show_progress=True
	        )

	        clear_btn.click(
	            fn=clear_media,
	            outputs=[image_output] + inputs_s
	        )