|
'''
Setup (Ubuntu, conda, Python 3.10):

sudo apt-get update && sudo apt-get install -y git-lfs ffmpeg cbm

conda install python=3.10

git clone https://huggingface.co/spaces/svjack/LHM && cd LHM
pip install -r requirements.txt

python app.py
'''
|
import os |
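# Runtime environment bootstrap for the ZeroGPU Space: purge any stale offload
# cache, install chumpy, swap basicsr for a patched fork, pin numpy to a
# chumpy-compatible release, and install prebuilt CUDA wheels
# (diff_gaussian_rasterization, simple_knn, pytorch3d for py3.10 / cu121 / torch 2.4).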
|
os.system("rm -rf /data-nvme/zerogpu-offload/") |
|
os.system("pip install chumpy") |
|
os.system("pip uninstall -y basicsr") |
|
os.system("pip install git+https://github.com/hitsz-zuoqi/BasicSR/") |
|
os.system("pip install numpy==1.23.0") |
|
os.system("pip install ./wheels/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl") |
|
os.system("pip install ./wheels/simple_knn-0.0.0-cp310-cp310-linux_x86_64.whl") |
|
os.system("pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu121_pyt240/download.html") |
|
import cv2 |
|
import time |
|
from PIL import Image |
|
import numpy as np |
|
import gradio as gr |
|
import base64 |
|
import spaces |
|
import torch |
|
torch._dynamo.config.disable = True |
|
import subprocess

import argparse

from collections import defaultdict  # used by animation_infer below
|
from omegaconf import OmegaConf |
|
from rembg import remove |
|
from engine.pose_estimation.pose_estimator import PoseEstimator |
|
from LHM.utils.face_detector import VGGHeadDetector |
|
from LHM.utils.hf_hub import wrap_model_hub |
|
from LHM.runners.infer.utils import (
    calc_new_tgt_size_by_aspect,
    center_crop_according_to_mask,
    prepare_motion_seqs,
    resize_image_keepaspect_np,
    scale_intrs,  # needed by infer_preprocess_image when intrinsics are given
)
|
from LHM.utils.ffmpeg_utils import images_to_video |
|
from engine.SegmentAPI.base import Bbox |
|
|
|
def get_bbox(mask): |
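# Binarize the alpha mask at 0.5, take the tight bounding box of the
# foreground pixels, and enlarge it by 10% for a safety margin.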
|
height, width = mask.shape |
|
pha = mask / 255.0 |
|
pha[pha < 0.5] = 0.0 |
|
pha[pha >= 0.5] = 1.0 |
|
|
|
|
|
_h, _w = np.where(pha == 1) |
|
|
|
whwh = [ |
|
_w.min().item(), |
|
_h.min().item(), |
|
_w.max().item(), |
|
_h.max().item(), |
|
] |
|
|
|
box = Bbox(whwh) |
|
|
|
|
|
scale_box = box.scale(1.1, width=width, height=height) |
|
return scale_box |
|
|
|
def infer_preprocess_image( |
|
rgb_path, |
|
mask, |
|
intr, |
|
pad_ratio, |
|
bg_color, |
|
max_tgt_size, |
|
aspect_standard, |
|
enlarge_ratio, |
|
render_tgt_size, |
|
multiply, |
|
need_mask=True, |
|
): |
|
"""inferece |
|
image, _, _ = preprocess_image(image_path, mask_path=None, intr=None, pad_ratio=0, bg_color=1.0, |
|
max_tgt_size=896, aspect_standard=aspect_standard, enlarge_ratio=[1.0, 1.0], |
|
render_tgt_size=source_size, multiply=14, need_mask=True) |
|
|
|
""" |
|
|
|
rgb = np.array(Image.open(rgb_path)) |
|
rgb_raw = rgb.copy() |
|
|
|
bbox = get_bbox(mask) |
|
bbox_list = bbox.get_box() |
|
|
|
rgb = rgb[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]] |
|
mask = mask[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]] |
|
|
|
h, w, _ = rgb.shape |
|
assert w < h |
|
cur_ratio = h / w |
|
scale_ratio = cur_ratio / aspect_standard |
|
|
|
target_w = int(min(w * scale_ratio, h)) |
|
offset_w = (target_w - w) // 2 |
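# Fit the crop to the target aspect ratio: pad the width symmetrically with
# white when the crop is too narrow, otherwise center-crop the excess width.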
|
|
|
if offset_w > 0: |
|
rgb = np.pad( |
|
rgb, |
|
((0, 0), (offset_w, offset_w), (0, 0)), |
|
mode="constant", |
|
constant_values=255, |
|
) |
|
mask = np.pad( |
|
mask, |
|
((0, 0), (offset_w, offset_w)), |
|
mode="constant", |
|
constant_values=0, |
|
) |
|
else:

    offset_w = -offset_w

    rgb = rgb[:, offset_w:-offset_w, :]

    mask = mask[:, offset_w:-offset_w]
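# A second symmetric pad follows in both branches, adding extra white margin;
# reading this as headroom for center_crop_according_to_mask below is an
# assumption — the margin width simply reuses offset_w.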
|
|
|
|
|
|
|
rgb = np.pad( |
|
rgb, |
|
((0, 0), (offset_w, offset_w), (0, 0)), |
|
mode="constant", |
|
constant_values=255, |
|
) |
|
|
|
mask = np.pad( |
|
mask, |
|
((0, 0), (offset_w, offset_w)), |
|
mode="constant", |
|
constant_values=0, |
|
) |
|
|
|
rgb = rgb / 255.0 |
|
mask = mask / 255.0 |
|
|
|
mask = (mask > 0.5).astype(np.float32) |
|
rgb = rgb[:, :, :3] * mask[:, :, None] + bg_color * (1 - mask[:, :, None]) |
|
|
|
|
|
rgb = resize_image_keepaspect_np(rgb, max_tgt_size) |
|
mask = resize_image_keepaspect_np(mask, max_tgt_size) |
|
|
|
|
|
rgb, mask, offset_x, offset_y = center_crop_according_to_mask( |
|
rgb, mask, aspect_standard, enlarge_ratio |
|
) |
|
if intr is not None: |
|
intr[0, 2] -= offset_x |
|
intr[1, 2] -= offset_y |
|
|
|
|
|
|
|
tgt_hw_size, ratio_y, ratio_x = calc_new_tgt_size_by_aspect( |
|
cur_hw=rgb.shape[:2], |
|
aspect_standard=aspect_standard, |
|
tgt_size=render_tgt_size, |
|
multiply=multiply, |
|
) |
|
|
|
rgb = cv2.resize( |
|
rgb, dsize=(tgt_hw_size[1], tgt_hw_size[0]), interpolation=cv2.INTER_AREA |
|
) |
|
mask = cv2.resize( |
|
mask, dsize=(tgt_hw_size[1], tgt_hw_size[0]), interpolation=cv2.INTER_AREA |
|
) |
|
|
|
if intr is not None: |
|
|
|
|
|
intr = scale_intrs(intr, ratio_x=ratio_x, ratio_y=ratio_y) |
|
assert ( |
|
abs(intr[0, 2] * 2 - rgb.shape[1]) < 2.5 |
|
), f"{intr[0, 2] * 2}, {rgb.shape[1]}" |
|
assert ( |
|
abs(intr[1, 2] * 2 - rgb.shape[0]) < 2.5 |
|
), f"{intr[1, 2] * 2}, {rgb.shape[0]}" |
|
|
|
|
|
intr[0, 2] = rgb.shape[1] // 2 |
|
intr[1, 2] = rgb.shape[0] // 2 |
|
|
|
rgb = torch.from_numpy(rgb).float().permute(2, 0, 1).unsqueeze(0) |
|
mask = ( |
|
torch.from_numpy(mask[:, :, None]).float().permute(2, 0, 1).unsqueeze(0) |
|
) |
|
return rgb, mask, intr |
|
|
|
def parse_configs(): |
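# Config precedence, lowest to highest: values derived from the training
# config (--config), the inference YAML (--infer), then CLI overrides.
# In Spaces, the APP_INFER / APP_MODEL_NAME env vars stand in for CLI values.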
|
|
|
parser = argparse.ArgumentParser() |
|
parser.add_argument("--config", type=str) |
|
parser.add_argument("--infer", type=str) |
|
args, unknown = parser.parse_known_args() |
|
|
|
cfg = OmegaConf.create() |
|
cli_cfg = OmegaConf.from_cli(unknown) |
|
|
|
|
|
if os.environ.get("APP_INFER") is not None: |
|
args.infer = os.environ.get("APP_INFER") |
|
if os.environ.get("APP_MODEL_NAME") is not None: |
|
cli_cfg.model_name = os.environ.get("APP_MODEL_NAME") |
|
|
|
args.config = args.infer if args.config is None else args.config |
|
|
|
if args.config is not None: |
|
cfg_train = OmegaConf.load(args.config) |
|
cfg.source_size = cfg_train.dataset.source_image_res |
|
try: |
|
cfg.src_head_size = cfg_train.dataset.src_head_size |
|
except Exception:
|
cfg.src_head_size = 112 |
|
cfg.render_size = cfg_train.dataset.render_image.high |
|
_relative_path = os.path.join( |
|
cfg_train.experiment.parent, |
|
cfg_train.experiment.child, |
|
os.path.basename(cli_cfg.model_name).split("_")[-1], |
|
) |
|
|
|
cfg.save_tmp_dump = os.path.join("exps", "save_tmp", _relative_path) |
|
cfg.image_dump = os.path.join("exps", "images", _relative_path) |
|
cfg.video_dump = os.path.join("exps", "videos", _relative_path) |
|
|
|
if args.infer is not None: |
|
cfg_infer = OmegaConf.load(args.infer) |
|
cfg.merge_with(cfg_infer) |
|
cfg.setdefault( |
|
"save_tmp_dump", os.path.join("exps", cli_cfg.model_name, "save_tmp") |
|
) |
|
cfg.setdefault("image_dump", os.path.join("exps", cli_cfg.model_name, "images")) |
|
cfg.setdefault( |
|
"video_dump", os.path.join("dumps", cli_cfg.model_name, "videos") |
|
) |
|
cfg.setdefault("mesh_dump", os.path.join("dumps", cli_cfg.model_name, "meshes")) |
|
|
|
cfg.motion_video_read_fps = 6 |
|
cfg.merge_with(cli_cfg) |
|
|
|
cfg.setdefault("logger", "INFO") |
|
|
|
assert cfg.model_name is not None, "model_name is required" |
|
|
|
return cfg, cfg_train |
|
|
|
def _build_model(cfg): |
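# Wrap the registered LHM backbone so that from_pretrained() can pull
# weights straight from the Hugging Face Hub by model name.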
|
from LHM.models import model_dict |
|
|
|
hf_model_cls = wrap_model_hub(model_dict["human_lrm_sapdino_bh_sd3_5"]) |
|
model = hf_model_cls.from_pretrained(cfg.model_name) |
|
|
|
return model |
|
|
|
def launch_pretrained(): |
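# Download and unpack the runtime assets and the LHM-0.5B checkpoint from the
# DyrusQZ/LHM_Runtime model repo on first launch.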
|
from huggingface_hub import snapshot_download, hf_hub_download |
|
hf_hub_download(repo_id="DyrusQZ/LHM_Runtime", repo_type='model', filename='assets.tar', local_dir="./") |
|
os.system("tar -xf assets.tar && rm assets.tar") |
|
hf_hub_download(repo_id="DyrusQZ/LHM_Runtime", repo_type='model', filename='LHM-0.5B.tar', local_dir="./") |
|
os.system("tar -xf LHM-0.5B.tar && rm LHM-0.5B.tar") |
|
hf_hub_download(repo_id="DyrusQZ/LHM_Runtime", repo_type='model', filename='LHM_prior_model.tar', local_dir="./") |
|
os.system("tar -xf LHM_prior_model.tar && rm LHM_prior_model.tar") |
|
|
|
def launch_env_not_compile_with_cuda(): |
|
os.system("pip install chumpy") |
|
os.system("pip uninstall -y basicsr") |
|
os.system("pip install git+https://github.com/hitsz-zuoqi/BasicSR/") |
|
os.system("pip install numpy==1.23.0") |
|
|
|
def animation_infer(renderer, gs_model_list, query_points, smplx_params, render_c2ws, render_intrs, render_bg_colors): |
|
'''Inference code avoid repeat forward. |
|
''' |
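# The intrinsics store the principal point at the image center, so the
# render resolution is recovered as (H, W) = (2 * cy, 2 * cx).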
|
render_h, render_w = int(render_intrs[0, 0, 1, 2] * 2), int( |
|
render_intrs[0, 0, 0, 2] * 2 |
|
) |
|
|
|
render_res_list = [] |
|
num_views = render_c2ws.shape[1] |
|
start_time = time.time() |
|
|
|
|
|
|
|
|
for view_idx in range(num_views): |
|
render_res = renderer.forward_animate_gs( |
|
gs_model_list, |
|
query_points, |
|
renderer.get_single_view_smpl_data(smplx_params, view_idx), |
|
render_c2ws[:, view_idx : view_idx + 1], |
|
render_intrs[:, view_idx : view_idx + 1], |
|
render_h, |
|
render_w, |
|
render_bg_colors[:, view_idx : view_idx + 1], |
|
) |
|
render_res_list.append(render_res) |
|
print(

    f"time elapsed (animate gs model per frame): {(time.time() - start_time) / num_views}"

)
|
|
|
out = defaultdict(list) |
|
for res in render_res_list: |
|
for k, v in res.items(): |
|
if isinstance(v[0], torch.Tensor): |
|
out[k].append(v.detach().cpu()) |
|
else: |
|
out[k].append(v) |
|
for k, v in out.items(): |
|
|
|
if isinstance(v[0], torch.Tensor): |
|
out[k] = torch.concat(v, dim=1) |
|
if k in ["comp_rgb", "comp_mask", "comp_depth"]: |
|
out[k] = out[k][0].permute( |
|
0, 2, 3, 1 |
|
) |
|
else: |
|
out[k] = v |
|
return out |
|
|
|
def assert_input_image(input_image): |
|
if input_image is None: |
|
raise gr.Error("No image selected or uploaded!") |
|
|
|
def prepare_working_dir(): |
|
import tempfile |
|
working_dir = tempfile.TemporaryDirectory() |
|
return working_dir |
|
|
|
def init_preprocessor(): |
|
from LHM.utils.preprocess import Preprocessor |
|
global preprocessor |
|
preprocessor = Preprocessor() |
|
|
|
def preprocess_fn(image_in: np.ndarray, remove_bg: bool, recenter: bool, working_dir): |
|
image_raw = os.path.join(working_dir.name, "raw.png") |
|
with Image.fromarray(image_in) as img: |
|
img.save(image_raw) |
|
image_out = os.path.join(working_dir.name, "rembg.png") |
|
success = preprocessor.preprocess(image_path=image_raw, save_path=image_out, rmbg=remove_bg, recenter=recenter) |
|
assert success, "preprocess_fn failed!"
|
return image_out |
|
|
|
def get_image_base64(path): |
|
with open(path, "rb") as image_file: |
|
encoded_string = base64.b64encode(image_file.read()).decode() |
|
return f"data:image/png;base64,{encoded_string}" |
|
|
|
|
|
def demo_lhm(pose_estimator, face_detector, lhm, cfg): |
|
|
|
@spaces.GPU(duration=80) |
|
def core_fn(image: str, video_params, working_dir): |
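# End-to-end pipeline: save the upload, check it shows a full body via the
# SMPL-X pose estimator, strip the background with rembg, crop/resize to the
# model's aspect, detect the head crop, reconstruct the 3D Gaussian avatar
# in a single forward pass, then animate it along the chosen motion sequence
# in batches and encode the frames to MP4.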
|
image_raw = os.path.join(working_dir.name, "raw.png") |
|
with Image.fromarray(image) as img: |
|
img.save(image_raw) |
|
|
|
base_vid = os.path.basename(video_params).split("_")[0] |
|
smplx_params_dir = os.path.join("./assets/sample_motion", base_vid, "smplx_params") |
|
|
|
dump_video_path = os.path.join(working_dir.name, "output.mp4") |
|
dump_image_path = os.path.join(working_dir.name, "output.png") |
|
|
|
|
|
|
|
omit_prefix = os.path.dirname(image_raw) |
|
image_name = os.path.basename(image_raw) |
|
uid = image_name.split(".")[0] |
|
subdir_path = os.path.dirname(image_raw).replace(omit_prefix, "") |
|
subdir_path = ( |
|
subdir_path[1:] if subdir_path.startswith("/") else subdir_path |
|
) |
|
print("subdir_path and uid:", subdir_path, uid) |
|
|
|
motion_seqs_dir = smplx_params_dir |
|
|
|
motion_name = os.path.dirname( |
|
motion_seqs_dir[:-1] if motion_seqs_dir[-1] == "/" else motion_seqs_dir |
|
) |
|
|
|
motion_name = os.path.basename(motion_name) |
|
|
|
dump_image_dir = os.path.dirname(dump_image_path) |
|
os.makedirs(dump_image_dir, exist_ok=True) |
|
|
|
print(image_raw, motion_seqs_dir, dump_image_dir, dump_video_path) |
|
|
|
dump_tmp_dir = dump_image_dir |
|
|
|
shape_pose = pose_estimator(image_raw) |
|
assert shape_pose.is_full_body, f"Input image must contain a full body: {shape_pose.msg}"
|
|
|
if os.path.exists(dump_video_path): |
|
return dump_image_path, dump_video_path |
|
source_size = cfg.source_size |
|
render_size = cfg.render_size |
|
render_fps = 30 |
|
|
|
aspect_standard = 5.0 / 3 |
|
motion_img_need_mask = cfg.get("motion_img_need_mask", False) |
|
vis_motion = cfg.get("vis_motion", False) |
|
|
|
|
|
input_np = cv2.imread(image_raw) |
|
output_np = remove(input_np) |
|
|
|
parsing_mask = output_np[:,:,3] |
|
|
|
|
|
image, _, _ = infer_preprocess_image( |
|
image_raw, |
|
mask=parsing_mask, |
|
intr=None, |
|
pad_ratio=0, |
|
bg_color=1.0, |
|
max_tgt_size=896, |
|
aspect_standard=aspect_standard, |
|
enlarge_ratio=[1.0, 1.0], |
|
render_tgt_size=source_size, |
|
multiply=14, |
|
need_mask=True, |
|
) |
|
|
|
try: |
|
rgb = np.array(Image.open(image_raw))
|
rgb = torch.from_numpy(rgb).permute(2, 0, 1) |
|
bbox = face_detector.detect_face(rgb) |
|
head_rgb = rgb[:, int(bbox[1]) : int(bbox[3]), int(bbox[0]) : int(bbox[2])] |
|
head_rgb = head_rgb.permute(1, 2, 0) |
|
src_head_rgb = head_rgb.cpu().numpy() |
|
except Exception:

    print("no head detected; using a blank head crop")

    src_head_rgb = np.zeros((112, 112, 3), dtype=np.uint8)
|
|
|
|
|
try: |
|
src_head_rgb = cv2.resize( |
|
src_head_rgb, |
|
dsize=(cfg.src_head_size, cfg.src_head_size), |
|
interpolation=cv2.INTER_AREA, |
|
) |
|
except Exception:
|
src_head_rgb = np.zeros( |
|
(cfg.src_head_size, cfg.src_head_size, 3), dtype=np.uint8 |
|
) |
|
|
|
src_head_rgb = ( |
|
torch.from_numpy(src_head_rgb / 255.0).float().permute(2, 0, 1).unsqueeze(0) |
|
) |
|
|
|
save_ref_img_path = os.path.join( |
|
dump_tmp_dir, "output.png" |
|
) |
|
vis_ref_img = (image[0].permute(1, 2, 0).cpu().detach().numpy() * 255).astype( |
|
np.uint8 |
|
) |
|
Image.fromarray(vis_ref_img).save(save_ref_img_path) |
|
|
|
|
|
motion_name = os.path.dirname( |
|
motion_seqs_dir[:-1] if motion_seqs_dir[-1] == "/" else motion_seqs_dir |
|
) |
|
motion_name = os.path.basename(motion_name) |
|
|
|
motion_seq = prepare_motion_seqs( |
|
motion_seqs_dir, |
|
None, |
|
save_root=dump_tmp_dir, |
|
fps=30, |
|
bg_color=1.0, |
|
aspect_standard=aspect_standard, |
|
enlarge_ratio=[1.0, 1.0],
|
render_image_res=render_size, |
|
multiply=16, |
|
need_mask=motion_img_need_mask, |
|
vis_motion=vis_motion, |
|
) |
|
|
|
camera_size = len(motion_seq["motion_seqs"]) |
|
shape_param = shape_pose.beta |
|
|
|
device = "cuda" |
|
dtype = torch.float32 |
|
shape_param = torch.tensor(shape_param, dtype=dtype).unsqueeze(0) |
|
|
|
lhm.to(dtype) |
|
|
|
smplx_params = motion_seq['smplx_params'] |
|
smplx_params['betas'] = shape_param.to(device) |
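# One forward pass reconstructs the animatable Gaussian avatar and its
# canonical query points from the body image plus the head crop.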
|
|
|
gs_model_list, query_points, transform_mat_neutral_pose = lhm.infer_single_view( |
|
image.unsqueeze(0).to(device, dtype), |
|
src_head_rgb.unsqueeze(0).to(device, dtype), |
|
None, |
|
None, |
|
render_c2ws=motion_seq["render_c2ws"].to(device), |
|
render_intrs=motion_seq["render_intrs"].to(device), |
|
render_bg_colors=motion_seq["render_bg_colors"].to(device), |
|
smplx_params={ |
|
k: v.to(device) for k, v in smplx_params.items() |
|
}, |
|
) |
|
|
|
|
|
start_time = time.time() |
|
batch_dict = dict() |
|
batch_size = 80 |
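# Animate in chunks of batch_size frames to bound GPU memory; per-chunk
# renders are moved to the CPU and concatenated afterwards.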
|
|
|
for batch_i in range(0, camera_size, batch_size): |
|
with torch.no_grad(): |
|
|
|
|
|
keys = [ |
|
"root_pose", |
|
"body_pose", |
|
"jaw_pose", |
|
"leye_pose", |
|
"reye_pose", |
|
"lhand_pose", |
|
"rhand_pose", |
|
"trans", |
|
"focal", |
|
"princpt", |
|
"img_size_wh", |
|
"expr", |
|
] |
|
batch_smplx_params = dict() |
|
batch_smplx_params["betas"] = shape_param.to(device) |
|
batch_smplx_params['transform_mat_neutral_pose'] = transform_mat_neutral_pose |
|
for key in keys: |
|
batch_smplx_params[key] = motion_seq["smplx_params"][key][ |
|
:, batch_i : batch_i + batch_size |
|
].to(device) |
|
|
|
res = lhm.animation_infer(gs_model_list, query_points, batch_smplx_params, |
|
render_c2ws=motion_seq["render_c2ws"][ |
|
:, batch_i : batch_i + batch_size |
|
].to(device), |
|
render_intrs=motion_seq["render_intrs"][ |
|
:, batch_i : batch_i + batch_size |
|
].to(device), |
|
render_bg_colors=motion_seq["render_bg_colors"][ |
|
:, batch_i : batch_i + batch_size |
|
].to(device), |
|
) |
|
|
|
for accumulate_key in ["comp_rgb", "comp_mask"]: |
|
if accumulate_key not in batch_dict: |
|
batch_dict[accumulate_key] = [] |
|
batch_dict[accumulate_key].append(res[accumulate_key].detach().cpu()) |
|
del res |
|
torch.cuda.empty_cache() |
|
|
|
for accumulate_key in ["comp_rgb", "comp_mask"]: |
|
batch_dict[accumulate_key] = torch.cat(batch_dict[accumulate_key], dim=0) |
|
|
|
print(f"time elapsed: {time.time() - start_time}") |
|
rgb = batch_dict["comp_rgb"].detach().cpu().numpy() |
|
mask = batch_dict["comp_mask"].detach().cpu().numpy() |
|
mask[mask < 0.5] = 0.0 |
|
|
|
rgb = rgb * mask + (1 - mask) * 1 |
|
rgb = np.clip(rgb * 255, 0, 255).astype(np.uint8) |
|
|
|
if vis_motion: |
|
|
|
|
|
vis_ref_img = np.tile( |
|
cv2.resize(vis_ref_img, (rgb[0].shape[1], rgb[0].shape[0]))[ |
|
None, :, :, : |
|
], |
|
(rgb.shape[0], 1, 1, 1), |
|
) |
|
rgb = np.concatenate( |
|
[rgb, motion_seq["vis_motion_render"], vis_ref_img], axis=2 |
|
) |
|
|
|
os.makedirs(os.path.dirname(dump_video_path), exist_ok=True) |
|
|
|
images_to_video( |
|
rgb, |
|
output_path=dump_video_path, |
|
fps=30, |
|
gradio_codec=False, |
|
verbose=True, |
|
) |
|
|
|
return dump_image_path, dump_video_path |
|
|
|
|
_TITLE = '''LHM: Large Animatable Human Model''' |
|
|
|
_DESCRIPTION = ''' |
|
<strong>Reconstruct a human avatar in 0.2 seconds on an A100!</strong>
|
''' |
|
|
|
with gr.Blocks(analytics_enabled=False) as demo: |
|
|
|
|
|
logo_url = "./assets/rgba_logo_new.png" |
|
logo_base64 = get_image_base64(logo_url) |
|
gr.HTML( |
|
f""" |
|
<div style="display: flex; justify-content: center; align-items: center; text-align: center;"> |
|
<div> |
|
<h1> <img src="{logo_base64}" style='height:35px; display:inline-block;'/> Large Animatable Human Model </h1> |
|
</div> |
|
</div> |
|
""" |
|
) |
|
|
|
|
|
gr.HTML( |
|
""" |
|
<div style="display: flex; justify-content: center; align-items: center; text-align: center; margin: 20px; gap: 10px;"> |
|
<a class="flex-item" href="https://arxiv.org/abs/2503.10625" target="_blank"> |
|
<img src="https://img.shields.io/badge/Paper-arXiv-darkred.svg" alt="arXiv Paper"> |
|
</a> |
|
<a class="flex-item" href="https://lingtengqiu.github.io/LHM/" target="_blank"> |
|
<img src="https://img.shields.io/badge/Project-LHM-blue" alt="Project Page"> |
|
</a> |
|
<a class="flex-item" href="https://github.com/aigc3d/LHM" target="_blank"> |
|
<img src="https://img.shields.io/github/stars/aigc3d/LHM?label=Github%20%E2%98%85&logo=github&color=C8C" alt="badge-github-stars"> |
|
</a> |
|
<a class="flex-item" href="https://www.youtube.com/watch?v=tivEpz_yiEo" target="_blank"> |
|
<img src="https://img.shields.io/badge/Youtube-Video-red.svg" alt="Video"> |
|
</a> |
|
</div> |
|
""" |
|
) |
|
|
|
gr.HTML( |
|
"""<p><h4 style="color: red;"> Notes: Please input full-body image in case of detection errors. We simplify the pipeline in spaces: 1) using Rembg instead of SAM2; 2) limit the output video length to 10s; For best visual quality, try the inference code on Github instead.</h4></p>""" |
|
) |
|
|
|
|
|
with gr.Row(): |
|
|
|
with gr.Column(variant='panel', scale=1): |
|
with gr.Tabs(elem_id="openlrm_input_image"): |
|
with gr.TabItem('Input Image'): |
|
with gr.Row(): |
|
input_image = gr.Image(label="Input Image", image_mode="RGBA", height=480, width=270, sources="upload", type="numpy", elem_id="content_image") |
|
|
|
with gr.Row(): |
|
examples = [ |
|
['assets/sample_input/joker.jpg'], |
|
['assets/sample_input/anime.png'], |
|
['assets/sample_input/basket.png'], |
|
['assets/sample_input/ai_woman1.JPG'], |
|
['assets/sample_input/anime2.JPG'], |
|
['assets/sample_input/anime3.JPG'], |
|
['assets/sample_input/boy1.png'], |
|
['assets/sample_input/choplin.jpg'], |
|
['assets/sample_input/eins.JPG'], |
|
['assets/sample_input/girl1.png'], |
|
['assets/sample_input/girl2.png'], |
|
['assets/sample_input/robot.jpg'], |
|
] |
|
gr.Examples( |
|
examples=examples, |
|
inputs=[input_image], |
|
examples_per_page=20, |
|
) |
|
|
|
with gr.Column(): |
|
with gr.Tabs(elem_id="openlrm_input_video"): |
|
with gr.TabItem('Input Video'): |
|
with gr.Row(): |
|
video_input = gr.Video(label="Input Video",height=480, width=270, interactive=False) |
|
|
|
examples = [
    './assets/sample_motion/ex5/ex5_origin.mp4',
    './assets/sample_motion/mimo1/mimo1_origin.mp4',
    './assets/sample_motion/mimo2/mimo2_origin.mp4',
    './assets/sample_motion/mimo4/mimo4_origin.mp4',
    './assets/sample_motion/mimo5/mimo5_origin.mp4',
    './assets/sample_motion/mimo6/mimo6_origin.mp4',
    './assets/sample_motion/nezha/nezha_origin.mp4',
    './assets/sample_motion/taiji/taiji_origin.mp4',
]
|
|
|
gr.Examples( |
|
examples=examples, |
|
inputs=[video_input], |
|
examples_per_page=20, |
|
) |
|
with gr.Column(variant='panel', scale=1): |
|
with gr.Tabs(elem_id="openlrm_processed_image"): |
|
with gr.TabItem('Processed Image'): |
|
with gr.Row(): |
|
processed_image = gr.Image(label="Processed Image", image_mode="RGBA", type="filepath", elem_id="processed_image", height=480, width=270, interactive=False) |
|
|
|
with gr.Column(variant='panel', scale=1): |
|
with gr.Tabs(elem_id="openlrm_render_video"): |
|
with gr.TabItem('Rendered Video'): |
|
with gr.Row(): |
|
output_video = gr.Video(label="Rendered Video", format="mp4", height=480, width=270, autoplay=True) |
|
|
|
|
|
with gr.Row(): |
|
with gr.Column(variant='panel', scale=1): |
|
submit = gr.Button('Generate', elem_id="openlrm_generate", variant='primary') |
|
|
|
|
|
working_dir = gr.State() |
|
submit.click( |
|
fn=assert_input_image, |
|
inputs=[input_image], |
|
queue=False, |
|
).success( |
|
fn=prepare_working_dir, |
|
outputs=[working_dir], |
|
queue=False, |
|
).success( |
|
fn=core_fn, |
|
inputs=[input_image, video_input, working_dir], |
|
outputs=[processed_image, output_video], |
|
) |
|
|
|
demo.queue(max_size=1) |
|
demo.launch(share=True)
|
|
|
|
|
def launch_gradio_app(): |
|
|
|
os.environ.update({ |
|
"APP_ENABLED": "1", |
|
"APP_MODEL_NAME": "./exps/releases/video_human_benchmark/human-lrm-500M/step_060000/", |
|
"APP_INFER": "./configs/inference/human-lrm-500M.yaml", |
|
"APP_TYPE": "infer.human_lrm", |
|
"NUMBA_THREADING_LAYER": 'omp', |
|
}) |
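# parse_configs() reads these back: APP_INFER selects the inference YAML
# and APP_MODEL_NAME the checkpoint to load from disk or the Hub.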
|
|
facedetector = VGGHeadDetector( |
|
"./pretrained_models/gagatracker/vgghead/vgg_heads_l.trcd", |
|
device='cpu', |
|
) |
|
facedetector.to('cuda') |
|
|
|
pose_estimator = PoseEstimator( |
|
"./pretrained_models/human_model_files/", device='cpu' |
|
) |
|
pose_estimator.to('cuda') |
|
pose_estimator.device = 'cuda' |
|
|
|
cfg, cfg_train = parse_configs() |
|
lhm = _build_model(cfg) |
|
lhm.to('cuda') |
|
|
|
demo_lhm(pose_estimator, facedetector, lhm, cfg) |
|
|
|
if __name__ == '__main__': |
|
launch_pretrained() |
|
|
|
launch_gradio_app() |