LutaoJiang committed on
Commit
c69ffaa
·
1 Parent(s): 40ae5e2
Files changed (3) hide show
  1. app.py +4 -5
  2. pipeline/kiss3d_wrapper.py +2 -2
  3. pipeline/utils.py +21 -17
app.py CHANGED
@@ -187,7 +187,7 @@ else:
187
  # print(f"Before text_to_detailed: {torch.cuda.memory_allocated() / 1024**3} GB")
188
  return k3d_wrapper.get_detailed_prompt(prompt, seed)
189
 
190
- @spaces.GPU
191
  def text_to_image(prompt, seed=None, strength=1.0,lora_scale=1.0, num_inference_steps=18, redux_hparam=None, init_image=None, **kwargs):
192
  # subprocess.run("rm -rf /data-nvme/zerogpu-offload/*", env={}, shell=True)
193
  # print(f"Before text_to_image: {torch.cuda.memory_allocated() / 1024**3} GB")
@@ -210,7 +210,7 @@ else:
210
  **kwargs)
211
  return result[-1]
212
 
213
- @spaces.GPU
214
  def image2mesh_preprocess_(input_image_, seed, use_mv_rgb=True):
215
  global preprocessed_input_image
216
 
@@ -225,7 +225,7 @@ else:
225
  return reference_save_path, caption
226
 
227
 
228
- @spaces.GPU
229
  def image2mesh_main_(reference_3d_bundle_image, caption, seed, strength1=0.5, strength2=0.95, enable_redux=True, use_controlnet=True, if_video=True):
230
  subprocess.run(['nvidia-smi'])
231
  global mesh_cache
@@ -252,7 +252,7 @@ else:
252
  return gen_save_path, recon_mesh_path, mesh_cache
253
  # return gen_save_path, recon_mesh_path
254
 
255
- @spaces.GPU
256
  def bundle_image_to_mesh(
257
  gen_3d_bundle_image,
258
  camera_radius=3.5,
@@ -433,7 +433,6 @@ with gr.Blocks(css="""
433
  ["A person wearing a virtual reality headset, sitting position, bent legs, clasped hands."],
434
  ["A battle mech in a mix of red, blue, and black color, with a cannon on the head."],
435
  ["骷髅头, 邪恶的"],
436
-
437
  ],
438
  inputs=[prompt],
439
  label="Example Prompts",
 
187
  # print(f"Before text_to_detailed: {torch.cuda.memory_allocated() / 1024**3} GB")
188
  return k3d_wrapper.get_detailed_prompt(prompt, seed)
189
 
190
+ @spaces.GPU(duration=120)
191
  def text_to_image(prompt, seed=None, strength=1.0,lora_scale=1.0, num_inference_steps=18, redux_hparam=None, init_image=None, **kwargs):
192
  # subprocess.run("rm -rf /data-nvme/zerogpu-offload/*", env={}, shell=True)
193
  # print(f"Before text_to_image: {torch.cuda.memory_allocated() / 1024**3} GB")
 
210
  **kwargs)
211
  return result[-1]
212
 
213
+ @spaces.GPU(duration=120)
214
  def image2mesh_preprocess_(input_image_, seed, use_mv_rgb=True):
215
  global preprocessed_input_image
216
 
 
225
  return reference_save_path, caption
226
 
227
 
228
+ @spaces.GPU(duration=120)
229
  def image2mesh_main_(reference_3d_bundle_image, caption, seed, strength1=0.5, strength2=0.95, enable_redux=True, use_controlnet=True, if_video=True):
230
  subprocess.run(['nvidia-smi'])
231
  global mesh_cache
 
252
  return gen_save_path, recon_mesh_path, mesh_cache
253
  # return gen_save_path, recon_mesh_path
254
 
255
+ @spaces.GPU(duration=120)
256
  def bundle_image_to_mesh(
257
  gen_3d_bundle_image,
258
  camera_radius=3.5,
 
433
  ["A person wearing a virtual reality headset, sitting position, bent legs, clasped hands."],
434
  ["A battle mech in a mix of red, blue, and black color, with a cannon on the head."],
435
  ["骷髅头, 邪恶的"],
 
436
  ],
437
  inputs=[prompt],
438
  label="Example Prompts",
pipeline/kiss3d_wrapper.py CHANGED
@@ -587,10 +587,10 @@ class kiss3d_wrapper(object):
587
  rgb_multi_view = rgb_multi_view.to(recon_device) * multi_view_mask + (1 - multi_view_mask)
588
 
589
  with self.context():
590
-
591
  return DiMeR_reconstruct(self.recon_model, self.recon_model_config.infer_config,
592
  self.texture_model, self.texture_model_config.infer_config,
593
- rgb_multi_view.to(recon_device), normal_multi_view.to(recon_device), name=self.uuid,
594
  input_camera_type='kiss3d', render_3d_bundle_image=save_intermediate_results,
595
  render_azimuths=[0, 90, 180, 270],
596
  render_radius=lrm_render_radius,
 
587
  rgb_multi_view = rgb_multi_view.to(recon_device) * multi_view_mask + (1 - multi_view_mask)
588
 
589
  with self.context():
590
+ print("Image process done!")
591
  return DiMeR_reconstruct(self.recon_model, self.recon_model_config.infer_config,
592
  self.texture_model, self.texture_model_config.infer_config,
593
+ rgb_multi_view.to(recon_device), normal_multi_view.to(recon_device), multi_view_mask, name=self.uuid,
594
  input_camera_type='kiss3d', render_3d_bundle_image=save_intermediate_results,
595
  render_azimuths=[0, 90, 180, 270],
596
  render_radius=lrm_render_radius,
pipeline/utils.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  import sys
3
  import logging
 
4
 
5
  __workdir__ = '/'.join(os.path.abspath(__file__).split('/')[:-2])
6
  sys.path.insert(0, __workdir__)
@@ -228,8 +229,8 @@ def preprocess_input_image(input_image):
228
 
229
 
230
 
231
-
232
- def DiMeR_reconstruct(model, infer_config, texture_model, texture_model_config, images, normals,
233
  name='', export_texmap=False,
234
  input_camera_type='zero123',
235
  render_3d_bundle_image=True,
@@ -252,34 +253,37 @@ def DiMeR_reconstruct(model, infer_config, texture_model, texture_model_config,
252
  else:
253
  raise NotImplementedError(f'Unexpected input camera type: {input_camera_type}')
254
 
255
- # use rembg to get foreground mask
256
- fg_mask = []
257
- for i in range(4):
258
- image = images[i].permute(1, 2, 0).cpu().numpy()
259
- image = (image * 255).astype(np.uint8)
260
- image = rembg.remove(image, session=rembg_session)
261
- image = torch.from_numpy(image).permute(2, 0, 1).float() / 255.
262
- image = image[3:4]
263
- fg_mask.append(image)
264
- fg_mask = torch.stack(fg_mask)
265
- bg_mask = 1 - fg_mask
266
 
267
  # TODO: Device Check
268
  global_normals = normal_transfer.trans_local_2_global(normals.cpu().permute(0,2,3,1), torch.tensor([0, 90, 180, 270]),
269
  torch.tensor([5, 5, 5, 5]), radius=4.5,
270
  for_lotus=True)
271
  global_normals = global_normals.permute(0, 3, 1, 2)
272
- global_normals = global_normals * fg_mask + bg_mask
 
273
  global_normals = F.pad(global_normals, (50, 50, 50, 50), value=1.)
274
  global_normals = F.interpolate(global_normals, (512, 512), mode='bilinear', align_corners=False)
275
  global_normals = global_normals.unsqueeze(0).clamp(0.0, 1.0).to(device)
276
 
277
- images = images.cpu() * fg_mask + bg_mask
 
 
278
  images = F.pad(images, (50, 50, 50, 50), value=1.)
279
  images = F.interpolate(images, (512, 512), mode='bilinear', align_corners=False)
280
  images = images.unsqueeze(0).clamp(0.0, 1.0).to(device)
281
 
282
- logger.info(f"==> Runing DiMeR geometry reconstruction ...")
283
  planes = model.forward_planes(global_normals, input_cameras)
284
  vertices, faces, _ = model.extract_mesh(
285
  planes,
@@ -287,7 +291,7 @@ def DiMeR_reconstruct(model, infer_config, texture_model, texture_model_config,
287
  **infer_config,
288
  )
289
 
290
- logger.info(f"==> Runing DiMeR texture reconstruction ...")
291
  # extract_mesh函数进行了旋转,进行还原,对齐训练时的方向
292
  vertices = torch.tensor(vertices, device=device)
293
  faces = torch.tensor(faces, device=device)
 
1
  import os
2
  import sys
3
  import logging
4
+ import time
5
 
6
  __workdir__ = '/'.join(os.path.abspath(__file__).split('/')[:-2])
7
  sys.path.insert(0, __workdir__)
 
229
 
230
 
231
 
232
+ @torch.no_grad()
233
+ def DiMeR_reconstruct(model, infer_config, texture_model, texture_model_config, images, normals, multi_view_mask,
234
  name='', export_texmap=False,
235
  input_camera_type='zero123',
236
  render_3d_bundle_image=True,
 
253
  else:
254
  raise NotImplementedError(f'Unexpected input camera type: {input_camera_type}')
255
 
256
+ # # use rembg to get foreground mask
257
+ # fg_mask = []
258
+ # for i in range(4):
259
+ # image = images[i].permute(1, 2, 0).cpu().numpy()
260
+ # image = (image * 255).astype(np.uint8)
261
+ # image = rembg.remove(image, session=rembg_session)
262
+ # image = torch.from_numpy(image).permute(2, 0, 1).float() / 255.
263
+ # image = image[3:4]
264
+ # fg_mask.append(image)
265
+ # fg_mask = torch.stack(fg_mask)
266
+ # bg_mask = 1 - fg_mask
267
 
268
  # TODO: Device Check
269
  global_normals = normal_transfer.trans_local_2_global(normals.cpu().permute(0,2,3,1), torch.tensor([0, 90, 180, 270]),
270
  torch.tensor([5, 5, 5, 5]), radius=4.5,
271
  for_lotus=True)
272
  global_normals = global_normals.permute(0, 3, 1, 2)
273
+ global_normals = global_normals * multi_view_mask + (1 - multi_view_mask)
274
+ # global_normals = global_normals * fg_mask + bg_mask
275
  global_normals = F.pad(global_normals, (50, 50, 50, 50), value=1.)
276
  global_normals = F.interpolate(global_normals, (512, 512), mode='bilinear', align_corners=False)
277
  global_normals = global_normals.unsqueeze(0).clamp(0.0, 1.0).to(device)
278
 
279
+ print(f"{time.time()} ==> local normal to global normal done")
280
+
281
+ # images = images.cpu() * fg_mask + bg_mask
282
  images = F.pad(images, (50, 50, 50, 50), value=1.)
283
  images = F.interpolate(images, (512, 512), mode='bilinear', align_corners=False)
284
  images = images.unsqueeze(0).clamp(0.0, 1.0).to(device)
285
 
286
+ print(f"{time.time()} ==> Runing DiMeR geometry reconstruction ...")
287
  planes = model.forward_planes(global_normals, input_cameras)
288
  vertices, faces, _ = model.extract_mesh(
289
  planes,
 
291
  **infer_config,
292
  )
293
 
294
+ print(f"{time.time()} ==> Runing DiMeR texture reconstruction ...")
295
  # extract_mesh函数进行了旋转,进行还原,对齐训练时的方向
296
  vertices = torch.tensor(vertices, device=device)
297
  faces = torch.tensor(faces, device=device)