Sm0kyWu committed
Commit ee77a14 · verified · 1 Parent(s): 00b4b8e

Upload 71 files

Files changed (2):
  1. Amodal3R/pipelines/image_to_3d.py  +2 -1
  2. app.py  +81 -22
Amodal3R/pipelines/image_to_3d.py CHANGED

@@ -377,6 +377,7 @@ class Amodal3RImageTo3DPipeline(Pipeline):
         slat_sampler_params: dict = {},
         formats: List[str] = ['mesh', 'gaussian'],
         mode: Literal['stochastic', 'multidiffusion'] = 'stochastic',
+        erode_kernel_size: int = 3,
     ) -> dict:
         """
         Run the pipeline with multiple images as condition
@@ -388,7 +389,7 @@ class Amodal3RImageTo3DPipeline(Pipeline):
             slat_sampler_params (dict): Additional parameters for the structured latent sampler.
             preprocess_image (bool): Whether to preprocess the image.
         """
-        images, masks, masks_occ = zip(*[self.preprocess_image_w_mask(image, mask) for image, mask in zip(images, masks)])
+        images, masks, masks_occ = zip(*[self.preprocess_image_w_mask(image, mask, erode_kernel_size) for image, mask in zip(images, masks)])
         images = list(images)
         masks = list(masks)
         masks_occ = list(masks_occ)
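The new erode_kernel_size argument is forwarded to preprocess_image_w_mask, whose implementation is not shown in this diff. Below is a minimal sketch of the kind of mask erosion such a parameter typically controls; the helper name and the use of OpenCV are assumptions for illustration, not code from this commit:

import cv2
import numpy as np

def erode_visibility_mask(mask: np.ndarray, erode_kernel_size: int) -> np.ndarray:
    """Shrink a binary visibility mask so that inaccurate segmentation
    boundaries are not treated as reliably visible pixels."""
    if erode_kernel_size <= 0:
        # A kernel size of 0 (the app's default slider value) disables erosion.
        return mask
    kernel = np.ones((erode_kernel_size, erode_kernel_size), np.uint8)
    # cv2.erode peels roughly kernel_size // 2 pixels off the mask border.
    return cv2.erode(mask.astype(np.uint8), kernel, iterations=1)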
app.py CHANGED

@@ -103,6 +103,7 @@ def image_to_3d(
     ss_sampling_steps: int,
     slat_guidance_strength: float,
     slat_sampling_steps: int,
+    erode_kernel_size: int,
     req: gr.Request,
 ) -> Tuple[dict, str]:
     """
@@ -136,8 +137,9 @@ def image_to_3d(
             "cfg_strength": slat_guidance_strength,
         },
         mode="stochastic",
+        erode_kernel_size=erode_kernel_size,
     )
-    video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
+    video = render_utils.render_video(outputs['gaussian'][0], num_frames=120, bg_color=(1,1,1))['color']
     video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
     video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
     video_path = os.path.join(user_dir, 'sample.mp4')
@@ -323,7 +325,7 @@ def delete_mask(mask_list):
     mask_list.pop()
     return mask_list

-def check_combined_mask(image, visibility_mask, mask_list, scale=0.6):
+def check_combined_mask(image, visibility_mask, mask_list, scale=0.65):
     updated_image = image.copy()
     # combine all the mask:
     combined_mask = np.zeros_like(updated_image[:, :, 0])
@@ -394,13 +396,13 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:


     with gr.Row():
-        gr.Markdown("""* Step 1 - Generate Visibility Mask and Occlusion Mask.
+        gr.Markdown("""
+                    ### Step 1 - Generate Visibility Mask and Occlusion Mask.
                     * Please wait for a few seconds after uploading the image. The 2D segmenter is getting ready.
-                    * Add the point prompts to indicate the target object and occluders separately.
-                    * "Render Point", see the position of the point to be added.
-                    * "Add Point", the point will be added to the list.
-                    * "Generate mask", see the segmented area corresponding to current point list.
-                    * "Add mask", current mask will be added for 3D amodal completion.
+                    * Add the point prompts to indicate the target object.
+                    * "Render Point", see the position of the point to be added. "Add Point", the point will be added to the list.
+                    * "Generate mask", see the segmented area corresponding to current point list. "Add mask", current mask will be added for 3D amodal completion.
+                    * The target object need to be put in the center of the image, the scale can be adjusted for better reconstruction.
                     """)
     with gr.Row():
         with gr.Column():
@@ -434,11 +436,13 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
                 undo_vis_mask = gr.Button("Undo Last Mask")
                 vis_input = gr.Image(label='Visible Input', interactive=False, height=300)
                 with gr.Row():
-                    zoom_scale = gr.Slider(0.3, 1.0, label="Target Object Scale", value=0.6, step=0.1)
+                    zoom_scale = gr.Slider(0.3, 1.0, label="Target Object Scale", value=0.68, step=0.1)
                     check_visible_input = gr.Button("Generate Occluded Input")
     with gr.Row():
-        gr.Markdown("""* Step 2 - 3D Amodal Completion.
+        gr.Markdown("""
+                    ### Step 2 - 3D Amodal Completion.
                     * Different random seeds can be tried in "Generation Settings", if you think the results are not ideal.
+                    * The boundary of the segmentation may not be accurate, so here we provide the option to erode the visible area.
                     * If the reconstruction 3D asset is satisfactory, you can extract the GLB file and download it.
                     """)
     with gr.Row():
@@ -446,6 +450,7 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
            with gr.Accordion(label="Generation Settings", open=True):
                 seed = gr.Slider(0, MAX_SEED, label="Seed", value=1, step=1)
                 randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
+                erode_kernel_size = gr.Slider(0, 5, label="Erode Kernel Size", value=0, step=1)
                 gr.Markdown("Stage 1: Sparse Structure Generation")
                 with gr.Row():
                     ss_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
@@ -454,10 +459,37 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
                 with gr.Row():
                     slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
                     slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
-            generate_btn = gr.Button("Generate")
+            generate_btn = gr.Button("Amodal 3D Reconstruction")
+            with gr.Accordion(label="GLB Extraction Settings", open=False):
+                mesh_simplify = gr.Slider(0.9, 0.98, label="Simplify", value=0.95, step=0.01)
+                texture_size = gr.Slider(512, 2048, label="Texture Size", value=1024, step=512)
+            with gr.Row():
+                extract_glb_btn = gr.Button("Extract GLB")
+                extract_gs_btn = gr.Button("Extract Gaussian")
+            gr.Markdown("""
+                        *NOTE: Gaussian file can be very large (~50MB), it will take a while to display and download.*
+                        """)
         with gr.Column():
             video_output = gr.Video(label="Generated 3D Asset", autoplay=True, loop=True, height=300)
+            model_output = LitModel3D(label="Extracted GLB/Gaussian", exposure=10.0, height=300)
+
+            with gr.Row():
+                download_glb = gr.DownloadButton(label="Download GLB", interactive=False)
+                download_gs = gr.DownloadButton(label="Download Gaussian", interactive=False)

+    with gr.Row() as single_image_example:
+        examples = gr.Examples(
+            examples=[
+                f'assets/example_image/{image}'
+                for image in os.listdir("assets/example_image")
+            ],
+            inputs=[input_image],
+            fn=lambda image: input_image.upload(image),
+            outputs=[predictor, original_image, message],
+            run_on_click=True,
+            examples_per_page=12,
+        )
+
     # # Handlers
     demo.load(start_session)
     demo.unload(end_session)
@@ -536,21 +568,48 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:


     # 3D Amodal Reconstruction
-    # generate_btn.click(
-    #     get_seed,
-    #     inputs=[randomize_seed, seed],
-    #     outputs=[seed],
-    # ).then(
-    #     image_to_3d,
-    #     inputs=[vis_input, occluded_mask, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps],
-    #     outputs=[output_buf, video_output],
-    # )
-
     generate_btn.click(
+        get_seed,
+        inputs=[randomize_seed, seed],
+        outputs=[seed],
+    ).then(
         image_to_3d,
-        inputs=[vis_input, occluded_mask, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps],
+        inputs=[vis_input, occluded_mask, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps, erode_kernel_size],
         outputs=[output_buf, video_output],
+    ).then(
+        lambda: tuple([gr.Button(interactive=True), gr.Button(interactive=True)]),
+        outputs=[extract_glb_btn, extract_gs_btn],
+    )
+
+    video_output.clear(
+        lambda: tuple([gr.Button(interactive=False), gr.Button(interactive=False)]),
+        outputs=[extract_glb_btn, extract_gs_btn],
+    )
+
+    extract_glb_btn.click(
+        extract_glb,
+        inputs=[output_buf, mesh_simplify, texture_size],
+        outputs=[model_output, download_glb],
+    ).then(
+        lambda: gr.Button(interactive=True),
+        outputs=[download_glb],
+    )
+
+    extract_gs_btn.click(
+        extract_gaussian,
+        inputs=[output_buf],
+        outputs=[model_output, download_gs],
+    ).then(
+        lambda: gr.Button(interactive=True),
+        outputs=[download_gs],
     )
+
+    model_output.clear(
+        lambda: gr.Button(interactive=False),
+        outputs=[download_glb],
+    )
+
+


     # Launch the Gradio App
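The rewired handlers rely on Gradio's event chaining: .click(...) returns an event dependency, .then(...) schedules a follow-up step once it finishes, and returning a gr.Button(interactive=...) instance from a handler updates that button's state. A minimal self-contained sketch of the same pattern, using illustrative component names rather than the app's:

import gradio as gr

def slow_generate(seed: int) -> str:
    # Placeholder for the expensive generation step (image_to_3d in app.py).
    return f"generated with seed {seed}"

with gr.Blocks() as demo:
    seed = gr.Slider(0, 100, value=1, step=1, label="Seed")
    generate_btn = gr.Button("Generate")
    result = gr.Textbox(label="Result")
    export_btn = gr.Button("Export", interactive=False)

    # Run the generation, then enable the export button once a result exists,
    # mirroring how app.py chains image_to_3d and unlocks the extract buttons.
    generate_btn.click(
        slow_generate,
        inputs=[seed],
        outputs=[result],
    ).then(
        lambda: gr.Button(interactive=True),
        outputs=[export_btn],
    )

    # Clearing the result disables export again, similar to the
    # video_output.clear and model_output.clear handlers in app.py.
    result.change(
        lambda text: gr.Button(interactive=bool(text)),
        inputs=[result],
        outputs=[export_btn],
    )

demo.launch()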