Spaces:
				
			
			
	
			
			
		Running
		
			on 
			
			Zero
	
	
	
			
			
	
	
	
	
		
		
		Running
		
			on 
			
			Zero
	Initial commit
Browse files- .gitattributes +3 -0
- app.py +164 -0
- assets/controlnet_demo1.png +3 -0
- assets/controlnet_demo2.png +3 -0
- assets/examples/demo1.png +3 -0
- assets/examples/demo10.png +3 -0
- assets/examples/demo11.png +3 -0
- assets/examples/demo12.png +3 -0
- assets/examples/demo13.png +3 -0
- assets/examples/demo14.png +3 -0
- assets/examples/demo15.png +3 -0
- assets/examples/demo16.png +3 -0
- assets/examples/demo17.png +3 -0
- assets/examples/demo18.png +3 -0
- assets/examples/demo19.png +3 -0
- assets/examples/demo2.png +3 -0
- assets/examples/demo20.png +3 -0
- assets/examples/demo3.png +3 -0
- assets/examples/demo4.png +3 -0
- assets/examples/demo5.png +3 -0
- assets/examples/demo7.png +3 -0
- assets/examples/demo8.png +3 -0
- assets/examples/demo9.png +3 -0
- assets/examples_video/davis_dolphins.mp4 +3 -0
- assets/examples_video/davis_rollercoaster.mp4 +3 -0
- assets/examples_video/davis_seasnake.mp4 +3 -0
- assets/paper.pdf +3 -0
- assets/teaser.png +3 -0
- assets/video_edit/demo1_midas.mp4 +3 -0
- assets/video_edit/demo1_ours.mp4 +3 -0
- assets/video_edit/demo1_video.mp4 +3 -0
- assets/video_edit/demo2_midas.mp4 +3 -0
- assets/video_edit/demo2_ours.mp4 +3 -0
- assets/video_edit/demo2_video.mp4 +3 -0
- requirements.txt +6 -0
    	
        .gitattributes
    CHANGED
    
    | @@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text | |
| 33 | 
             
            *.zip filter=lfs diff=lfs merge=lfs -text
         | 
| 34 | 
             
            *.zst filter=lfs diff=lfs merge=lfs -text
         | 
| 35 | 
             
            *tfevents* filter=lfs diff=lfs merge=lfs -text
         | 
|  | |
|  | |
|  | 
|  | |
| 33 | 
             
            *.zip filter=lfs diff=lfs merge=lfs -text
         | 
| 34 | 
             
            *.zst filter=lfs diff=lfs merge=lfs -text
         | 
| 35 | 
             
            *tfevents* filter=lfs diff=lfs merge=lfs -text
         | 
| 36 | 
            +
            *.png filter=lfs diff=lfs merge=lfs -text
         | 
| 37 | 
            +
            *.mp4 filter=lfs diff=lfs merge=lfs -text
         | 
| 38 | 
            +
            *.pdf filter=lfs diff=lfs merge=lfs -text
         | 
    	
        app.py
    ADDED
    
    | @@ -0,0 +1,164 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import gradio as gr
         | 
| 2 | 
            +
            import cv2
         | 
| 3 | 
            +
            import numpy as np
         | 
| 4 | 
            +
            import os
         | 
| 5 | 
            +
            import torch
         | 
| 6 | 
            +
            import torch.nn.functional as F
         | 
| 7 | 
            +
            from torchvision.transforms import Compose
         | 
| 8 | 
            +
            import tempfile
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            from depth_anything.dpt import DepthAnything
         | 
| 11 | 
            +
            from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet
         | 
| 12 | 
            +
             | 
# Loaded DepthAnything models keyed by (encoder, device), so that repeated
# make_video() calls do not fetch and initialise the checkpoint again.
_DEPTH_MODEL_CACHE = {}


def _load_depth_model(encoder, device):
    """Return the DepthAnything model for *encoder* on *device*, loading it at most once."""
    key = (encoder, device)
    if key not in _DEPTH_MODEL_CACHE:
        depth_model = DepthAnything.from_pretrained(
            'LiheYoung/depth_anything_{}14'.format(encoder)
        ).to(device).eval()
        total_params = sum(param.numel() for param in depth_model.parameters())
        print('Total parameters: {:.2f}M'.format(total_params / 1e6))
        _DEPTH_MODEL_CACHE[key] = depth_model
    return _DEPTH_MODEL_CACHE[key]


def _list_video_files(video_path):
    """Expand *video_path* (a video file, a .txt list of paths, or a directory) into a list of paths."""
    if os.path.isfile(video_path):
        if video_path.endswith('txt'):
            # One path per line; blank lines are skipped.
            with open(video_path, 'r') as f:
                return [line for line in f.read().splitlines() if line.strip()]
        return [video_path]
    # Directory: every non-hidden entry, in sorted order.
    return sorted(
        os.path.join(video_path, name)
        for name in os.listdir(video_path)
        if not name.startswith('.')
    )


def _colorize_depth(depth, frame_height, frame_width):
    """Resize the model output to the frame size and render it with COLORMAP_INFERNO."""
    depth = F.interpolate(depth[None], (frame_height, frame_width), mode='bilinear', align_corners=False)[0, 0]
    depth_min, depth_max = depth.min(), depth.max()
    depth_range = depth_max - depth_min
    if depth_range > 0:
        depth = (depth - depth_min) / depth_range * 255.0
    else:
        # Flat (or NaN) prediction: avoid 0/0, which would feed NaNs into the uint8 cast.
        depth = torch.zeros_like(depth)
    depth = depth.cpu().numpy().astype(np.uint8)
    return cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)


def make_video(video_path, outdir='./vis_video_depth', encoder='vitl'):
    """Render a side-by-side "input | depth" video with Depth Anything.

    Args:
        video_path: Path to a video file, to a text file (name ending in
            ``txt``) listing one video path per line, or to a directory of
            videos.
        outdir: Unused; kept so existing callers keep working. The output is
            written to a temporary ``.mp4`` file instead.
        encoder: Encoder tag substituted into the checkpoint name
            ``LiheYoung/depth_anything_{encoder}14``.

    Returns:
        Path of the temporary ``.mp4`` written for the first input video, or
        ``None`` when *video_path* expands to no files.
    """
    margin_width = 50

    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    depth_anything = _load_depth_model(encoder, DEVICE)

    transform = Compose([
        Resize(
            width=518,
            height=518,
            resize_target=False,
            keep_aspect_ratio=True,
            ensure_multiple_of=14,
            resize_method='lower_bound',
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    filenames = _list_video_files(video_path)

    for k, filename in enumerate(filenames):
        print('Progress {:}/{:},'.format(k+1, len(filenames)), 'Processing', filename)

        raw_video = cv2.VideoCapture(filename)
        out = None
        try:
            frame_width = int(raw_video.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(raw_video.get(cv2.CAP_PROP_FRAME_HEIGHT))
            # Keep the fractional rate (e.g. 29.97) instead of truncating it.
            frame_rate = raw_video.get(cv2.CAP_PROP_FPS)
            if not frame_rate > 0:
                # Source reports no usable FPS; 30.0 is an arbitrary fallback
                # so the writer still gets a valid rate.
                frame_rate = 30.0
            output_width = frame_width * 2 + margin_width

            # Reserve a temp file name; delete=False keeps it for the caller.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmpfile:
                output_path = tmpfile.name
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_path, fourcc, frame_rate, (output_width, frame_height))

            # The white separator strip is identical for every frame.
            split_region = np.ones((frame_height, margin_width, 3), dtype=np.uint8) * 255

            while raw_video.isOpened():
                ret, raw_frame = raw_video.read()
                if not ret:
                    break

                frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2RGB) / 255.0
                frame = transform({'image': frame})['image']
                frame = torch.from_numpy(frame).unsqueeze(0).to(DEVICE)

                with torch.no_grad():
                    depth = depth_anything(frame)

                depth_color = _colorize_depth(depth, frame_height, frame_width)
                combined_frame = cv2.hconcat([raw_frame, split_region, depth_color])
                out.write(combined_frame)
        finally:
            # Release the capture/writer even if a frame fails mid-way.
            raw_video.release()
            if out is not None:
                out.release()

        # NOTE: the function returns a single path, so only the first input
        # video is processed (this matches the previous behaviour).
        return output_path
| 100 | 
            +
             | 
# Height limits for the display elements of the Gradio page.
css = """
#img-display-container {
    max-height: 100vh;
    }
#img-display-input {
    max-height: 80vh;
    }
#img-display-output {
    max-height: 80vh;
    }
"""

# Run on the GPU when one is visible to torch, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

# ViT-L checkpoint, moved to the selected device and put in eval mode.
model = DepthAnything.from_pretrained('LiheYoung/depth_anything_vitl14')
model = model.to(DEVICE).eval()

# Markdown shown at the top of the page.
title = "# Depth Anything Video Demo"
description = (
    "Depth Anything on full video files.\n"
    "\n"
    "Please refer to our [paper](https://arxiv.org/abs/2401.10891), "
    "[project page](https://depth-anything.github.io), or "
    "[github](https://github.com/LiheYoung/Depth-Anything) for more details."
)

# Pre-processing pipeline: resize, normalise, then convert for the network.
transform = Compose(
    [
        Resize(
            width=518,
            height=518,
            resize_target=False,
            keep_aspect_ratio=True,
            ensure_multiple_of=14,
            resize_method='lower_bound',
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
        PrepareForNet(),
    ]
)
| 133 | 
            +
             | 
def predict_depth(model, image):
    """Call *model* on *image* with gradient tracking disabled and return the result."""
    with torch.no_grad():
        prediction = model(image)
    return prediction
| 137 | 
            +
             | 
# Page layout: header text, a video input, a submit button and the result video.
with gr.Blocks(css=css) as demo:
    gr.Markdown(title)
    gr.Markdown(description)
    gr.Markdown("### Video Depth Prediction demo")

    with gr.Row():
        input_video = gr.Video(label="Input Video")
    submit = gr.Button("Submit")
    processed_video = gr.Video(label="Processed Video")

    def on_submit(uploaded_video):
        """Run make_video on the uploaded video and return the resulting file path."""
        return make_video(uploaded_video)

    submit.click(on_submit, inputs=[input_video], outputs=processed_video)

    # Offer every bundled clip, in sorted order, as a clickable example.
    example_files = sorted(os.listdir('assets/examples_video'))
    example_files = [
        os.path.join('assets/examples_video', clip_name)
        for clip_name in example_files
    ]
    examples = gr.Examples(
        examples=example_files,
        inputs=[input_video],
        outputs=processed_video,
        fn=on_submit,
        cache_examples=False,
    )


if __name__ == '__main__':
    # Enable the request queue, then start the server.
    queued_demo = demo.queue()
    queued_demo.launch()
    	
        assets/controlnet_demo1.png
    ADDED
    
    |   | 
| Git LFS Details
 | 
    	
        assets/controlnet_demo2.png
    ADDED
    
    |   | 
| Git LFS Details
 | 
    	
        assets/examples/demo1.png
    ADDED
    
    |   | 
| Git LFS Details
 | 
    	
        assets/examples/demo10.png
    ADDED
    
    |   | 
| Git LFS Details
 | 
    	
        assets/examples/demo11.png
    ADDED
    
    |   | 
| Git LFS Details
 | 
    	
        assets/examples/demo12.png
    ADDED
    
    |   | 
| Git LFS Details
 | 
    	
        assets/examples/demo13.png
    ADDED
    
    |   | 
| Git LFS Details
 | 
    	
        assets/examples/demo14.png
    ADDED
    
    |   | 
| Git LFS Details
 | 
    	
        assets/examples/demo15.png
    ADDED
    
    |   | 
| Git LFS Details
 | 
    	
        assets/examples/demo16.png
    ADDED
    
    |   | 
| Git LFS Details
 | 
    	
        assets/examples/demo17.png
    ADDED
    
    |   | 
| Git LFS Details
 | 
    	
        assets/examples/demo18.png
    ADDED
    
    |   | 
| Git LFS Details
 | 
    	
        assets/examples/demo19.png
    ADDED
    
    |   | 
| Git LFS Details
 | 
    	
        assets/examples/demo2.png
    ADDED
    
    |   | 
| Git LFS Details
 | 
    	
        assets/examples/demo20.png
    ADDED
    
    |   | 
| Git LFS Details
 | 
    	
        assets/examples/demo3.png
    ADDED
    
    |   | 
| Git LFS Details
 | 
    	
        assets/examples/demo4.png
    ADDED
    
    |   | 
| Git LFS Details
 | 
    	
        assets/examples/demo5.png
    ADDED
    
    |   | 
| Git LFS Details
 | 
    	
        assets/examples/demo7.png
    ADDED
    
    |   | 
| Git LFS Details
 | 
    	
        assets/examples/demo8.png
    ADDED
    
    |   | 
| Git LFS Details
 | 
    	
        assets/examples/demo9.png
    ADDED
    
    |   | 
| Git LFS Details
 | 
    	
        assets/examples_video/davis_dolphins.mp4
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:da2bdf883de86f3ad1f7ec58e34f50cd8dc1bbde8288e23a635a7396ba1af13d
         | 
| 3 | 
            +
            size 468524
         | 
    	
        assets/examples_video/davis_rollercoaster.mp4
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:291361c800b83ead49f50302ffc82f6ecd5205391934cc8354946b4b93e8cbb4
         | 
| 3 | 
            +
            size 596021
         | 
    	
        assets/examples_video/davis_seasnake.mp4
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:247f92487bc7a14bf2364847f83e23c7c99addf28abaa043bb353edb6531cead
         | 
| 3 | 
            +
            size 4010306
         | 
    	
        assets/paper.pdf
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:3e554e412ffc6e9e6edddc963baa2211692860ab0efa92d74bf7d09b18d2c597
         | 
| 3 | 
            +
            size 4549655
         | 
    	
        assets/teaser.png
    ADDED
    
    |   | 
| Git LFS Details
 | 
    	
        assets/video_edit/demo1_midas.mp4
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:11c668214e74ff067cadd9f5beaa8b103360398f5c689c4a4db6c74b451a6963
         | 
| 3 | 
            +
            size 187513
         | 
    	
        assets/video_edit/demo1_ours.mp4
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:57e88c7610076ee422f53c112907339d5a87338cc9186453eb5bfaf6ed3a9257
         | 
| 3 | 
            +
            size 431370
         | 
    	
        assets/video_edit/demo1_video.mp4
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:dfcb14cb7cec14c6b8198a6a455609dc5fac26c42628867e0d4412d53bfa0af7
         | 
| 3 | 
            +
            size 174791
         | 
    	
        assets/video_edit/demo2_midas.mp4
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:833fd0e7b41f712073ff48373139b1433a0e61b0221a44971a25d57b34a92078
         | 
| 3 | 
            +
            size 145623
         | 
    	
        assets/video_edit/demo2_ours.mp4
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:c49d36aab8eeac806613a80c870fa6d3b62694ad08da0550f2e7d6d1b29553fd
         | 
| 3 | 
            +
            size 222693
         | 
    	
        assets/video_edit/demo2_video.mp4
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:cb31e593a2a0973e37a5e004a25e7b5fde0e6e9234e283cc404ae7b9805b45ce
         | 
| 3 | 
            +
            size 112277
         | 
    	
        requirements.txt
    ADDED
    
    | @@ -0,0 +1,6 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            gradio_imageslider
         | 
| 2 | 
            +
            gradio==4.14.0
         | 
| 3 | 
            +
            torch
         | 
| 4 | 
            +
            torchvision
         | 
| 5 | 
            +
            opencv-python
         | 
| 6 | 
            +
            huggingface_hub
         | 
