xinlongwang committed on
Commit
ad02904
1 Parent(s): 81ab7b8

anything in a video

Browse files
.gitattributes CHANGED
@@ -36,3 +36,15 @@ rainbow.gif filter=lfs diff=lfs merge=lfs -text
36
  rainbow_.gif filter=lfs diff=lfs merge=lfs -text
37
  rainbow__.gif filter=lfs diff=lfs merge=lfs -text
38
  rainbow2.gif filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  rainbow_.gif filter=lfs diff=lfs merge=lfs -text
37
  rainbow__.gif filter=lfs diff=lfs merge=lfs -text
38
  rainbow2.gif filter=lfs diff=lfs merge=lfs -text
39
+ videos/jeep-moving.jpg filter=lfs diff=lfs merge=lfs -text
40
+ videos/a_car_is_moving_on_the_road_40.mp4 filter=lfs diff=lfs merge=lfs -text
41
+ videos/a_man_in_parkour_100.jpg filter=lfs diff=lfs merge=lfs -text
42
+ videos/a_man_in_parkour_100.mp4 filter=lfs diff=lfs merge=lfs -text
43
+ videos/child-riding_lego.jpg filter=lfs diff=lfs merge=lfs -text
44
+ videos/child-riding_lego.mp4 filter=lfs diff=lfs merge=lfs -text
45
+ videos/jeep-moving.mp4 filter=lfs diff=lfs merge=lfs -text
46
+ videos/a_car_is_moving_on_the_road_40.jpg filter=lfs diff=lfs merge=lfs -text
47
+ videos/a_man_is_surfing_3_30.jpg filter=lfs diff=lfs merge=lfs -text
48
+ videos/a_man_is_surfing_3_30.mp4 filter=lfs diff=lfs merge=lfs -text
49
+ videos/horse-running.jpg filter=lfs diff=lfs merge=lfs -text
50
+ videos/horse-running.mp4 filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -53,6 +53,32 @@ def inference_mask1(prompt,
53
  res.append(np.uint8(np.array(Image.open(io.BytesIO(base64.b64decode(a[i]))))))
54
  return res
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  def resizeImg(img):
57
  res, hres = 448, 448
58
  img = Image.fromarray(img).convert("RGB")
@@ -61,13 +87,13 @@ def resizeImg(img):
61
  img.save(temp, format="WEBP")
62
  return base64.b64encode(temp.getvalue()).decode('ascii')
63
 
64
- def inference_mask_cat(
65
- prompt,
66
- img,
67
- img_,
68
- ):
69
- output_list = [img, img_]
70
- return output_list
71
 
72
 
73
  # define app features and run
@@ -88,6 +114,15 @@ examples_sam = [
88
  ['./images/ydt_2.jpg', './images/ydt_1.jpg', './images/ydt_3.jpg'],
89
  ]
90
 
 
 
 
 
 
 
 
 
 
91
 
92
  demo_mask = gr.Interface(fn=inference_mask1,
93
  inputs=[gr.ImageMask(brush_radius=8, label="prompt (提示图)"), gr.Image(label="img1 (测试图1)"), gr.Image(label="img2 (测试图2)")],
@@ -134,6 +169,26 @@ demo_mask_sam = gr.Interface(fn=inference_mask1_sam,
134
  allow_flagging="never",
135
  )
136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  title = "SegGPT: Segmenting Everything In Context<br> \
138
  <div align='center'> \
139
  <h2><a href='https://arxiv.org/abs/2304.03284' target='_blank' rel='noopener'>[paper]</a> \
@@ -144,7 +199,7 @@ title = "SegGPT: Segmenting Everything In Context<br> \
144
  </div> \
145
  "
146
 
147
- demo = gr.TabbedInterface([demo_mask_sam, demo_mask], ['SAM+SegGPT (一触百通)', 'General 1-shot'], title=title)
148
 
149
  #demo.launch(share=True, auth=("baai", "vision"))
150
  demo.launch(enable_queue=False)
 
53
  res.append(np.uint8(np.array(Image.open(io.BytesIO(base64.b64decode(a[i]))))))
54
  return res
55
 
56
+
57
+
58
def inference_mask_video(
        prompt,
        vid,
        request: gr.Request,
        ):
    """Segment a video by posting a prompt image, its mask and the video to the remote service.

    Args:
        prompt: Mapping with "image" and "mask" entries; each is passed to
            ``resizeImgIo`` before upload.
        vid: Path of the video file to upload (it is opened in binary mode).
        request: Declared in the signature but not read by this function.

    Returns:
        A two-element list: the decoded "mask" image from the JSON response
        as a ``uint8`` array, and the response's "url" value.
    """
    # Encode the prompt images before opening the video so that a failure
    # here never leaves a file handle behind.
    prompt_image = resizeImgIo(prompt["image"])
    prompt_mask = resizeImgIo(prompt["mask"])

    # The context manager closes the upload handle once the request has
    # finished; the previous inline open() was never closed.
    with open(vid, 'rb') as video_file:
        files = {
            "pimage": prompt_image,
            "pmask": prompt_mask,
            "video": video_file,
        }
        # NOTE(review): no timeout is set, so a stalled server blocks this
        # call indefinitely - pick a limit that fits the processing time.
        r = requests.post("http://120.92.79.209/painter/runVideo", files=files)

    a = json.loads(r.text)
    # The mask arrives base64-encoded; decode it and load it as an image.
    mask = np.uint8(np.array(Image.open(io.BytesIO(base64.b64decode(a["mask"])))))
    return [mask, a["url"]]
80
+
81
+
82
  def resizeImg(img):
83
  res, hres = 448, 448
84
  img = Image.fromarray(img).convert("RGB")
 
87
  img.save(temp, format="WEBP")
88
  return base64.b64encode(temp.getvalue()).decode('ascii')
89
 
90
def resizeImgIo(img):
    """Return *img* as a 448x448 RGB WEBP image inside a fresh ``io.BytesIO``.

    The array is converted to a PIL image, forced to RGB, resized to a
    fixed 448x448 square and saved as WEBP. The returned buffer is a new
    stream positioned at offset 0.
    """
    side = 448
    resized = Image.fromarray(img).convert("RGB").resize((side, side))
    encoded = io.BytesIO()
    resized.save(encoded, format="WEBP")
    # Copy into a new stream so the caller reads from the start.
    return io.BytesIO(encoded.getvalue())
97
 
98
 
99
  # define app features and run
 
114
  ['./images/ydt_2.jpg', './images/ydt_1.jpg', './images/ydt_3.jpg'],
115
  ]
116
 
117
+ examples_video = [
118
+ ['./videos/horse-running.jpg', './videos/horse-running.mp4'],
119
+ ['./videos/a_man_is_surfing_3_30.jpg', './videos/a_man_is_surfing_3_30.mp4'],
120
+ ['./videos/a_car_is_moving_on_the_road_40.jpg', './videos/a_car_is_moving_on_the_road_40.mp4'],
121
+ ['./videos/jeep-moving.jpg', './videos/jeep-moving.mp4'],
122
+ ['./videos/child-riding_lego.jpg', './videos/child-riding_lego.mp4'],
123
+ ]
124
+
125
+
126
 
127
  demo_mask = gr.Interface(fn=inference_mask1,
128
  inputs=[gr.ImageMask(brush_radius=8, label="prompt (提示图)"), gr.Image(label="img1 (测试图1)"), gr.Image(label="img2 (测试图2)")],
 
169
  allow_flagging="never",
170
  )
171
 
172
+ demo_mask_video = gr.Interface(fn=inference_mask_video,
173
+ inputs=[gr.ImageMask(label="prompt (提示图)"), gr.Video(label="video (测试视频)").style(height=448, width=448)],
174
+ outputs=[gr.Image(label="SAM output (mask)").style(height=256, width=256), gr.Video().style(height=448, width=448)],
175
+ examples=examples_video,
176
+ description="<p> \
177
+ <strong>SegGPT+SAM: One touch for any segmentation in a video.</strong> <br>\
178
+ Choose an example below &#128293; &#128293; &#128293; <br>\
179
+ Or, upload by yourself: <br>\
180
+ 1. Upload a video to be tested to 'video'. If failed, please check the codec, we recommend h.264 by default. <br>2. Upload a prompt image to 'prompt' and draw <strong>a point or line on the target</strong>. <br>\
181
+ <br> \
182
+ 💎 SAM segments the target with any point or scribble, then SegGPT segments the whole video. <br>\
183
+ 💎 Examples below were never trained and are randomly selected for testing in the wild. <br>\
184
+ 💎 Current UI interface only unleashes a small part of the capabilities of SegGPT, i.e., 1-shot case. <br> \
185
+ Note: we only take the first 16 frames for the demo. \
186
+ </p>",
187
+ )
188
+
189
+
190
+
191
+
192
  title = "SegGPT: Segmenting Everything In Context<br> \
193
  <div align='center'> \
194
  <h2><a href='https://arxiv.org/abs/2304.03284' target='_blank' rel='noopener'>[paper]</a> \
 
199
  </div> \
200
  "
201
 
202
+ demo = gr.TabbedInterface([demo_mask_sam, demo_mask_video, demo_mask], ['SAM+SegGPT (一触百通)', '🎬Anything in a Video', 'General 1-shot'], title=title)
203
 
204
  #demo.launch(share=True, auth=("baai", "vision"))
205
  demo.launch(enable_queue=False)
videos/.DS_Store ADDED
Binary file (6.15 kB). View file
 
videos/a_car_is_moving_on_the_road_40.jpg ADDED

Git LFS Details

  • SHA256: 10daa18f2e97d4ee2318f1afc14af43e17d0a040414f52ad3d96b547a962070f
  • Pointer size: 132 Bytes
  • Size of remote file: 1.54 MB
videos/a_car_is_moving_on_the_road_40.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e69783eed3294b0a76c147d46ce8705a46c21ae4122edc0fd9a2b57ee453954
3
+ size 248227
videos/a_man_in_parkour_100.jpg ADDED

Git LFS Details

  • SHA256: 63bcaad8d862552449118379adc8c277a5c61d35d5d0c4eb771016a04fdfccf2
  • Pointer size: 132 Bytes
  • Size of remote file: 2.18 MB
videos/a_man_in_parkour_100.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dda65f31336de9f9ed607fe60443164a8529b98d949b7cb8e068a2245352e2e3
3
+ size 1020054
videos/a_man_is_surfing_3_30.jpg ADDED

Git LFS Details

  • SHA256: 14da330e073633607b72501b2a9c4a5be0c49f61a3de799366df8ee0dded1afd
  • Pointer size: 132 Bytes
  • Size of remote file: 1.01 MB
videos/a_man_is_surfing_3_30.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73633b80752cefe0e68fb3bf6d1117d5bc1c094c4198c871beb7b59c6856f2f7
3
+ size 301229
videos/child-riding_lego.jpg ADDED

Git LFS Details

  • SHA256: 738a855aff8883a4d3a15b619f5e00fee99d4004e409832801b8fead5c362e47
  • Pointer size: 130 Bytes
  • Size of remote file: 21.5 kB
videos/child-riding_lego.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24a9ffd1a3a430851bb864f3fe1da4e84aed8d9694aea42d1f1578e7ef4818b4
3
+ size 117189
videos/horse-running.jpg ADDED

Git LFS Details

  • SHA256: 06009d69b9da293a5f34c1dad221efd4022568a1232edb5e131228c539f8a1e9
  • Pointer size: 132 Bytes
  • Size of remote file: 1.01 MB
videos/horse-running.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfd8b6c99776f291bf4c8787721387d8764c85b787741c665dee49dfb6442630
3
+ size 383635
videos/jeep-moving.jpg ADDED

Git LFS Details

  • SHA256: e61812ffd152c44f7b67e4a0d33f2d79c9d074fa431f6ea83a316a05f6f25a88
  • Pointer size: 130 Bytes
  • Size of remote file: 36.9 kB
videos/jeep-moving.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92c2b38d7c52d8a19be7aa7f568d1d07b5fc433cbd369f45e028325230ad76ba
3
+ size 150698