Kwai-Keye committed · commit 9e03dd0 (verified) · 1 parent: aea2b9f

Update model files

Files changed (3)
  1. .vscode/settings.json +3 -0
  2. README.md +1 -8
  3. processing_keye_vl_1_5.py +10 -2
.vscode/settings.json ADDED
@@ -0,0 +1,3 @@
+ {
+     "kwaipilot.settings.proxy": "https://kinsight.corp.kuaishou.com"
+ }
README.md CHANGED
@@ -1,10 +1,3 @@
- ---
- license: apache-2.0
- language:
- - zh
- - en
- pipeline_tag: image-text-to-text
- ---
  # Kwai Keye-VL
  
  
@@ -12,7 +5,7 @@ pipeline_tag: image-text-to-text
  <img src="asset/keye_logo_2.png" width="100%" alt="Kwai Keye-VL Logo">
  </div>
  
- <font size=3><div align='center' >
+ <font size=7><div align='center' >
  [[🍎 Home Page](https://kwai-keye.github.io/)]
  [[📖 Technique Report](https://arxiv.org/abs/2507.01949)]
  [[📊 Keye-VL-8B-Preview](https://huggingface.co/Kwai-Keye/Keye-VL-8B-Preview) ]
processing_keye_vl_1_5.py CHANGED
@@ -234,10 +234,14 @@ class KeyeVL1_5Processor(ProcessorMixin):
      mode="bilinear",
      antialias=True,
  ).float()
+ do_resize = False
+ else:
+     slow_frames = slow_frames.float()
+     do_resize = True
  # Tensor(N, C, H, W) -> Tuple[Tensor(1, C, H, W)]
  # slow_frames = list(slow_frames.split(1, dim=0)); do not split here, it is done inside the model
  slow_video_inputs = self.image_processor(
-     images=None, videos=[slow_frames], **output_kwargs["images_kwargs"], do_resize=False)
+     images=None, videos=[slow_frames], **output_kwargs["images_kwargs"], do_resize=do_resize)
  slow_video_grid_thw = slow_video_inputs["video_grid_thw"]
  batch_slow_frames.append(slow_video_inputs)
  # # token count per frame of the current video
@@ -255,10 +259,14 @@ class KeyeVL1_5Processor(ProcessorMixin):
      mode="bilinear",
      antialias=True,
  ).float()
+ do_fast_resize = False
+ else:
+     fast_frames = fast_frames.float()
+     do_fast_resize = True
  # Tensor(N, C, H, W) -> Tuple[Tensor(1, C, H, W)]
  # fast_frames = list(fast_frames.split(1, dim=0))
  fast_video_inputs = self.image_processor(
-     images=None, videos=[fast_frames], **output_kwargs["images_kwargs"], do_resize=False)
+     images=None, videos=[fast_frames], **output_kwargs["images_kwargs"], do_resize=do_fast_resize)
  fast_video_grid_thw = fast_video_inputs["video_grid_thw"]
  batch_fast_frames.append(fast_video_inputs)
  # # total token count for the current video
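The net effect of the processing_keye_vl_1_5.py change is that `do_resize` is no longer hardcoded to `False`: when the slow/fast frames have already been resized with antialiased bilinear interpolation, the image processor is told to skip its own resize; otherwise the frames are only cast to float and the processor resizes them itself. Below is a minimal sketch of that pattern, not the repository's actual code: the guarding condition, the helper name `prepare_frames`, and the `target_hw` parameter are assumptions, since the `if` branch's condition is not visible in this hunk.

```python
from typing import Optional, Tuple

import torch
import torch.nn.functional as F


def prepare_frames(
    frames: torch.Tensor,                  # Tensor(N, C, H, W)
    target_hw: Optional[Tuple[int, int]],  # hypothetical: resolved target size, if any
) -> Tuple[torch.Tensor, bool]:
    """Sketch of the do_resize toggle: resize here, or let the image processor do it."""
    if target_hw is not None:
        # Pre-resize with antialiased bilinear interpolation (as in the hunk's context
        # lines) and tell the downstream image processor not to resize a second time.
        frames = F.interpolate(
            frames,
            size=target_hw,
            mode="bilinear",
            antialias=True,
        ).float()
        do_resize = False
    else:
        # No pre-resize: just cast to float and let the image processor handle resizing.
        frames = frames.float()
        do_resize = True
    return frames, do_resize


if __name__ == "__main__":
    dummy = torch.rand(4, 3, 720, 1280)
    resized, flag = prepare_frames(dummy, (448, 448))
    print(resized.shape, flag)  # torch.Size([4, 3, 448, 448]) False
    as_is, flag = prepare_frames(dummy, None)
    print(as_is.shape, flag)    # torch.Size([4, 3, 720, 1280]) True
```

Inside the processor, the returned flag would then be forwarded as `do_resize=do_resize` (or `do_fast_resize` on the fast pathway) to `self.image_processor(images=None, videos=[frames], **output_kwargs["images_kwargs"], ...)`, which is what the `+` lines above do.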