Safetensors · English · qwen2
BrianatCambridge committed (verified)
Commit e157e18 · 1 parent: 0361439

Upload folder using huggingface_hub

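The commit message above indicates the checkpoint was pushed with the huggingface_hub library. A minimal sketch of how such an upload is typically produced (the repo id and local folder path are placeholders, not taken from this commit):

# Sketch only: push a local folder as one commit, as the commit message suggests.
# repo_id and folder_path are placeholders.
from huggingface_hub import upload_folder

upload_folder(
    folder_path="./checkpoint",                  # local model directory (placeholder)
    repo_id="BrianatCambridge/example-repo",     # target repo on the Hub (placeholder)
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)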
added_tokens.json ADDED
@@ -0,0 +1,5 @@
{
  "<|endoftext|>": 151643,
  "<|im_end|>": 151645,
  "<|im_start|>": 151644
}
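These mappings give the chat-control tokens their ids in the extended Qwen2 vocabulary. A quick sanity check once the tokenizer files are downloaded, assuming transformers and a placeholder repo id:

# Sketch: confirm the special tokens resolve to the ids listed in added_tokens.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("path/or/repo-id")  # placeholder
for t in ("<|endoftext|>", "<|im_start|>", "<|im_end|>"):
    print(t, tok.convert_tokens_to_ids(t))  # expect 151643, 151644, 151645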
config.json ADDED
@@ -0,0 +1,555 @@
{
  "_name_or_path": "/mnt/bn/tiktok-mm-4/aiic/public/model/OV-Qwen2-7B-AM9",
  "add_time_token": true,
  "architectures": [
    "LlavaAVQwenForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 151643,
  "eos_token_id": 151645,
  "hidden_act": "silu",
  "hidden_size": 3584,
  "image_aspect_ratio": "anyres_max_9",
  "image_crop_resolution": null,
  "image_grid_pinpoints": [
    [384, 384], [384, 768], [384, 1152], [384, 1536],
    [768, 768], [384, 1920], [768, 768], [384, 2304],
    [768, 1152], [384, 2688], [768, 1152], [384, 3072],
    [768, 1536], [384, 3456], [768, 1536], [1152, 1152],
    [768, 1920], [1152, 1152], [768, 1920], [1152, 1152],
    [768, 2304], [1152, 1536], [768, 2304], [1152, 1536],
    [768, 2688], [1152, 1536], [768, 2688], [1152, 1920],
    [768, 3072], [1152, 1920], [1536, 1536], [768, 3072],
    [1152, 1920], [1536, 1536], [768, 3456], [1152, 2304],
    [1536, 1536], [768, 3456], [1152, 2304], [1536, 1536],
    [768, 3840], [1152, 2304], [1536, 1920], [768, 3840],
    [1152, 2688], [1536, 1920], [768, 4224], [1152, 2688],
    [1536, 1920], [768, 4224], [1152, 2688], [1536, 1920],
    [768, 4608], [1152, 3072], [1536, 2304], [768, 4608],
    [1152, 3072], [1536, 2304], [1920, 1920], [768, 4992],
    [1152, 3072], [1536, 2304], [1920, 1920], [768, 4992],
    [1152, 3456], [1536, 2304], [1920, 1920], [768, 5376],
    [1152, 3456], [1536, 2688], [1920, 1920], [768, 5376],
    [1152, 3456], [1536, 2688], [1920, 1920], [768, 5760],
    [1152, 3840], [1536, 2688], [1920, 2304], [768, 5760],
    [1152, 3840], [1536, 2688], [1920, 2304], [768, 6144],
    [1152, 3840], [1536, 3072], [1920, 2304], [768, 6144],
    [1152, 4224], [1536, 3072], [1920, 2304], [768, 6528],
    [1152, 4224], [1536, 3072], [1920, 2304], [768, 6528],
    [1152, 4224], [1536, 3072], [1920, 2688], [768, 6912],
    [1152, 4608], [1536, 3456], [1920, 2688], [2304, 2304]
  ],
  "image_split_resolution": null,
  "initializer_range": 0.02,
  "intermediate_size": 18944,
  "max_position_embeddings": 32768,
  "max_window_layers": 28,
  "mf_split_init": false,
  "mm_hidden_size": 1152,
  "mm_patch_merge_type": "spatial_unpad",
  "mm_projector_lr": null,
  "mm_projector_type": "mlp2x_gelu",
  "mm_resampler_type": null,
  "mm_tunable_parts": "mm_vision_tower,mm_mlp_adapter,mm_language_model",
  "mm_use_im_patch_token": false,
  "mm_use_im_start_end": false,
  "mm_vision_select_feature": "patch",
  "mm_vision_select_layer": -2,
  "mm_vision_tower": "google/siglip-so400m-patch14-384",
  "mm_vision_tower_lr": 2e-06,
  "model_args": {
    "add_time_token": true,
    "audio_visual": true,
    "beats_path": null,
    "do_rag": false,
    "flash_attn": false,
    "fps": 2,
    "freeze_backbone": true,
    "freeze_beats": true,
    "freeze_final_linear": true,
    "freeze_speech_QFormer": true,
    "freeze_whisper": true,
    "from_stage_1_5": false,
    "image_processor": "google/siglip-so400m-patch14-384",
    "lora_alpha": 256,
    "lora_dropout": 0.05,
    "lora_enable": true,
    "lora_r": 64,
    "mf_split_init": false,
    "mm_mask_drop_mode": "fixed",
    "mm_mask_drop_ratio": 0.25,
    "mm_mask_drop_ratio_lower": null,
    "mm_mask_drop_ratio_upper": null,
    "mm_mask_drop_skip_percentage": 0.0,
    "mm_newline_position": "grid",
    "mm_patch_merge_type": "spatial_unpad",
    "mm_perceiver_depth": 3,
    "mm_perceiver_ff_mult": 4,
    "mm_perceiver_latents": 32,
    "mm_perceiver_pretrained": null,
    "mm_pooling_position": "before",
    "mm_projector_type": "mlp2x_gelu",
    "mm_qformer_depth": 3,
    "mm_qformer_latents": 32,
    "mm_qformer_pretrained": null,
    "mm_resampler_type": "spatial_pool",
    "mm_spatial_pool_mode": "max",
    "mm_spatial_pool_out_channels": 1152,
    "mm_spatial_pool_stride": 2,
    "mm_use_im_patch_token": false,
    "mm_use_im_start_end": false,
    "mm_vision_select_feature": "patch",
    "mm_vision_select_layer": -2,
    "mm_vlmattention_bert_type": "qformer_pretrain",
    "mm_vlmattention_compress_type": null,
    "mm_vlmattention_num_query": 32,
    "mm_vlmattention_pretrained": null,
    "modality_max_length": "None",
    "model_name_or_path": "/mnt/bn/tiktok-mm-4/aiic/users/guangzhisun/llava-video/output/models/sft_finevideo_with_caption_qa_reasoning/checkpoint_93000_qwen",
    "multi_frame_num": 30,
    "multi_frame_projector": false,
    "niv_cnn_params": "[(10, 5), (3, 2), (3, 2), (3, 2), (3, 2), (2, 2), (2, 2)]",
    "niv_in_channels": 4,
    "niv_out_channels": 401,
    "num_speech_query_token": 1,
    "patchify_video_feature": false,
    "pretrain_mm_mlp_adapter": null,
    "rag_input_frames": 1,
    "rag_topk": 5,
    "rag_type": "direct",
    "salmonn_path": null,
    "second_per_window": 0.2,
    "second_stride": 0.2,
    "segmentation": -1,
    "spt_projector": false,
    "tune_mm_mlp_adapter": false,
    "tune_mm_vision_resampler": false,
    "unfreeze_mm_vision_tower": false,
    "use_final_linear": true,
    "use_flash_tower": true,
    "use_mfcnn": false,
    "use_mftrans": false,
    "use_niv": false,
    "use_speech_Qformer": true,
    "version": "qwen_1_5",
    "vision_tower": "google/siglip-so400m-patch14-384",
    "whisper_lora": false,
    "whisper_path": "openai/whisper-large-v3",
    "window_level_Qformer": true
  },
  "model_type": "qwen2",
  "multi_frame_num": 30,
  "multi_frame_projector": false,
  "num_attention_heads": 28,
  "num_hidden_layers": 28,
  "num_key_value_heads": 4,
  "pos_skipping_range": 4096,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 1000000.0,
  "sliding_window": 131072,
  "tie_word_embeddings": false,
  "tokenizer_model_max_length": 32768,
  "tokenizer_padding_side": "right",
  "torch_dtype": "bfloat16",
  "transformers_version": "4.39.2",
  "use_cache": true,
  "use_flash_tower": true,
  "use_mfcnn": false,
  "use_mftrans": false,
  "use_mm_proj": true,
  "use_pos_skipping": false,
  "use_sliding_window": false,
  "vision_tower_pretrained": null,
  "vocab_size": 152064
}
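The config combines the base Qwen2 language-model fields (hidden_size, num_hidden_layers, rope_theta, ...) with LLaVA-style multimodal fields (mm_vision_tower, image_grid_pinpoints, model_args). Since the architecture is the custom LlavaAVQwenForCausalLM class, a safe way to inspect it without the model code is to read the JSON directly; a small sketch:

# Sketch: inspect key hyper-parameters straight from config.json.
import json

with open("config.json") as f:
    cfg = json.load(f)

print(cfg["architectures"])                          # ['LlavaAVQwenForCausalLM']
print(cfg["hidden_size"], cfg["num_hidden_layers"],
      cfg["num_attention_heads"], cfg["num_key_value_heads"])  # 3584 28 28 4
print(cfg["mm_vision_tower"])                        # google/siglip-so400m-patch14-384
print(len(cfg["image_grid_pinpoints"]))              # number of anyres grid resolutions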
generation_config.json ADDED
@@ -0,0 +1,32 @@
{
  "beats_path": null,
  "bos_token_id": 151643,
  "do_sample": true,
  "eos_token_id": [
    151645,
    151643
  ],
  "freeze_beats": true,
  "freeze_speech_QFormer": true,
  "freeze_whisper": true,
  "niv_cnn_params": "[(10, 5), (3, 2), (3, 2), (3, 2), (3, 2), (2, 2), (2, 2)]",
  "niv_in_channels": 4,
  "niv_out_channels": 401,
  "num_speech_query_token": 1,
  "pad_token_id": 151643,
  "repetition_penalty": 1.05,
  "salmonn_path": null,
  "second_per_window": 0.2,
  "second_stride": 0.2,
  "temperature": 0.7,
  "top_k": 20,
  "top_p": 0.8,
  "train_orm": "onlineormtraingrpoprime",
  "transformers_version": "4.39.2",
  "use_final_linear": true,
  "use_niv": false,
  "use_speech_Qformer": true,
  "video_fps": 2,
  "whisper_path": "openai/whisper-large-v3",
  "window_level_Qformer": true
}
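Only the decoding fields here (do_sample, temperature 0.7, top_p 0.8, top_k 20, repetition_penalty 1.05, and the eos/pad ids) drive text generation; the remaining keys appear to mirror the training-time model arguments in config.json. A hedged sketch of passing the same sampling settings explicitly:

# Sketch: the sampling settings from generation_config.json, built by hand.
from transformers import GenerationConfig

gen_cfg = GenerationConfig(
    do_sample=True,
    temperature=0.7,
    top_p=0.8,
    top_k=20,
    repetition_penalty=1.05,
    eos_token_id=[151645, 151643],
    pad_token_id=151643,
)
# outputs = model.generate(**inputs, generation_config=gen_cfg)  # model/inputs not shown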
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:55b48adee0f23054120634b7146af6240ef26ffcd1b32b6100cd3f872ec36291
size 4877669664
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4881e407d5cc3395d5f2150ef3d761a76564ffc52beb45e930e93502ca8dd649
size 4932751008
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ac9f33fd16b0f0561bf7f7d39d916a6d5f9887f700e04f825d13d407911a0c2e
size 4998052768
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:df807325f2a614b1e970223a19f233b50debcdda8c17729b02224dfe67963049
size 2624207888
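Each of the four entries above is a Git LFS pointer: the repository stores only the SHA-256 and byte size, and the shard itself is fetched from LFS storage on download. A downloaded shard can be checked against its pointer; a minimal sketch for the first shard:

# Sketch: recompute a shard's SHA-256 and compare it with the LFS pointer above.
import hashlib

def sha256_of(path, chunk_size=1 << 20):
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk_size):
            h.update(block)
    return h.hexdigest()

expected = "55b48adee0f23054120634b7146af6240ef26ffcd1b32b6100cd3f872ec36291"
print(sha256_of("model-00001-of-00004.safetensors") == expected)  # True if intact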
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,20 @@
{
  "additional_special_tokens": [
    "<|im_start|>",
    "<|im_end|>"
  ],
  "eos_token": {
    "content": "<|im_end|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
{
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "151643": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151644": {
      "content": "<|im_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151645": {
      "content": "<|im_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "additional_special_tokens": [
    "<|im_start|>",
    "<|im_end|>"
  ],
  "bos_token": null,
  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|im_end|>",
  "errors": "replace",
  "model_max_length": 32768,
  "pad_token": "<|endoftext|>",
  "padding_side": "right",
  "split_special_tokens": false,
  "tokenizer_class": "Qwen2Tokenizer",
  "unk_token": null
}
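The chat_template above wraps every turn in <|im_start|>/<|im_end|> and inserts a default system prompt when the conversation does not begin with one; apply_chat_template renders it. A small sketch, with the repo id as a placeholder:

# Sketch: render the ChatML-style prompt defined in tokenizer_config.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("path/or/repo-id")  # placeholder
messages = [{"role": "user", "content": "Describe the video."}]
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
# <|im_start|>system ... <|im_end|> <|im_start|>user ... <|im_end|> <|im_start|>assistant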
vocab.json ADDED
The diff for this file is too large to render. See raw diff