LaViLa (EK-100_MIR), Llava-v1.5-7B-GGUF, MobileVLM, Qwen2-VL-2B-Instruct, YOLO v8n, YOLO World
This view is limited to 50 files because the commit contains too many changes.
- .gitattributes +19 -0
- LLaVA/Llava-v1.5-7B-GGUF/llava-v1.5-7b-Q2_K.gguf +3 -0
- LLaVA/Llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-model-f16.gguf +3 -0
- LaViLa/EK-100_MIR/TSF-B/clip_openai_timesformer_base.ft_ek100_mir.ep_0085.md5sum_c67d95.pth +3 -0
- MobileVLM/MobileVLM-1.7B-GGUF/.gitattributes +39 -0
- MobileVLM/MobileVLM-1.7B-GGUF/MobileVLM-1.7B-Q4_K.gguf +3 -0
- MobileVLM/MobileVLM-1.7B-GGUF/MobileVLM-1.7B-Q5_K.gguf +3 -0
- MobileVLM/MobileVLM-1.7B-GGUF/MobileVLM-1.7B-Q6_K.gguf +3 -0
- MobileVLM/MobileVLM-1.7B-GGUF/MobileVLM-1.7B-mmproj-f16.gguf +3 -0
- MobileVLM/MobileVLM_V2-1.7B-GGUF/.gitattributes +37 -0
- MobileVLM/MobileVLM_V2-1.7B-GGUF/README.md +16 -0
- MobileVLM/MobileVLM_V2-1.7B-GGUF/ggml-model-q4_k.gguf +3 -0
- MobileVLM/MobileVLM_V2-1.7B-GGUF/mmproj-model-f16.gguf +3 -0
- Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/.gitattributes +41 -0
- Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/Qwen2-VL-2B-Instruct-Q3_K_L.gguf +3 -0
- Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/Qwen2-VL-2B-Instruct-Q4_K_M.gguf +3 -0
- Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/Qwen2-VL-2B-Instruct-Q6_K.gguf +3 -0
- Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/Qwen2-VL-2B-Instruct-Q8_0.gguf +3 -0
- Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/README.md +29 -0
- Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/mmproj-model-f32.gguf +3 -0
- YOLO/yolo-world/yolo-world-s.pt +3 -0
- YOLO/yolov8n_silu_coco_640x640_quant_tflite_edgetpu_1/labels_yolov8n_silu_coco.json +82 -0
- YOLO/yolov8n_silu_coco_640x640_quant_tflite_edgetpu_1/yolov8n_silu_coco_640x640_quant_tflite_edgetpu_1.json +67 -0
- YOLO/yolov8n_silu_coco_640x640_quant_tflite_edgetpu_1/yolov8n_silu_coco_640x640_quant_tflite_edgetpu_1.tflite +3 -0
- onnx-community/Qwen2-VL-2B-Instruct/.gitattributes +41 -0
- onnx-community/Qwen2-VL-2B-Instruct/README.md +303 -0
- onnx-community/Qwen2-VL-2B-Instruct/added_tokens.json +16 -0
- onnx-community/Qwen2-VL-2B-Instruct/chat_template.json +3 -0
- onnx-community/Qwen2-VL-2B-Instruct/config.json +56 -0
- onnx-community/Qwen2-VL-2B-Instruct/generation_config.json +13 -0
- onnx-community/Qwen2-VL-2B-Instruct/merges.txt +0 -0
- onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged.onnx +3 -0
- onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged.onnx_data +3 -0
- onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_bnb4.onnx +3 -0
- onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_fp16.onnx +3 -0
- onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_fp16.onnx_data +3 -0
- onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_int8.onnx +3 -0
- onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_q4.onnx +3 -0
- onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_q4f16.onnx +3 -0
- onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_quantized.onnx +3 -0
- onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_uint8.onnx +3 -0
- onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens.onnx +3 -0
- onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_bnb4.onnx +3 -0
- onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_fp16.onnx +3 -0
- onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_int8.onnx +3 -0
- onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_q4.onnx +3 -0
- onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_q4f16.onnx +3 -0
- onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_quantized.onnx +3 -0
- onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_uint8.onnx +3 -0
- onnx-community/Qwen2-VL-2B-Instruct/onnx/vision_encoder.onnx +3 -0
.gitattributes CHANGED
@@ -33,3 +33,22 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+LLaVA/Llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-model-f16.gguf filter=lfs diff=lfs merge=lfs -text
+LLaVA/Llava-v1.5-7B-GGUF/llava-v1.5-7b-Q2_K.gguf filter=lfs diff=lfs merge=lfs -text
+MobileVLM/MobileVLM_V2-1.7B-GGUF/ggml-model-q4_k.gguf filter=lfs diff=lfs merge=lfs -text
+MobileVLM/MobileVLM_V2-1.7B-GGUF/mmproj-model-f16.gguf filter=lfs diff=lfs merge=lfs -text
+MobileVLM/MobileVLM-1.7B-GGUF/MobileVLM-1.7B-mmproj-f16.gguf filter=lfs diff=lfs merge=lfs -text
+MobileVLM/MobileVLM-1.7B-GGUF/MobileVLM-1.7B-Q4_K.gguf filter=lfs diff=lfs merge=lfs -text
+MobileVLM/MobileVLM-1.7B-GGUF/MobileVLM-1.7B-Q5_K.gguf filter=lfs diff=lfs merge=lfs -text
+MobileVLM/MobileVLM-1.7B-GGUF/MobileVLM-1.7B-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
+onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
+onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged.onnx_data filter=lfs diff=lfs merge=lfs -text
+onnx-community/Qwen2-VL-2B-Instruct/onnx/vision_encoder_bnb4.onnx_data filter=lfs diff=lfs merge=lfs -text
+onnx-community/Qwen2-VL-2B-Instruct/onnx/vision_encoder_q4.onnx_data filter=lfs diff=lfs merge=lfs -text
+onnx-community/Qwen2-VL-2B-Instruct/onnx/vision_encoder.onnx_data filter=lfs diff=lfs merge=lfs -text
+onnx-community/Qwen2-VL-2B-Instruct/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/mmproj-model-f32.gguf filter=lfs diff=lfs merge=lfs -text
+Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/Qwen2-VL-2B-Instruct-Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text
+Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/Qwen2-VL-2B-Instruct-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/Qwen2-VL-2B-Instruct-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
+Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/Qwen2-VL-2B-Instruct-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
LLaVA/Llava-v1.5-7B-GGUF/llava-v1.5-7b-Q2_K.gguf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dde114456d7b4ea5cc07c75e363f9807db1dcf82f61168cc91969316601ca467
+size 2532863776
LLaVA/Llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-model-f16.gguf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50da4e5b0a011615f77686f9b02613571e65d23083c225e107c08c3b1775d9b1
+size 624434368
LaViLa/EK-100_MIR/TSF-B/clip_openai_timesformer_base.ft_ek100_mir.ep_0085.md5sum_c67d95.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17e8302ae99b607459b7c01480bf85a7ba664b8afc91b0bbf6a9be662887c91d
+size 710829395
MobileVLM/MobileVLM-1.7B-GGUF/.gitattributes ADDED
@@ -0,0 +1,39 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+MobileVLM-1.7B-mmproj-f16.gguf filter=lfs diff=lfs merge=lfs -text
+MobileVLM-1.7B-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
+MobileVLM-1.7B-Q4_K.gguf filter=lfs diff=lfs merge=lfs -text
+MobileVLM-1.7B-Q5_K.gguf filter=lfs diff=lfs merge=lfs -text
MobileVLM/MobileVLM-1.7B-GGUF/MobileVLM-1.7B-Q4_K.gguf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41f0487ea4d3a4f58c467d779bfe66c02d9b33fa2d33b85cf456df89eb35dab9
+size 834055776
MobileVLM/MobileVLM-1.7B-GGUF/MobileVLM-1.7B-Q5_K.gguf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ca45d185225a61c0b003b7a4fe072ddae9ea57bbda7e7025e7dec50421192b8
+size 972795488
MobileVLM/MobileVLM-1.7B-GGUF/MobileVLM-1.7B-Q6_K.gguf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73ee20a00293a6419e311c349f7dc257ce0a15e9e59962933890204a0fcc0881
+size 1120206432
MobileVLM/MobileVLM-1.7B-GGUF/MobileVLM-1.7B-mmproj-f16.gguf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d9855d323cee2a1797a88f9d7057ce26b21dcd62a50b382c4ff44ea60c77e39
+size 620384896
MobileVLM/MobileVLM_V2-1.7B-GGUF/.gitattributes ADDED
@@ -0,0 +1,37 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+ggml-model-q4_k.gguf filter=lfs diff=lfs merge=lfs -text
+mmproj-model-f16.gguf filter=lfs diff=lfs merge=lfs -text
MobileVLM/MobileVLM_V2-1.7B-GGUF/README.md ADDED
@@ -0,0 +1,16 @@
+---
+license: apache-2.0
+tags:
+- MobileVLM V2
+---
+## Model Summary
+MobileVLM V2 is a family of significantly improved vision-language models built upon MobileVLM. It shows that a careful orchestration of novel architectural design, an improved training scheme tailored for mobile VLMs, and rich, high-quality dataset curation can substantially benefit VLM performance. Specifically, MobileVLM V2 1.7B achieves better or on-par performance on standard VLM benchmarks compared with much larger VLMs at the 3B scale. Notably, the MobileVLM_V2-3B model outperforms a large variety of VLMs at the 7B+ scale.
+
+MobileVLM_V2-1.7B was built on our [MobileLLaMA-1.4B-Chat](https://huggingface.co/mtgv/MobileLLaMA-1.4B-Chat) to facilitate off-the-shelf deployment.
+
+## Model Sources
+- Repository: https://github.com/Meituan-AutoML/MobileVLM
+- Paper: [MobileVLM V2: Faster and Stronger Baseline for Vision Language Model](https://arxiv.org/abs/2402.03766)
+
+## How to Get Started with the Model
+Inference examples can be found on [GitHub](https://github.com/Meituan-AutoML/MobileVLM).
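As a quick illustration of how the two GGUF files in this folder fit together (the quantized language model plus the mmproj vision projector), here is a minimal Python sketch using `llama-cpp-python`'s LLaVA-1.5 chat handler. It assumes that handler is compatible with these MobileVLM V2 GGUF exports and that the paths and image URL below are placeholders; the project's official inference examples remain those in the MobileVLM repository linked above.

```python
# Hedged sketch: assumes llama-cpp-python's LLaVA-1.5 chat handler accepts
# these MobileVLM V2 GGUF files; paths and URL are illustrative placeholders.
from llama_cpp import Llama
from llama_cpp.llama_chat_format import Llava15ChatHandler

# mmproj-model-f16.gguf is the vision projector, ggml-model-q4_k.gguf the LLM.
chat_handler = Llava15ChatHandler(
    clip_model_path="MobileVLM/MobileVLM_V2-1.7B-GGUF/mmproj-model-f16.gguf"
)
llm = Llama(
    model_path="MobileVLM/MobileVLM_V2-1.7B-GGUF/ggml-model-q4_k.gguf",
    chat_handler=chat_handler,
    n_ctx=2048,  # leave room for image tokens plus the reply
)

response = llm.create_chat_completion(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": "https://example.com/demo.jpg"}},
                {"type": "text", "text": "Describe this image."},
            ],
        }
    ],
    max_tokens=128,
)
print(response["choices"][0]["message"]["content"])
```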
MobileVLM/MobileVLM_V2-1.7B-GGUF/ggml-model-q4_k.gguf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15d4bd09293404831902c23dd898aa2cc7b4b223b6c39a64e330601ef72d99db
+size 791817856
MobileVLM/MobileVLM_V2-1.7B-GGUF/mmproj-model-f16.gguf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57966afa654e9d46a11b2a4b17989c2d487cd961f702c4fe310f86db5e30aab4
+size 595103072
Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/.gitattributes ADDED
@@ -0,0 +1,41 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+Qwen2-VL-2B-Instruct-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
+Qwen2-VL-2B-Instruct-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
+Qwen2-VL-2B-Instruct-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+Qwen2-VL-2B-Instruct-Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text
+mmproj-Qwen2-VL-2B-Instruct-f32.gguf filter=lfs diff=lfs merge=lfs -text
+mmproj-model-f32.gguf filter=lfs diff=lfs merge=lfs -text
Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/Qwen2-VL-2B-Instruct-Q3_K_L.gguf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:feb289177bcb04bec880720eb7a10890148f8a6586d911b921a301f55d632807
+size 880161248
Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/Qwen2-VL-2B-Instruct-Q4_K_M.gguf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20868587821c6b9f82089daac35918f8150c3568e3c4a2cdd51a24b6dd75ab79
+size 986046944
Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/Qwen2-VL-2B-Instruct-Q6_K.gguf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7505e951bbeda59f36e0ff413f5a76fec9281f640613e549e0f4398329cf631
+size 1272738272
Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/Qwen2-VL-2B-Instruct-Q8_0.gguf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c11dff3f74b326668600607a04b412c320d9dfe917a77f48ed6abbd962dd4a44
+size 1646571488
Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/README.md ADDED
@@ -0,0 +1,29 @@
+---
+quantized_by: bartowski
+pipeline_tag: text-generation
+---
+## 💫 Community Model> Qwen2 VL 2B Instruct by Qwen
+
+*👾 [LM Studio](https://lmstudio.ai) Community models highlights program. Highlighting new & noteworthy models by the community. Join the conversation on [Discord](https://discord.gg/aPQfnNkxGC)*.
+
+**Model creator:** [Qwen](https://huggingface.co/Qwen)<br>
+**Original model**: [Qwen2-VL-2B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct)<br>
+**GGUF quantization:** provided by [bartowski](https://huggingface.co/bartowski) based on `llama.cpp` release [b4327](https://github.com/ggerganov/llama.cpp/releases/tag/b4327)<br>
+
+## Technical Details
+
+Supports a context length of 32k tokens.
+
+Vision model capable of understanding images of various resolutions and ratios.
+
+Complex reasoning for agentic automation with vision.
+
+Multilingual support.
+
+## Special thanks
+
+🙏 Special thanks to [Georgi Gerganov](https://github.com/ggerganov) and the whole team working on [llama.cpp](https://github.com/ggerganov/llama.cpp/) for making all of this possible.
+
+## Disclaimers
+
+LM Studio is not the creator, originator, or owner of any Model featured in the Community Model Program. Each Community Model is created and provided by third parties. LM Studio does not endorse, support, represent or guarantee the completeness, truthfulness, accuracy, or reliability of any Community Model. You understand that Community Models can produce content that might be offensive, harmful, inaccurate or otherwise inappropriate, or deceptive. Each Community Model is the sole responsibility of the person or entity who originated such Model. LM Studio may not monitor or control the Community Models and cannot, and does not, take responsibility for any such Model. LM Studio disclaims all warranties or guarantees about the accuracy, reliability or benefits of the Community Models. LM Studio further disclaims any warranty that the Community Model will meet your requirements, be secure, uninterrupted or available at any time or location, or error-free, viruses-free, or that any errors will be corrected, or otherwise. You will be solely responsible for any damage resulting from your use of or access to the Community Models, your downloading of any Community Model, or use of any other Community Model provided by or through LM Studio.
Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/mmproj-model-f32.gguf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b5a4e3b50652c60f7a027fb113d7e9d8f9411b0702f4de4e0743f71a3355530
+size 2661115392
YOLO/yolo-world/yolo-world-s.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b36b186fa4279efabade126b2ccce57008f36523e41bf344a6fd155e49a0368
+size 27166882
YOLO/yolov8n_silu_coco_640x640_quant_tflite_edgetpu_1/labels_yolov8n_silu_coco.json ADDED
@@ -0,0 +1,82 @@
+{
+    "0": "person",
+    "1": "bicycle",
+    "2": "car",
+    "3": "motorcycle",
+    "4": "airplane",
+    "5": "bus",
+    "6": "train",
+    "7": "truck",
+    "8": "boat",
+    "9": "traffic light",
+    "10": "fire hydrant",
+    "11": "stop sign",
+    "12": "parking meter",
+    "13": "bench",
+    "14": "bird",
+    "15": "cat",
+    "16": "dog",
+    "17": "horse",
+    "18": "sheep",
+    "19": "cow",
+    "20": "elephant",
+    "21": "bear",
+    "22": "zebra",
+    "23": "giraffe",
+    "24": "backpack",
+    "25": "umbrella",
+    "26": "handbag",
+    "27": "tie",
+    "28": "suitcase",
+    "29": "frisbee",
+    "30": "skis",
+    "31": "snowboard",
+    "32": "sports ball",
+    "33": "kite",
+    "34": "baseball bat",
+    "35": "baseball glove",
+    "36": "skateboard",
+    "37": "surfboard",
+    "38": "tennis racket",
+    "39": "bottle",
+    "40": "wine glass",
+    "41": "cup",
+    "42": "fork",
+    "43": "knife",
+    "44": "spoon",
+    "45": "bowl",
+    "46": "banana",
+    "47": "apple",
+    "48": "sandwich",
+    "49": "orange",
+    "50": "broccoli",
+    "51": "carrot",
+    "52": "hot dog",
+    "53": "pizza",
+    "54": "donut",
+    "55": "cake",
+    "56": "chair",
+    "57": "couch",
+    "58": "potted plant",
+    "59": "bed",
+    "60": "dining table",
+    "61": "toilet",
+    "62": "tv",
+    "63": "laptop",
+    "64": "mouse",
+    "65": "remote",
+    "66": "keyboard",
+    "67": "cell phone",
+    "68": "microwave",
+    "69": "oven",
+    "70": "toaster",
+    "71": "sink",
+    "72": "refrigerator",
+    "73": "book",
+    "74": "clock",
+    "75": "vase",
+    "76": "scissors",
+    "77": "teddy bear",
+    "78": "hair drier",
+    "79": "toothbrush"
+}
YOLO/yolov8n_silu_coco_640x640_quant_tflite_edgetpu_1/yolov8n_silu_coco_640x640_quant_tflite_edgetpu_1.json ADDED
@@ -0,0 +1,67 @@
+{
+    "ConfigVersion": 6,
+    "Checksum": "1b27ef9c2ceea55429dd1c98a13510e8597495a62bf5d1bf74e72b9bad883502",
+    "DEVICE": [
+        {
+            "DeviceType": "EDGETPU",
+            "RuntimeAgent": "TFLITE",
+            "SupportedDeviceTypes": "TFLITE/EDGETPU"
+        }
+    ],
+    "PRE_PROCESS": [
+        {
+            "InputN": 1,
+            "InputType": "Image",
+            "InputResizeMethod": "bilinear",
+            "InputPadMethod": "letterbox",
+            "ImageBackend": "auto",
+            "InputH": 640,
+            "InputW": 640,
+            "InputC": 3,
+            "InputQuantEn": true,
+            "InputQuantOffset": 0,
+            "InputQuantScale": 0.00392156862745098,
+            "InputImgNormEn": true,
+            "InputImgNormCoeff": 0.00392156862745098,
+            "InputNormMean": [
+                0,
+                0,
+                0
+            ],
+            "InputNormStd": [
+                1,
+                1,
+                1
+            ],
+            "InputTensorLayout": "NHWC",
+            "InputImgSliceType": "SLICE2"
+        }
+    ],
+    "MODEL_PARAMETERS": [
+        {
+            "ModelPath": "yolov8n_silu_coco--640x640_quant_tflite_edgetpu_1.tflite"
+        }
+    ],
+    "POST_PROCESS": [
+        {
+            "OutputPostprocessType": "DetectionYoloV8",
+            "PostProcessorInputs": [
+                220,
+                221,
+                225,
+                222,
+                224,
+                223
+            ],
+            "OutputNumClasses": 80,
+            "LabelsPath": "labels_yolov8n_silu_coco.json",
+            "OutputClassIDAdjustment": 0,
+            "OutputNMSThreshold": 0.6,
+            "MaxDetectionsPerClass": 100,
+            "MaxClassesPerDetection": 1,
+            "UseRegularNMS": true,
+            "OutputConfThreshold": 0.3,
+            "MaxDetections": 100
+        }
+    ]
+}
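To make the PRE_PROCESS block above concrete, the following is a rough NumPy/Pillow sketch of the input pipeline it describes: letterbox-pad to 640x640 with bilinear resizing, NHWC layout, and a quantization scale of 1/255 with zero offset, which combined with the 1/255 normalization coefficient effectively feeds the raw 8-bit pixel values to the Edge TPU model. Function names, the padding color, and the image path are illustrative, not part of the model package.

```python
import numpy as np
from PIL import Image

def letterbox_640(path, size=640):
    """Resize with preserved aspect ratio and pad to size x size (letterbox)."""
    img = Image.open(path).convert("RGB")
    scale = size / max(img.width, img.height)
    new_w, new_h = round(img.width * scale), round(img.height * scale)
    resized = img.resize((new_w, new_h), Image.BILINEAR)  # InputResizeMethod: bilinear
    canvas = Image.new("RGB", (size, size))               # padding color is an assumption
    canvas.paste(resized, ((size - new_w) // 2, (size - new_h) // 2))
    return np.asarray(canvas)                             # HWC, uint8

x = letterbox_640("test.jpg")

# InputImgNormCoeff = 1/255 maps pixels to [0, 1]; InputQuantScale = 1/255 with
# InputQuantOffset = 0 maps them back to uint8, so the quantized tensor equals
# the raw pixels.
normalized = x.astype(np.float32) / 255.0
quantized = np.round(normalized / (1.0 / 255.0)).astype(np.uint8)
assert np.array_equal(quantized, x)

# NHWC batch of 1, as expected by the TFLite Edge TPU model.
input_tensor = quantized[np.newaxis, ...]
print(input_tensor.shape)  # (1, 640, 640, 3)
```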
YOLO/yolov8n_silu_coco_640x640_quant_tflite_edgetpu_1/yolov8n_silu_coco_640x640_quant_tflite_edgetpu_1.tflite ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8023850f1b629752acd65c34cc6f3bdfe1e8d7985327e4ae6081a3b20346415e
+size 3481056
onnx-community/Qwen2-VL-2B-Instruct/.gitattributes ADDED
@@ -0,0 +1,41 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+onnx/decoder_model_merged.onnx_data filter=lfs diff=lfs merge=lfs -text
+onnx/decoder_model_merged_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
+onnx/vision_encoder.onnx_data filter=lfs diff=lfs merge=lfs -text
+onnx/vision_encoder_bnb4.onnx_data filter=lfs diff=lfs merge=lfs -text
+onnx/vision_encoder_q4.onnx_data filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
onnx-community/Qwen2-VL-2B-Instruct/README.md ADDED
@@ -0,0 +1,303 @@
+---
+license: apache-2.0
+library_name: transformers.js
+base_model: Qwen/Qwen2-VL-2B-Instruct
+---
+
+https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct with ONNX weights to be compatible with Transformers.js.
+
+## Usage (Transformers.js)
+
+If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using:
+```bash
+npm i @huggingface/transformers
+```
+
+**Example:** Image+text to text
+
+```js
+import { AutoProcessor, Qwen2VLForConditionalGeneration, RawImage } from "@huggingface/transformers";
+
+// Load processor and model
+const model_id = "onnx-community/Qwen2-VL-2B-Instruct";
+const processor = await AutoProcessor.from_pretrained(model_id);
+const model = await Qwen2VLForConditionalGeneration.from_pretrained(model_id);
+
+// Prepare inputs
+const url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg";
+const image = await (await RawImage.read(url)).resize(448, 448);
+const conversation = [
+  {
+    role: "user",
+    content: [
+      { type: "image" },
+      { type: "text", text: "Describe this image." },
+    ],
+  },
+];
+const text = processor.apply_chat_template(conversation, { add_generation_prompt: true });
+const inputs = await processor(text, image);
+
+// Perform inference
+const outputs = await model.generate({
+  ...inputs,
+  max_new_tokens: 128,
+});
+
+// Decode output
+const decoded = processor.batch_decode(
+  outputs.slice(null, [inputs.input_ids.dims.at(-1), null]),
+  { skip_special_tokens: true },
+);
+console.log(decoded[0]);
+// The image depicts a serene beach scene with a woman and a dog. The woman is sitting on the sand, wearing a plaid shirt, and appears to be engaged in a playful interaction with the dog. The dog, which is a large breed, is sitting on its hind legs and appears to be reaching out to the woman, possibly to give her a high-five or a paw. The background shows the ocean with gentle waves, and the sky is clear, suggesting it might be either sunrise or sunset. The overall atmosphere is calm and relaxed, capturing a moment of connection between the woman and the dog.
+```
+
+## ONNX conversion script
+First, install the following dependencies:
+```sh
+pip install --upgrade git+https://github.com/huggingface/transformers.git onnx==1.17.0 onnxruntime==1.20.1 optimum==1.23.3 onnxslim==0.1.42
+```
+
+```py
+import os
+import torch
+from transformers import (
+    AutoProcessor,
+    Qwen2VLForConditionalGeneration,
+    DynamicCache,
+)
+
+
+class PatchedQwen2VLForConditionalGeneration(Qwen2VLForConditionalGeneration):
+    def forward(self, *args):
+        inputs_embeds, attention_mask, position_ids, *past_key_values_args = args
+
+        # Convert past_key_values list to DynamicCache
+        if len(past_key_values_args) == 0:
+            past_key_values = None
+        else:
+            past_key_values = DynamicCache(self.config.num_hidden_layers)
+            for i in range(self.config.num_hidden_layers):
+                key = past_key_values_args.pop(0)
+                value = past_key_values_args.pop(0)
+                past_key_values.update(key_states=key, value_states=value, layer_idx=i)
+
+        o = super().forward(
+            inputs_embeds=inputs_embeds,
+            attention_mask=attention_mask,
+            position_ids=position_ids,
+            past_key_values=past_key_values,
+        )
+
+        flattened_past_key_values_outputs = {
+            "logits": o.logits,
+        }
+        output_past_key_values: DynamicCache = o.past_key_values
+        for i, (key, value) in enumerate(
+            zip(output_past_key_values.key_cache, output_past_key_values.value_cache)
+        ):
+            flattened_past_key_values_outputs[f"present.{i}.key"] = key
+            flattened_past_key_values_outputs[f"present.{i}.value"] = value
+
+        return flattened_past_key_values_outputs
+
+
+# Constants
+OUTPUT_FOLDER = "output"
+EMBEDDING_MODEL_NAME = "embed_tokens.onnx"
+TEXT_MODEL_NAME = "decoder_model_merged.onnx"
+VISION_MODEL_NAME = "vision_encoder.onnx"
+TEMP_MODEL_OUTPUT_FOLDER = os.path.join(OUTPUT_FOLDER, "temp")
+FINAL_MODEL_OUTPUT_FOLDER = os.path.join(OUTPUT_FOLDER, "onnx")
+
+
+# Load model and processor
+model_id = "Qwen/Qwen2-VL-2B-Instruct"
+model = PatchedQwen2VLForConditionalGeneration.from_pretrained(model_id).eval()
+processor = AutoProcessor.from_pretrained(model_id)
+
+
+# Save model configs and processor
+model.config.save_pretrained(OUTPUT_FOLDER)
+model.generation_config.save_pretrained(OUTPUT_FOLDER)
+processor.save_pretrained(OUTPUT_FOLDER)
+os.makedirs(TEMP_MODEL_OUTPUT_FOLDER, exist_ok=True)
+
+
+# Configuration values
+## Text model
+text_config = model.config
+num_heads = text_config.num_attention_heads
+num_key_value_heads = text_config.num_key_value_heads
+head_dim = text_config.hidden_size // num_heads
+num_layers = text_config.num_hidden_layers
+hidden_size = text_config.hidden_size
+
+## Vision model
+vision_config = model.config.vision_config
+channel = vision_config.in_chans
+temporal_patch_size = vision_config.temporal_patch_size
+patch_size = vision_config.spatial_patch_size
+
+
+# Dummy input sizes
+grid_t, grid_h, grid_w = [1, 16, 16]
+batch_size = 1
+sequence_length = 16
+num_channels = 3
+past_sequence_length = 0
+
+image_batch_size = 1  # TODO: Add support for > 1 images
+assert image_batch_size == 1
+
+
+# Dummy inputs
+## Embedding inputs
+input_ids = torch.randint(
+    0, model.config.vocab_size, (batch_size, sequence_length), dtype=torch.int64
+)
+
+## Text inputs
+dummy_past_key_values_kwargs = {
+    f"past_key_values.{i}.{key}": torch.zeros(
+        batch_size,
+        num_key_value_heads,
+        past_sequence_length,
+        head_dim,
+        dtype=torch.float32,
+    )
+    for i in range(num_layers)
+    for key in ["key", "value"]
+}
+inputs_embeds = torch.ones(
+    batch_size, sequence_length, hidden_size, dtype=torch.float32
+)
+attention_mask = torch.ones(batch_size, sequence_length, dtype=torch.int64)
+position_ids = torch.ones(3, batch_size, sequence_length, dtype=torch.int64)
+
+## Vision inputs
+grid_thw = torch.tensor(
+    [[grid_t, grid_h, grid_w]] * image_batch_size, dtype=torch.int64
+)
+pixel_values = torch.randn(
+    image_batch_size * grid_t * grid_h * grid_w,
+    channel * temporal_patch_size * patch_size * patch_size,
+    dtype=torch.float32,
+)
+
+
+# ONNX Exports
+## Embedding model
+embedding_inputs = dict(input_ids=input_ids)
+embedding_inputs_positional = tuple(embedding_inputs.values())
+model.model.embed_tokens(*embedding_inputs_positional)  # Test forward pass
+EMBED_TOKENS_OUTPUT_PATH = os.path.join(TEMP_MODEL_OUTPUT_FOLDER, EMBEDDING_MODEL_NAME)
+torch.onnx.export(
+    model.model.embed_tokens,
+    args=embedding_inputs_positional,
+    f=EMBED_TOKENS_OUTPUT_PATH,
+    export_params=True,
+    opset_version=14,
+    do_constant_folding=True,
+    input_names=list(embedding_inputs.keys()),
+    output_names=["inputs_embeds"],
+    dynamic_axes={
+        "input_ids": {0: "batch_size", 1: "sequence_length"},
+        "inputs_embeds": {0: "batch_size", 1: "sequence_length"},
+    },
+)
+
+## Text model
+text_inputs = dict(
+    inputs_embeds=inputs_embeds,
+    attention_mask=attention_mask,
+    position_ids=position_ids,
+    **dummy_past_key_values_kwargs,
+)
+text_inputs_positional = tuple(text_inputs.values())
+text_outputs = model.forward(*text_inputs_positional)  # Test forward pass
+TEXT_MODEL_OUTPUT_PATH = os.path.join(TEMP_MODEL_OUTPUT_FOLDER, TEXT_MODEL_NAME)
+torch.onnx.export(
+    model,
+    args=text_inputs_positional,
+    f=TEXT_MODEL_OUTPUT_PATH,
+    export_params=True,
+    opset_version=14,
+    do_constant_folding=True,
+    input_names=list(text_inputs.keys()),
+    output_names=["logits"]
+    + [f"present.{i}.{key}" for i in range(num_layers) for key in ["key", "value"]],
+    dynamic_axes={
+        "inputs_embeds": {0: "batch_size", 1: "sequence_length"},
+        "attention_mask": {0: "batch_size", 1: "sequence_length"},
+        "position_ids": {1: "batch_size", 2: "sequence_length"},
+        **{
+            f"past_key_values.{i}.{key}": {0: "batch_size", 2: "past_sequence_length"}
+            for i in range(num_layers)
+            for key in ["key", "value"]
+        },
+        "logits": {0: "batch_size", 1: "sequence_length"},
+        **{
+            f"present.{i}.{key}": {0: "batch_size", 2: "past_sequence_length + 1"}
+            for i in range(num_layers)
+            for key in ["key", "value"]
+        },
+    },
+)
+
+## Vision model
+vision_inputs = dict(
+    pixel_values=pixel_values,
+    grid_thw=grid_thw,
+)
+vision_inputs_positional = tuple(vision_inputs.values())
+vision_outputs = model.visual.forward(*vision_inputs_positional)  # Test forward pass
+VISION_ENCODER_OUTPUT_PATH = os.path.join(TEMP_MODEL_OUTPUT_FOLDER, VISION_MODEL_NAME)
+torch.onnx.export(
+    model.visual,
+    args=vision_inputs_positional,
+    f=VISION_ENCODER_OUTPUT_PATH,
+    export_params=True,
+    opset_version=14,
+    do_constant_folding=True,
+    input_names=list(vision_inputs.keys()),
+    output_names=["image_features"],
+    dynamic_axes={
+        "pixel_values": {
+            0: "batch_size * grid_t * grid_h * grid_w",
+            1: "channel * temporal_patch_size * patch_size * patch_size",
+        },
+        "grid_thw": {0: "batch_size"},
+        "image_features": {0: "batch_size * grid_t * grid_h * grid_w"},
+    },
+)
+
+
+# Post-processing
+import onnx
+import onnxslim
+from optimum.onnx.graph_transformations import check_and_save_model
+
+os.makedirs(FINAL_MODEL_OUTPUT_FOLDER, exist_ok=True)
+for name in (EMBEDDING_MODEL_NAME, TEXT_MODEL_NAME, VISION_MODEL_NAME):
+    temp_model_path = os.path.join(TEMP_MODEL_OUTPUT_FOLDER, name)
+
+    ## Shape inference (especially needed by the vision encoder)
+    onnx.shape_inference.infer_shapes_path(temp_model_path, check_type=True, strict_mode=True)
+
+    ## Attempt to optimize the model with onnxslim
+    try:
+        model = onnxslim.slim(temp_model_path)
+    except Exception as e:
+        print(f"Failed to slim {model}: {e}")
+        model = onnx.load(temp_model_path)
+
+    ## Save model
+    final_model_path = os.path.join(FINAL_MODEL_OUTPUT_FOLDER, name)
+    check_and_save_model(model, final_model_path)
+
+## Cleanup
+import shutil
+shutil.rmtree(TEMP_MODEL_OUTPUT_FOLDER)
+```
onnx-community/Qwen2-VL-2B-Instruct/added_tokens.json ADDED
@@ -0,0 +1,16 @@
+{
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}
onnx-community/Qwen2-VL-2B-Instruct/chat_template.json ADDED
@@ -0,0 +1,3 @@
+{
+  "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
+}
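The `chat_template` string above is a standard Jinja2 template, so rendering it directly is a quick way to see exactly what prompt the processor builds around an image-plus-text turn. A small sketch (the message content is an arbitrary example, not taken from this repository):

```python
import json
from jinja2 import Template  # the template only uses built-in Jinja2 features

with open("onnx-community/Qwen2-VL-2B-Instruct/chat_template.json") as f:
    chat_template = json.load(f)["chat_template"]

messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "Describe this image."},
        ],
    }
]

prompt = Template(chat_template).render(messages=messages, add_generation_prompt=True)
print(prompt)
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# <|vision_start|><|image_pad|><|vision_end|>Describe this image.<|im_end|>
# <|im_start|>assistant
```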
onnx-community/Qwen2-VL-2B-Instruct/config.json ADDED
@@ -0,0 +1,56 @@
+{
+  "_attn_implementation_autoset": true,
+  "_name_or_path": "Qwen/Qwen2-VL-2B-Instruct",
+  "architectures": [
+    "Qwen2VLForConditionalGeneration"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "hidden_act": "silu",
+  "hidden_size": 1536,
+  "image_token_id": 151655,
+  "initializer_range": 0.02,
+  "intermediate_size": 8960,
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "model_type": "qwen2_vl",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 2,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": {
+    "mrope_section": [
+      16,
+      24,
+      24
+    ],
+    "rope_type": "default",
+    "type": "default"
+  },
+  "rope_theta": 1000000.0,
+  "sliding_window": 32768,
+  "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.46.1",
+  "transformers.js_config": {
+    "dtype": {
+      "vision_encoder": "q8",
+      "embed_tokens": "fp16",
+      "decoder_model_merged": "q4"
+    }
+  },
+  "use_cache": true,
+  "use_sliding_window": false,
+  "video_token_id": 151656,
+  "vision_config": {
+    "hidden_size": 1536,
+    "in_chans": 3,
+    "model_type": "qwen2_vl",
+    "spatial_patch_size": 14
+  },
+  "vision_end_token_id": 151653,
+  "vision_start_token_id": 151652,
+  "vision_token_id": 151654,
+  "vocab_size": 151936
+}
onnx-community/Qwen2-VL-2B-Instruct/generation_config.json ADDED
@@ -0,0 +1,13 @@
+{
+  "bos_token_id": 151643,
+  "do_sample": true,
+  "eos_token_id": [
+    151645,
+    151643
+  ],
+  "pad_token_id": 151643,
+  "temperature": 0.01,
+  "top_k": 1,
+  "top_p": 0.001,
+  "transformers_version": "4.46.1"
+}
onnx-community/Qwen2-VL-2B-Instruct/merges.txt ADDED
The diff for this file is too large to render.
onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfb545b801fe12aadf63e6fea2672118953a9ea4a6f93f3f54ef5a5abf640536
+size 813442
onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged.onnx_data ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6192eca60b017bd1065578fcd5ef536557a2ae197196101bbd4844dd55b551c
+size 6174857216
onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f2cf55550007bed1c5453270ea5fc236bddbbc22ca7ba1e7772b514372d508b
+size 869655116
onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_fp16.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:63a6d8aaadbd997dd1a3bcb047814e2e11106ace5bd05463e34341e18625261e
+size 853568
onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_fp16.onnx_data ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4dea38e3e9305ad4b63db905f2d55b64fd201a8178a27495d406188a6c437139
+size 3087399936
onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_int8.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c747059a7294565f59dc6c6852af398477c53eaabf8c605a313d21e71f90cdf
+size 1545150393
onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_q4.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5492bb8064c31f61c48106b3cb140f1a81f9123205f52234329e1119f348eae
+size 966126572
onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_q4f16.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cbf3e285eb58d975fef96da82faee0277416b5c7c3342718f0fcc0330b27033
+size 869378228
onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_quantized.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c747059a7294565f59dc6c6852af398477c53eaabf8c605a313d21e71f90cdf
+size 1545150393
onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_uint8.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:853451575e3be3bfe1a0506e6b2b858853f1ab1d974bed48d11fc7868b06138a
+size 1545150497
onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:152d4247d40033d1b6f23ba4b25674850fbaf1b10d259f74da69b31944114cd6
+size 933495086
onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88541f1b9947f617d4c1d5cea78ab8cb26574f74db5a7d64d7a9db22099aaa30
+size 933495105
onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_fp16.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a717f0d2010b07f99b1982b5407f4c55143ca9ec45cb574b3ba272211024c661
+size 466747781
onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_int8.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b0c7a8061d5a116be58f9c58637a85e0b87795426dec299c66af21a9ff77a6f
+size 233374240
onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_q4.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88541f1b9947f617d4c1d5cea78ab8cb26574f74db5a7d64d7a9db22099aaa30
+size 933495105
onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_q4f16.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1697b2fd5563f22684826e9d6c5c24f2a8da74caa44b123b51f7eab10af1a851
+size 466747800
onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_quantized.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b0c7a8061d5a116be58f9c58637a85e0b87795426dec299c66af21a9ff77a6f
+size 233374240
onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_uint8.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b0c7a8061d5a116be58f9c58637a85e0b87795426dec299c66af21a9ff77a6f
+size 233374240
onnx-community/Qwen2-VL-2B-Instruct/onnx/vision_encoder.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b40135a66ba4b497b7b41252cc48abcfce280897d36545ac8c1e843f9d41f85a
+size 1683152