niobures committed (verified)
Commit c3dd1c2 · 1 Parent(s): 182154c

LaViLa (EK-100_MIR), Llava-v1.5-7B-GGUF, MobileVLM, Qwen2-VL-2B-Instruct, YOLO v8n, YOLO World

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. .gitattributes +19 -0
  2. LLaVA/Llava-v1.5-7B-GGUF/llava-v1.5-7b-Q2_K.gguf +3 -0
  3. LLaVA/Llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-model-f16.gguf +3 -0
  4. LaViLa/EK-100_MIR/TSF-B/clip_openai_timesformer_base.ft_ek100_mir.ep_0085.md5sum_c67d95.pth +3 -0
  5. MobileVLM/MobileVLM-1.7B-GGUF/.gitattributes +39 -0
  6. MobileVLM/MobileVLM-1.7B-GGUF/MobileVLM-1.7B-Q4_K.gguf +3 -0
  7. MobileVLM/MobileVLM-1.7B-GGUF/MobileVLM-1.7B-Q5_K.gguf +3 -0
  8. MobileVLM/MobileVLM-1.7B-GGUF/MobileVLM-1.7B-Q6_K.gguf +3 -0
  9. MobileVLM/MobileVLM-1.7B-GGUF/MobileVLM-1.7B-mmproj-f16.gguf +3 -0
  10. MobileVLM/MobileVLM_V2-1.7B-GGUF/.gitattributes +37 -0
  11. MobileVLM/MobileVLM_V2-1.7B-GGUF/README.md +16 -0
  12. MobileVLM/MobileVLM_V2-1.7B-GGUF/ggml-model-q4_k.gguf +3 -0
  13. MobileVLM/MobileVLM_V2-1.7B-GGUF/mmproj-model-f16.gguf +3 -0
  14. Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/.gitattributes +41 -0
  15. Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/Qwen2-VL-2B-Instruct-Q3_K_L.gguf +3 -0
  16. Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/Qwen2-VL-2B-Instruct-Q4_K_M.gguf +3 -0
  17. Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/Qwen2-VL-2B-Instruct-Q6_K.gguf +3 -0
  18. Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/Qwen2-VL-2B-Instruct-Q8_0.gguf +3 -0
  19. Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/README.md +29 -0
  20. Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/mmproj-model-f32.gguf +3 -0
  21. YOLO/yolo-world/yolo-world-s.pt +3 -0
  22. YOLO/yolov8n_silu_coco_640x640_quant_tflite_edgetpu_1/labels_yolov8n_silu_coco.json +82 -0
  23. YOLO/yolov8n_silu_coco_640x640_quant_tflite_edgetpu_1/yolov8n_silu_coco_640x640_quant_tflite_edgetpu_1.json +67 -0
  24. YOLO/yolov8n_silu_coco_640x640_quant_tflite_edgetpu_1/yolov8n_silu_coco_640x640_quant_tflite_edgetpu_1.tflite +3 -0
  25. onnx-community/Qwen2-VL-2B-Instruct/.gitattributes +41 -0
  26. onnx-community/Qwen2-VL-2B-Instruct/README.md +303 -0
  27. onnx-community/Qwen2-VL-2B-Instruct/added_tokens.json +16 -0
  28. onnx-community/Qwen2-VL-2B-Instruct/chat_template.json +3 -0
  29. onnx-community/Qwen2-VL-2B-Instruct/config.json +56 -0
  30. onnx-community/Qwen2-VL-2B-Instruct/generation_config.json +13 -0
  31. onnx-community/Qwen2-VL-2B-Instruct/merges.txt +0 -0
  32. onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged.onnx +3 -0
  33. onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged.onnx_data +3 -0
  34. onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_bnb4.onnx +3 -0
  35. onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_fp16.onnx +3 -0
  36. onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_fp16.onnx_data +3 -0
  37. onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_int8.onnx +3 -0
  38. onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_q4.onnx +3 -0
  39. onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_q4f16.onnx +3 -0
  40. onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_quantized.onnx +3 -0
  41. onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_uint8.onnx +3 -0
  42. onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens.onnx +3 -0
  43. onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_bnb4.onnx +3 -0
  44. onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_fp16.onnx +3 -0
  45. onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_int8.onnx +3 -0
  46. onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_q4.onnx +3 -0
  47. onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_q4f16.onnx +3 -0
  48. onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_quantized.onnx +3 -0
  49. onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_uint8.onnx +3 -0
  50. onnx-community/Qwen2-VL-2B-Instruct/onnx/vision_encoder.onnx +3 -0
.gitattributes CHANGED
@@ -33,3 +33,22 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ LLaVA/Llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-model-f16.gguf filter=lfs diff=lfs merge=lfs -text
+ LLaVA/Llava-v1.5-7B-GGUF/llava-v1.5-7b-Q2_K.gguf filter=lfs diff=lfs merge=lfs -text
+ MobileVLM/MobileVLM_V2-1.7B-GGUF/ggml-model-q4_k.gguf filter=lfs diff=lfs merge=lfs -text
+ MobileVLM/MobileVLM_V2-1.7B-GGUF/mmproj-model-f16.gguf filter=lfs diff=lfs merge=lfs -text
+ MobileVLM/MobileVLM-1.7B-GGUF/MobileVLM-1.7B-mmproj-f16.gguf filter=lfs diff=lfs merge=lfs -text
+ MobileVLM/MobileVLM-1.7B-GGUF/MobileVLM-1.7B-Q4_K.gguf filter=lfs diff=lfs merge=lfs -text
+ MobileVLM/MobileVLM-1.7B-GGUF/MobileVLM-1.7B-Q5_K.gguf filter=lfs diff=lfs merge=lfs -text
+ MobileVLM/MobileVLM-1.7B-GGUF/MobileVLM-1.7B-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
+ onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
+ onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged.onnx_data filter=lfs diff=lfs merge=lfs -text
+ onnx-community/Qwen2-VL-2B-Instruct/onnx/vision_encoder_bnb4.onnx_data filter=lfs diff=lfs merge=lfs -text
+ onnx-community/Qwen2-VL-2B-Instruct/onnx/vision_encoder_q4.onnx_data filter=lfs diff=lfs merge=lfs -text
+ onnx-community/Qwen2-VL-2B-Instruct/onnx/vision_encoder.onnx_data filter=lfs diff=lfs merge=lfs -text
+ onnx-community/Qwen2-VL-2B-Instruct/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/mmproj-model-f32.gguf filter=lfs diff=lfs merge=lfs -text
+ Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/Qwen2-VL-2B-Instruct-Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text
+ Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/Qwen2-VL-2B-Instruct-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+ Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/Qwen2-VL-2B-Instruct-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
+ Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/Qwen2-VL-2B-Instruct-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
LLaVA/Llava-v1.5-7B-GGUF/llava-v1.5-7b-Q2_K.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dde114456d7b4ea5cc07c75e363f9807db1dcf82f61168cc91969316601ca467
+ size 2532863776
LLaVA/Llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-model-f16.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:50da4e5b0a011615f77686f9b02613571e65d23083c225e107c08c3b1775d9b1
+ size 624434368
LaViLa/EK-100_MIR/TSF-B/clip_openai_timesformer_base.ft_ek100_mir.ep_0085.md5sum_c67d95.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:17e8302ae99b607459b7c01480bf85a7ba664b8afc91b0bbf6a9be662887c91d
+ size 710829395
MobileVLM/MobileVLM-1.7B-GGUF/.gitattributes ADDED
@@ -0,0 +1,39 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ MobileVLM-1.7B-mmproj-f16.gguf filter=lfs diff=lfs merge=lfs -text
+ MobileVLM-1.7B-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
+ MobileVLM-1.7B-Q4_K.gguf filter=lfs diff=lfs merge=lfs -text
+ MobileVLM-1.7B-Q5_K.gguf filter=lfs diff=lfs merge=lfs -text
MobileVLM/MobileVLM-1.7B-GGUF/MobileVLM-1.7B-Q4_K.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:41f0487ea4d3a4f58c467d779bfe66c02d9b33fa2d33b85cf456df89eb35dab9
+ size 834055776
MobileVLM/MobileVLM-1.7B-GGUF/MobileVLM-1.7B-Q5_K.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5ca45d185225a61c0b003b7a4fe072ddae9ea57bbda7e7025e7dec50421192b8
+ size 972795488
MobileVLM/MobileVLM-1.7B-GGUF/MobileVLM-1.7B-Q6_K.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:73ee20a00293a6419e311c349f7dc257ce0a15e9e59962933890204a0fcc0881
+ size 1120206432
MobileVLM/MobileVLM-1.7B-GGUF/MobileVLM-1.7B-mmproj-f16.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7d9855d323cee2a1797a88f9d7057ce26b21dcd62a50b382c4ff44ea60c77e39
+ size 620384896
MobileVLM/MobileVLM_V2-1.7B-GGUF/.gitattributes ADDED
@@ -0,0 +1,37 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ ggml-model-q4_k.gguf filter=lfs diff=lfs merge=lfs -text
+ mmproj-model-f16.gguf filter=lfs diff=lfs merge=lfs -text
MobileVLM/MobileVLM_V2-1.7B-GGUF/README.md ADDED
@@ -0,0 +1,16 @@
+ ---
+ license: apache-2.0
+ tags:
+ - MobileVLM V2
+ ---
+ ## Model Summary
+ MobileVLM V2 is a family of vision language models that significantly improves upon MobileVLM, showing that a careful combination of novel architectural design, an improved training scheme tailored for mobile VLMs, and rich, high-quality dataset curation can substantially benefit VLM performance. Specifically, MobileVLM V2 1.7B achieves better or on-par performance on standard VLM benchmarks compared with much larger VLMs at the 3B scale. Notably, the MobileVLM_V2-3B model outperforms a large variety of VLMs at the 7B+ scale.
+
+ MobileVLM_V2-1.7B was built on our [MobileLLaMA-1.4B-Chat](https://huggingface.co/mtgv/MobileLLaMA-1.4B-Chat) to facilitate off-the-shelf deployment.
+
+ ## Model Sources
+ - Repository: https://github.com/Meituan-AutoML/MobileVLM
+ - Paper: [MobileVLM V2: Faster and Stronger Baseline for Vision Language Model](https://arxiv.org/abs/2402.03766)
+
+ ## How to Get Started with the Model
+ Inference examples can be found on [GitHub](https://github.com/Meituan-AutoML/MobileVLM).
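The README above defers to the upstream GitHub repository for inference examples. For the two GGUF files added alongside it in this folder, the following is only a minimal sketch of local inference, assuming a llama.cpp build whose LLaVA example binary is named `llama-llava-cli` (older releases call it `llava-cli`) and a local test image at `./demo.jpg`; the binary name, flags, and paths are illustrative and may need adjusting for your checkout.

```bash
# Sketch only: run the MobileVLM_V2-1.7B GGUF pair with llama.cpp's LLaVA example.
# -m       quantized language model
# --mmproj multimodal projector in GGUF form
./llama-llava-cli \
  -m MobileVLM/MobileVLM_V2-1.7B-GGUF/ggml-model-q4_k.gguf \
  --mmproj MobileVLM/MobileVLM_V2-1.7B-GGUF/mmproj-model-f16.gguf \
  --image ./demo.jpg \
  -p "Describe the image in one sentence." \
  --temp 0.0 -n 128
```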
MobileVLM/MobileVLM_V2-1.7B-GGUF/ggml-model-q4_k.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:15d4bd09293404831902c23dd898aa2cc7b4b223b6c39a64e330601ef72d99db
+ size 791817856
MobileVLM/MobileVLM_V2-1.7B-GGUF/mmproj-model-f16.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:57966afa654e9d46a11b2a4b17989c2d487cd961f702c4fe310f86db5e30aab4
+ size 595103072
Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/.gitattributes ADDED
@@ -0,0 +1,41 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ Qwen2-VL-2B-Instruct-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
+ Qwen2-VL-2B-Instruct-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
+ Qwen2-VL-2B-Instruct-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+ Qwen2-VL-2B-Instruct-Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text
+ mmproj-Qwen2-VL-2B-Instruct-f32.gguf filter=lfs diff=lfs merge=lfs -text
+ mmproj-model-f32.gguf filter=lfs diff=lfs merge=lfs -text
Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/Qwen2-VL-2B-Instruct-Q3_K_L.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:feb289177bcb04bec880720eb7a10890148f8a6586d911b921a301f55d632807
+ size 880161248
Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/Qwen2-VL-2B-Instruct-Q4_K_M.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:20868587821c6b9f82089daac35918f8150c3568e3c4a2cdd51a24b6dd75ab79
+ size 986046944
Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/Qwen2-VL-2B-Instruct-Q6_K.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b7505e951bbeda59f36e0ff413f5a76fec9281f640613e549e0f4398329cf631
+ size 1272738272
Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/Qwen2-VL-2B-Instruct-Q8_0.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c11dff3f74b326668600607a04b412c320d9dfe917a77f48ed6abbd962dd4a44
+ size 1646571488
Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/README.md ADDED
@@ -0,0 +1,29 @@
+ ---
+ quantized_by: bartowski
+ pipeline_tag: text-generation
+ ---
+ ## 💫 Community Model> Qwen2 VL 2B Instruct by Qwen
+
+ *👾 [LM Studio](https://lmstudio.ai) Community models highlights program. Highlighting new & noteworthy models by the community. Join the conversation on [Discord](https://discord.gg/aPQfnNkxGC)*.
+
+ **Model creator:** [Qwen](https://huggingface.co/Qwen)<br>
+ **Original model**: [Qwen2-VL-2B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct)<br>
+ **GGUF quantization:** provided by [bartowski](https://huggingface.co/bartowski) based on `llama.cpp` release [b4327](https://github.com/ggerganov/llama.cpp/releases/tag/b4327)<br>
+
+ ## Technical Details
+
+ Supports context length of 32k tokens.
+
+ Vision model capable of understanding images of various resolutions and ratios.
+
+ Complex reasoning for agentic automation with vision.
+
+ Multilingual support.
+
+ ## Special thanks
+
+ 🙏 Special thanks to [Georgi Gerganov](https://github.com/ggerganov) and the whole team working on [llama.cpp](https://github.com/ggerganov/llama.cpp/) for making all of this possible.
+
+ ## Disclaimers
+
+ LM Studio is not the creator, originator, or owner of any Model featured in the Community Model Program. Each Community Model is created and provided by third parties. LM Studio does not endorse, support, represent or guarantee the completeness, truthfulness, accuracy, or reliability of any Community Model. You understand that Community Models can produce content that might be offensive, harmful, inaccurate or otherwise inappropriate, or deceptive. Each Community Model is the sole responsibility of the person or entity who originated such Model. LM Studio may not monitor or control the Community Models and cannot, and does not, take responsibility for any such Model. LM Studio disclaims all warranties or guarantees about the accuracy, reliability or benefits of the Community Models. LM Studio further disclaims any warranty that the Community Model will meet your requirements, be secure, uninterrupted or available at any time or location, or error-free, viruses-free, or that any errors will be corrected, or otherwise. You will be solely responsible for any damage resulting from your use of or access to the Community Models, your downloading of any Community Model, or use of any other Community Model provided by or through LM Studio.
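Usage note for this folder: each quantized language model pairs with `mmproj-model-f32.gguf`, the vision projector. Below is only a minimal sketch, assuming a llama.cpp build from around release b4327 that ships the Qwen2-VL example binary (named here `llama-qwen2vl-cli`; the exact name and flags vary by release) and a local image at `./demo.jpg`.

```bash
# Sketch only: Qwen2-VL-2B-Instruct GGUF inference with llama.cpp's Qwen2-VL example.
./llama-qwen2vl-cli \
  -m Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/Qwen2-VL-2B-Instruct-Q4_K_M.gguf \
  --mmproj Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/mmproj-model-f32.gguf \
  --image ./demo.jpg \
  -p "Describe this image." \
  --temp 0.1 -n 128
```

Q4_K_M is a reasonable default size/quality trade-off; swap in Q3_K_L, Q6_K, or Q8_0 as memory allows.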
Qwen2-VL/Qwen2-VL-2B-Instruct-GGUF/mmproj-model-f32.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b5a4e3b50652c60f7a027fb113d7e9d8f9411b0702f4de4e0743f71a3355530
+ size 2661115392
YOLO/yolo-world/yolo-world-s.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2b36b186fa4279efabade126b2ccce57008f36523e41bf344a6fd155e49a0368
+ size 27166882
YOLO/yolov8n_silu_coco_640x640_quant_tflite_edgetpu_1/labels_yolov8n_silu_coco.json ADDED
@@ -0,0 +1,82 @@
+ {
+   "0": "person",
+   "1": "bicycle",
+   "2": "car",
+   "3": "motorcycle",
+   "4": "airplane",
+   "5": "bus",
+   "6": "train",
+   "7": "truck",
+   "8": "boat",
+   "9": "traffic light",
+   "10": "fire hydrant",
+   "11": "stop sign",
+   "12": "parking meter",
+   "13": "bench",
+   "14": "bird",
+   "15": "cat",
+   "16": "dog",
+   "17": "horse",
+   "18": "sheep",
+   "19": "cow",
+   "20": "elephant",
+   "21": "bear",
+   "22": "zebra",
+   "23": "giraffe",
+   "24": "backpack",
+   "25": "umbrella",
+   "26": "handbag",
+   "27": "tie",
+   "28": "suitcase",
+   "29": "frisbee",
+   "30": "skis",
+   "31": "snowboard",
+   "32": "sports ball",
+   "33": "kite",
+   "34": "baseball bat",
+   "35": "baseball glove",
+   "36": "skateboard",
+   "37": "surfboard",
+   "38": "tennis racket",
+   "39": "bottle",
+   "40": "wine glass",
+   "41": "cup",
+   "42": "fork",
+   "43": "knife",
+   "44": "spoon",
+   "45": "bowl",
+   "46": "banana",
+   "47": "apple",
+   "48": "sandwich",
+   "49": "orange",
+   "50": "broccoli",
+   "51": "carrot",
+   "52": "hot dog",
+   "53": "pizza",
+   "54": "donut",
+   "55": "cake",
+   "56": "chair",
+   "57": "couch",
+   "58": "potted plant",
+   "59": "bed",
+   "60": "dining table",
+   "61": "toilet",
+   "62": "tv",
+   "63": "laptop",
+   "64": "mouse",
+   "65": "remote",
+   "66": "keyboard",
+   "67": "cell phone",
+   "68": "microwave",
+   "69": "oven",
+   "70": "toaster",
+   "71": "sink",
+   "72": "refrigerator",
+   "73": "book",
+   "74": "clock",
+   "75": "vase",
+   "76": "scissors",
+   "77": "teddy bear",
+   "78": "hair drier",
+   "79": "toothbrush"
+ }
YOLO/yolov8n_silu_coco_640x640_quant_tflite_edgetpu_1/yolov8n_silu_coco_640x640_quant_tflite_edgetpu_1.json ADDED
@@ -0,0 +1,67 @@
+ {
+     "ConfigVersion": 6,
+     "Checksum": "1b27ef9c2ceea55429dd1c98a13510e8597495a62bf5d1bf74e72b9bad883502",
+     "DEVICE": [
+         {
+             "DeviceType": "EDGETPU",
+             "RuntimeAgent": "TFLITE",
+             "SupportedDeviceTypes": "TFLITE/EDGETPU"
+         }
+     ],
+     "PRE_PROCESS": [
+         {
+             "InputN": 1,
+             "InputType": "Image",
+             "InputResizeMethod": "bilinear",
+             "InputPadMethod": "letterbox",
+             "ImageBackend": "auto",
+             "InputH": 640,
+             "InputW": 640,
+             "InputC": 3,
+             "InputQuantEn": true,
+             "InputQuantOffset": 0,
+             "InputQuantScale": 0.00392156862745098,
+             "InputImgNormEn": true,
+             "InputImgNormCoeff": 0.00392156862745098,
+             "InputNormMean": [
+                 0,
+                 0,
+                 0
+             ],
+             "InputNormStd": [
+                 1,
+                 1,
+                 1
+             ],
+             "InputTensorLayout": "NHWC",
+             "InputImgSliceType": "SLICE2"
+         }
+     ],
+     "MODEL_PARAMETERS": [
+         {
+             "ModelPath": "yolov8n_silu_coco--640x640_quant_tflite_edgetpu_1.tflite"
+         }
+     ],
+     "POST_PROCESS": [
+         {
+             "OutputPostprocessType": "DetectionYoloV8",
+             "PostProcessorInputs": [
+                 220,
+                 221,
+                 225,
+                 222,
+                 224,
+                 223
+             ],
+             "OutputNumClasses": 80,
+             "LabelsPath": "labels_yolov8n_silu_coco.json",
+             "OutputClassIDAdjustment": 0,
+             "OutputNMSThreshold": 0.6,
+             "MaxDetectionsPerClass": 100,
+             "MaxClassesPerDetection": 1,
+             "UseRegularNMS": true,
+             "OutputConfThreshold": 0.3,
+             "MaxDetections": 100
+         }
+     ]
+ }
YOLO/yolov8n_silu_coco_640x640_quant_tflite_edgetpu_1/yolov8n_silu_coco_640x640_quant_tflite_edgetpu_1.tflite ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8023850f1b629752acd65c34cc6f3bdfe1e8d7985327e4ae6081a3b20346415e
+ size 3481056
onnx-community/Qwen2-VL-2B-Instruct/.gitattributes ADDED
@@ -0,0 +1,41 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ onnx/decoder_model_merged.onnx_data filter=lfs diff=lfs merge=lfs -text
+ onnx/decoder_model_merged_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
+ onnx/vision_encoder.onnx_data filter=lfs diff=lfs merge=lfs -text
+ onnx/vision_encoder_bnb4.onnx_data filter=lfs diff=lfs merge=lfs -text
+ onnx/vision_encoder_q4.onnx_data filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
onnx-community/Qwen2-VL-2B-Instruct/README.md ADDED
@@ -0,0 +1,303 @@
+ ---
+ license: apache-2.0
+ library_name: transformers.js
+ base_model: Qwen/Qwen2-VL-2B-Instruct
+ ---
+
+ https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct with ONNX weights to be compatible with Transformers.js.
+
+ ## Usage (Transformers.js)
+
+ If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using:
+ ```bash
+ npm i @huggingface/transformers
+ ```
+
+ **Example:** Image+text to text
+
+ ```js
+ import { AutoProcessor, Qwen2VLForConditionalGeneration, RawImage } from "@huggingface/transformers";
+
+ // Load processor and model
+ const model_id = "onnx-community/Qwen2-VL-2B-Instruct";
+ const processor = await AutoProcessor.from_pretrained(model_id);
+ const model = await Qwen2VLForConditionalGeneration.from_pretrained(model_id);
+
+ // Prepare inputs
+ const url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg";
+ const image = await (await RawImage.read(url)).resize(448, 448);
+ const conversation = [
+   {
+     role: "user",
+     content: [
+       { type: "image" },
+       { type: "text", text: "Describe this image." },
+     ],
+   },
+ ];
+ const text = processor.apply_chat_template(conversation, { add_generation_prompt: true });
+ const inputs = await processor(text, image);
+
+ // Perform inference
+ const outputs = await model.generate({
+   ...inputs,
+   max_new_tokens: 128,
+ });
+
+ // Decode output
+ const decoded = processor.batch_decode(
+   outputs.slice(null, [inputs.input_ids.dims.at(-1), null]),
+   { skip_special_tokens: true },
+ );
+ console.log(decoded[0]);
+ // The image depicts a serene beach scene with a woman and a dog. The woman is sitting on the sand, wearing a plaid shirt, and appears to be engaged in a playful interaction with the dog. The dog, which is a large breed, is sitting on its hind legs and appears to be reaching out to the woman, possibly to give her a high-five or a paw. The background shows the ocean with gentle waves, and the sky is clear, suggesting it might be either sunrise or sunset. The overall atmosphere is calm and relaxed, capturing a moment of connection between the woman and the dog.
+ ```
+
+ ## ONNX conversion script:
+ First, install the following dependencies:
+ ```sh
+ pip install --upgrade git+https://github.com/huggingface/transformers.git onnx==1.17.0 onnxruntime==1.20.1 optimum==1.23.3 onnxslim==0.1.42
+ ```
+
+ ```py
+ import os
+ import torch
+ from transformers import (
+     AutoProcessor,
+     Qwen2VLForConditionalGeneration,
+     DynamicCache,
+ )
+
+
+ class PatchedQwen2VLForConditionalGeneration(Qwen2VLForConditionalGeneration):
+     def forward(self, *args):
+         inputs_embeds, attention_mask, position_ids, *past_key_values_args = args
+
+         # Convert past_key_values list to DynamicCache
+         if len(past_key_values_args) == 0:
+             past_key_values = None
+         else:
+             past_key_values = DynamicCache(self.config.num_hidden_layers)
+             for i in range(self.config.num_hidden_layers):
+                 key = past_key_values_args.pop(0)
+                 value = past_key_values_args.pop(0)
+                 past_key_values.update(key_states=key, value_states=value, layer_idx=i)
+
+         o = super().forward(
+             inputs_embeds=inputs_embeds,
+             attention_mask=attention_mask,
+             position_ids=position_ids,
+             past_key_values=past_key_values,
+         )
+
+         flattened_past_key_values_outputs = {
+             "logits": o.logits,
+         }
+         output_past_key_values: DynamicCache = o.past_key_values
+         for i, (key, value) in enumerate(
+             zip(output_past_key_values.key_cache, output_past_key_values.value_cache)
+         ):
+             flattened_past_key_values_outputs[f"present.{i}.key"] = key
+             flattened_past_key_values_outputs[f"present.{i}.value"] = value
+
+         return flattened_past_key_values_outputs
+
+
+ # Constants
+ OUTPUT_FOLDER = "output"
+ EMBEDDING_MODEL_NAME = "embed_tokens.onnx"
+ TEXT_MODEL_NAME = "decoder_model_merged.onnx"
+ VISION_MODEL_NAME = "vision_encoder.onnx"
+ TEMP_MODEL_OUTPUT_FOLDER = os.path.join(OUTPUT_FOLDER, "temp")
+ FINAL_MODEL_OUTPUT_FOLDER = os.path.join(OUTPUT_FOLDER, "onnx")
+
+
+ # Load model and processor
+ model_id = "Qwen/Qwen2-VL-2B-Instruct"
+ model = PatchedQwen2VLForConditionalGeneration.from_pretrained(model_id).eval()
+ processor = AutoProcessor.from_pretrained(model_id)
+
+
+ # Save model configs and processor
+ model.config.save_pretrained(OUTPUT_FOLDER)
+ model.generation_config.save_pretrained(OUTPUT_FOLDER)
+ processor.save_pretrained(OUTPUT_FOLDER)
+ os.makedirs(TEMP_MODEL_OUTPUT_FOLDER, exist_ok=True)
+
+
+ # Configuration values
+ ## Text model
+ text_config = model.config
+ num_heads = text_config.num_attention_heads
+ num_key_value_heads = text_config.num_key_value_heads
+ head_dim = text_config.hidden_size // num_heads
+ num_layers = text_config.num_hidden_layers
+ hidden_size = text_config.hidden_size
+
+ ## Vision model
+ vision_config = model.config.vision_config
+ channel = vision_config.in_chans
+ temporal_patch_size = vision_config.temporal_patch_size
+ patch_size = vision_config.spatial_patch_size
+
+
+ # Dummy input sizes
+ grid_t, grid_h, grid_w = [1, 16, 16]
+ batch_size = 1
+ sequence_length = 16
+ num_channels = 3
+ past_sequence_length = 0
+
+ image_batch_size = 1  # TODO: Add support for > 1 images
+ assert image_batch_size == 1
+
+
+ # Dummy inputs
+ ## Embedding inputs
+ input_ids = torch.randint(
+     0, model.config.vocab_size, (batch_size, sequence_length), dtype=torch.int64
+ )
+
+ ## Text inputs
+ dummy_past_key_values_kwargs = {
+     f"past_key_values.{i}.{key}": torch.zeros(
+         batch_size,
+         num_key_value_heads,
+         past_sequence_length,
+         head_dim,
+         dtype=torch.float32,
+     )
+     for i in range(num_layers)
+     for key in ["key", "value"]
+ }
+ inputs_embeds = torch.ones(
+     batch_size, sequence_length, hidden_size, dtype=torch.float32
+ )
+ attention_mask = torch.ones(batch_size, sequence_length, dtype=torch.int64)
+ position_ids = torch.ones(3, batch_size, sequence_length, dtype=torch.int64)
+
+ ## Vision inputs
+ grid_thw = torch.tensor(
+     [[grid_t, grid_h, grid_w]] * image_batch_size, dtype=torch.int64
+ )
+ pixel_values = torch.randn(
+     image_batch_size * grid_t * grid_h * grid_w,
+     channel * temporal_patch_size * patch_size * patch_size,
+     dtype=torch.float32,
+ )
+
+
+ # ONNX Exports
+ ## Embedding model
+ embedding_inputs = dict(input_ids=input_ids)
+ embedding_inputs_positional = tuple(embedding_inputs.values())
+ model.model.embed_tokens(*embedding_inputs_positional)  # Test forward pass
+ EMBED_TOKENS_OUTPUT_PATH = os.path.join(TEMP_MODEL_OUTPUT_FOLDER, EMBEDDING_MODEL_NAME)
+ torch.onnx.export(
+     model.model.embed_tokens,
+     args=embedding_inputs_positional,
+     f=EMBED_TOKENS_OUTPUT_PATH,
+     export_params=True,
+     opset_version=14,
+     do_constant_folding=True,
+     input_names=list(embedding_inputs.keys()),
+     output_names=["inputs_embeds"],
+     dynamic_axes={
+         "input_ids": {0: "batch_size", 1: "sequence_length"},
+         "inputs_embeds": {0: "batch_size", 1: "sequence_length"},
+     },
+ )
+
+ ## Text model
+ text_inputs = dict(
+     inputs_embeds=inputs_embeds,
+     attention_mask=attention_mask,
+     position_ids=position_ids,
+     **dummy_past_key_values_kwargs,
+ )
+ text_inputs_positional = tuple(text_inputs.values())
+ text_outputs = model.forward(*text_inputs_positional)  # Test forward pass
+ TEXT_MODEL_OUTPUT_PATH = os.path.join(TEMP_MODEL_OUTPUT_FOLDER, TEXT_MODEL_NAME)
+ torch.onnx.export(
+     model,
+     args=text_inputs_positional,
+     f=TEXT_MODEL_OUTPUT_PATH,
+     export_params=True,
+     opset_version=14,
+     do_constant_folding=True,
+     input_names=list(text_inputs.keys()),
+     output_names=["logits"]
+     + [f"present.{i}.{key}" for i in range(num_layers) for key in ["key", "value"]],
+     dynamic_axes={
+         "inputs_embeds": {0: "batch_size", 1: "sequence_length"},
+         "attention_mask": {0: "batch_size", 1: "sequence_length"},
+         "position_ids": {1: "batch_size", 2: "sequence_length"},
+         **{
+             f"past_key_values.{i}.{key}": {0: "batch_size", 2: "past_sequence_length"}
+             for i in range(num_layers)
+             for key in ["key", "value"]
+         },
+         "logits": {0: "batch_size", 1: "sequence_length"},
+         **{
+             f"present.{i}.{key}": {0: "batch_size", 2: "past_sequence_length + 1"}
+             for i in range(num_layers)
+             for key in ["key", "value"]
+         },
+     },
+ )
+
+ ## Vision model
+ vision_inputs = dict(
+     pixel_values=pixel_values,
+     grid_thw=grid_thw,
+ )
+ vision_inputs_positional = tuple(vision_inputs.values())
+ vision_outputs = model.visual.forward(*vision_inputs_positional)  # Test forward pass
+ VISION_ENCODER_OUTPUT_PATH = os.path.join(TEMP_MODEL_OUTPUT_FOLDER, VISION_MODEL_NAME)
+ torch.onnx.export(
+     model.visual,
+     args=vision_inputs_positional,
+     f=VISION_ENCODER_OUTPUT_PATH,
+     export_params=True,
+     opset_version=14,
+     do_constant_folding=True,
+     input_names=list(vision_inputs.keys()),
+     output_names=["image_features"],
+     dynamic_axes={
+         "pixel_values": {
+             0: "batch_size * grid_t * grid_h * grid_w",
+             1: "channel * temporal_patch_size * patch_size * patch_size",
+         },
+         "grid_thw": {0: "batch_size"},
+         "image_features": {0: "batch_size * grid_t * grid_h * grid_w"},
+     },
+ )
+
+
+ # Post-processing
+ import onnx
+ import onnxslim
+ from optimum.onnx.graph_transformations import check_and_save_model
+
+ os.makedirs(FINAL_MODEL_OUTPUT_FOLDER, exist_ok=True)
+ for name in (EMBEDDING_MODEL_NAME, TEXT_MODEL_NAME, VISION_MODEL_NAME):
+     temp_model_path = os.path.join(TEMP_MODEL_OUTPUT_FOLDER, name)
+
+     ## Shape inference (especially needed by the vision encoder)
+     onnx.shape_inference.infer_shapes_path(temp_model_path, check_type=True, strict_mode=True)
+
+     ## Attempt to optimize the model with onnxslim
+     try:
+         model = onnxslim.slim(temp_model_path)
+     except Exception as e:
+         print(f"Failed to slim {name}: {e}")
+         model = onnx.load(temp_model_path)
+
+     ## Save model
+     final_model_path = os.path.join(FINAL_MODEL_OUTPUT_FOLDER, name)
+     check_and_save_model(model, final_model_path)
+
+ ## Cleanup
+ import shutil
+ shutil.rmtree(TEMP_MODEL_OUTPUT_FOLDER)
+ ```
onnx-community/Qwen2-VL-2B-Instruct/added_tokens.json ADDED
@@ -0,0 +1,16 @@
+ {
+   "<|box_end|>": 151649,
+   "<|box_start|>": 151648,
+   "<|endoftext|>": 151643,
+   "<|im_end|>": 151645,
+   "<|im_start|>": 151644,
+   "<|image_pad|>": 151655,
+   "<|object_ref_end|>": 151647,
+   "<|object_ref_start|>": 151646,
+   "<|quad_end|>": 151651,
+   "<|quad_start|>": 151650,
+   "<|video_pad|>": 151656,
+   "<|vision_end|>": 151653,
+   "<|vision_pad|>": 151654,
+   "<|vision_start|>": 151652
+ }
onnx-community/Qwen2-VL-2B-Instruct/chat_template.json ADDED
@@ -0,0 +1,3 @@
+ {
+   "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
+ }
onnx-community/Qwen2-VL-2B-Instruct/config.json ADDED
@@ -0,0 +1,56 @@
+ {
+   "_attn_implementation_autoset": true,
+   "_name_or_path": "Qwen/Qwen2-VL-2B-Instruct",
+   "architectures": [
+     "Qwen2VLForConditionalGeneration"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 151643,
+   "eos_token_id": 151645,
+   "hidden_act": "silu",
+   "hidden_size": 1536,
+   "image_token_id": 151655,
+   "initializer_range": 0.02,
+   "intermediate_size": 8960,
+   "max_position_embeddings": 32768,
+   "max_window_layers": 28,
+   "model_type": "qwen2_vl",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 28,
+   "num_key_value_heads": 2,
+   "rms_norm_eps": 1e-06,
+   "rope_scaling": {
+     "mrope_section": [
+       16,
+       24,
+       24
+     ],
+     "rope_type": "default",
+     "type": "default"
+   },
+   "rope_theta": 1000000.0,
+   "sliding_window": 32768,
+   "tie_word_embeddings": true,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.46.1",
+   "transformers.js_config": {
+     "dtype": {
+       "vision_encoder": "q8",
+       "embed_tokens": "fp16",
+       "decoder_model_merged": "q4"
+     }
+   },
+   "use_cache": true,
+   "use_sliding_window": false,
+   "video_token_id": 151656,
+   "vision_config": {
+     "hidden_size": 1536,
+     "in_chans": 3,
+     "model_type": "qwen2_vl",
+     "spatial_patch_size": 14
+   },
+   "vision_end_token_id": 151653,
+   "vision_start_token_id": 151652,
+   "vision_token_id": 151654,
+   "vocab_size": 151936
+ }
onnx-community/Qwen2-VL-2B-Instruct/generation_config.json ADDED
@@ -0,0 +1,13 @@
+ {
+   "bos_token_id": 151643,
+   "do_sample": true,
+   "eos_token_id": [
+     151645,
+     151643
+   ],
+   "pad_token_id": 151643,
+   "temperature": 0.01,
+   "top_k": 1,
+   "top_p": 0.001,
+   "transformers_version": "4.46.1"
+ }
onnx-community/Qwen2-VL-2B-Instruct/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cfb545b801fe12aadf63e6fea2672118953a9ea4a6f93f3f54ef5a5abf640536
+ size 813442
onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged.onnx_data ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e6192eca60b017bd1065578fcd5ef536557a2ae197196101bbd4844dd55b551c
+ size 6174857216
onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4f2cf55550007bed1c5453270ea5fc236bddbbc22ca7ba1e7772b514372d508b
+ size 869655116
onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_fp16.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:63a6d8aaadbd997dd1a3bcb047814e2e11106ace5bd05463e34341e18625261e
+ size 853568
onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_fp16.onnx_data ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4dea38e3e9305ad4b63db905f2d55b64fd201a8178a27495d406188a6c437139
+ size 3087399936
onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_int8.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1c747059a7294565f59dc6c6852af398477c53eaabf8c605a313d21e71f90cdf
+ size 1545150393
onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_q4.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d5492bb8064c31f61c48106b3cb140f1a81f9123205f52234329e1119f348eae
+ size 966126572
onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_q4f16.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3cbf3e285eb58d975fef96da82faee0277416b5c7c3342718f0fcc0330b27033
+ size 869378228
onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_quantized.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1c747059a7294565f59dc6c6852af398477c53eaabf8c605a313d21e71f90cdf
+ size 1545150393
onnx-community/Qwen2-VL-2B-Instruct/onnx/decoder_model_merged_uint8.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:853451575e3be3bfe1a0506e6b2b858853f1ab1d974bed48d11fc7868b06138a
+ size 1545150497
onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:152d4247d40033d1b6f23ba4b25674850fbaf1b10d259f74da69b31944114cd6
+ size 933495086
onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:88541f1b9947f617d4c1d5cea78ab8cb26574f74db5a7d64d7a9db22099aaa30
+ size 933495105
onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_fp16.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a717f0d2010b07f99b1982b5407f4c55143ca9ec45cb574b3ba272211024c661
+ size 466747781
onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_int8.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b0c7a8061d5a116be58f9c58637a85e0b87795426dec299c66af21a9ff77a6f
+ size 233374240
onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_q4.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:88541f1b9947f617d4c1d5cea78ab8cb26574f74db5a7d64d7a9db22099aaa30
+ size 933495105
onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_q4f16.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1697b2fd5563f22684826e9d6c5c24f2a8da74caa44b123b51f7eab10af1a851
+ size 466747800
onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_quantized.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b0c7a8061d5a116be58f9c58637a85e0b87795426dec299c66af21a9ff77a6f
+ size 233374240
onnx-community/Qwen2-VL-2B-Instruct/onnx/embed_tokens_uint8.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b0c7a8061d5a116be58f9c58637a85e0b87795426dec299c66af21a9ff77a6f
+ size 233374240
onnx-community/Qwen2-VL-2B-Instruct/onnx/vision_encoder.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b40135a66ba4b497b7b41252cc48abcfce280897d36545ac8c1e843f9d41f85a
+ size 1683152