SmilingWolf committed on
Commit
7fcd5b0
·
verified ·
0 Parent(s):

Super-squash branch 'main' using huggingface_hub

Browse files
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
4
+
5
+ **WARNING**: Do *not* consider anything in this repo production-ready.
6
+
7
+ ### Checkpoints
8
+
9
+ - **siglip_swinv2_base_2025_02_22_18h56m54s**
10
+ Text encoder trained on top of frozen [SmilingWolf/wd-swinv2-tagger-v3](https://huggingface.co/SmilingWolf/wd-swinv2-tagger-v3), so pretty much SigLiT style (image encoder locked, only the text encoder trained). Compatible with existing DeepGHS indexes/embeddings.
11
+ - **siglip_swinv2_base_2025_05_02_22h02m36s**
12
+ Based on `siglip_swinv2_base_2025_02_22_18h56m54s`, with the image encoder unfrozen. So SigLIP with a warm start, I guess.
13
+ - **siglip_eva02_base_2025_05_02_21h53m54s**
14
+ A test with a different architecture, trained from scratch using SigLIP.
15
+
16
+ ### Usage Example
17
+
18
+ See [deepghs/search_image_by_image_or_text](https://huggingface.co/spaces/deepghs/search_image_by_image_or_text) for example usage.
19
+
20
+ ### Compatibility
21
+
22
+ The checkpoints in this repo have been structured for compatibility with the [dghs-realutils](https://github.com/deepghs/realutils) package.
smilingwolf/siglip_eva02_base_2025_05_02_21h53m54s/image_encode.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb756a8a91ed0fdf7f50f9a2a17534261b0471f03d5d6966ff8e931e7a436ccf
3
+ size 342364275
smilingwolf/siglip_eva02_base_2025_05_02_21h53m54s/meta.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "image_embedding_width": 768,
3
+ "image_encoding_width": 768,
4
+ "image_flops": 77187354624.0,
5
+ "image_params": 85473024.0,
6
+ "image_size": 420,
7
+ "logit_bias": -11.471226692199707,
8
+ "logit_scale": 4.604705333709717,
9
+ "name": null,
10
+ "repo_created_at": null,
11
+ "repo_id": null,
12
+ "text_embedding_width": 768,
13
+ "text_encoding_width": 768,
14
+ "text_flops": 10882449408.0,
15
+ "text_params": 85545216.0
16
+ }
smilingwolf/siglip_eva02_base_2025_05_02_21h53m54s/preprocessor.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "stages": [
3
+ {
4
+ "force_background": "white",
5
+ "type": "convert_rgb"
6
+ },
7
+ {
8
+ "antialias": true,
9
+ "interpolation": "bicubic",
10
+ "max_size": 420,
11
+ "size": 420,
12
+ "type": "resize"
13
+ },
14
+ {
15
+ "size": 420,
16
+ "type": "center_crop"
17
+ },
18
+ {
19
+ "type": "maybe_to_tensor"
20
+ },
21
+ {
22
+ "mean": [
23
+ 0.5,
24
+ 0.5,
25
+ 0.5
26
+ ],
27
+ "std": [
28
+ 0.5,
29
+ 0.5,
30
+ 0.5
31
+ ],
32
+ "type": "normalize"
33
+ }
34
+ ]
35
+ }
smilingwolf/siglip_eva02_base_2025_05_02_21h53m54s/text_encode.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db2688dac121368b9f46c0d6e308940c4045cd1bd8913e157b2073e177f06c01
3
+ size 392829190
smilingwolf/siglip_eva02_base_2025_05_02_21h53m54s/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
smilingwolf/siglip_swinv2_base_2025_02_22_18h56m54s/image_encode.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c6c8402ba3179af06c1e9c5d12c1c9177c9282ad700ecdc51e3e2ad4644c4a2
3
+ size 417933798
smilingwolf/siglip_swinv2_base_2025_02_22_18h56m54s/meta.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "image_embedding_width": 1024,
3
+ "image_encoding_width": 1024,
4
+ "image_flops": 46201623808.0,
5
+ "image_params": 65930624.0,
6
+ "image_size": 448,
7
+ "logit_bias": -9.808648109436035,
8
+ "logit_scale": 5.067012310028076,
9
+ "name": null,
10
+ "repo_created_at": null,
11
+ "repo_id": null,
12
+ "text_embedding_width": 1024,
13
+ "text_encoding_width": 1024,
14
+ "text_flops": 10882646016.0,
15
+ "text_params": 85742080.0
16
+ }
smilingwolf/siglip_swinv2_base_2025_02_22_18h56m54s/preprocessor.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "stages": [
3
+ {
4
+ "force_background": "white",
5
+ "type": "convert_rgb"
6
+ },
7
+ {
8
+ "antialias": true,
9
+ "interpolation": "bicubic",
10
+ "max_size": 448,
11
+ "size": 448,
12
+ "type": "resize"
13
+ },
14
+ {
15
+ "size": 448,
16
+ "type": "center_crop"
17
+ },
18
+ {
19
+ "type": "maybe_to_tensor"
20
+ },
21
+ {
22
+ "mean": [
23
+ 0.5,
24
+ 0.5,
25
+ 0.5
26
+ ],
27
+ "std": [
28
+ 0.5,
29
+ 0.5,
30
+ 0.5
31
+ ],
32
+ "type": "normalize"
33
+ }
34
+ ]
35
+ }
smilingwolf/siglip_swinv2_base_2025_02_22_18h56m54s/text_encode.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:254f1c188e71598c5cb3214f4d34ed8e9038f6ce06d6640b9546bc043b9beb5f
3
+ size 393614582
smilingwolf/siglip_swinv2_base_2025_02_22_18h56m54s/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
smilingwolf/siglip_swinv2_base_2025_05_02_22h02m36s/image_encode.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad2b5724fe3a9cdea66bb6b08c44bd34adffc46a0daa8a310a4a53c11edf7187
3
+ size 418343477
smilingwolf/siglip_swinv2_base_2025_05_02_22h02m36s/meta.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "image_embedding_width": 1024,
3
+ "image_encoding_width": 1024,
4
+ "image_flops": 46201623808.0,
5
+ "image_params": 65930624.0,
6
+ "image_size": 448,
7
+ "logit_bias": -9.996746063232422,
8
+ "logit_scale": 5.115633487701416,
9
+ "name": null,
10
+ "repo_created_at": null,
11
+ "repo_id": null,
12
+ "text_embedding_width": 1024,
13
+ "text_encoding_width": 1024,
14
+ "text_flops": 10882646016.0,
15
+ "text_params": 85742080.0
16
+ }
smilingwolf/siglip_swinv2_base_2025_05_02_22h02m36s/preprocessor.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "stages": [
3
+ {
4
+ "force_background": "white",
5
+ "type": "convert_rgb"
6
+ },
7
+ {
8
+ "antialias": true,
9
+ "interpolation": "bicubic",
10
+ "max_size": 448,
11
+ "size": 448,
12
+ "type": "resize"
13
+ },
14
+ {
15
+ "size": 448,
16
+ "type": "center_crop"
17
+ },
18
+ {
19
+ "type": "maybe_to_tensor"
20
+ },
21
+ {
22
+ "mean": [
23
+ 0.5,
24
+ 0.5,
25
+ 0.5
26
+ ],
27
+ "std": [
28
+ 0.5,
29
+ 0.5,
30
+ 0.5
31
+ ],
32
+ "type": "normalize"
33
+ }
34
+ ]
35
+ }
smilingwolf/siglip_swinv2_base_2025_05_02_22h02m36s/text_encode.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:438da4d63dd23f299b2775a43115b2f4930bafe76e3ffa95996a299df95e678c
3
+ size 393616646
smilingwolf/siglip_swinv2_base_2025_05_02_22h02m36s/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff