prithivMLmods committed on
Commit
e551b5b
·
verified ·
1 Parent(s): d596eba

Upload folder using huggingface_hub

Browse files
checkpoint-196/config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SiglipForImageClassification"
4
+ ],
5
+ "id2label": {
6
+ "0": "aluminium",
7
+ "1": "batteries",
8
+ "2": "cardboard",
9
+ "3": "disposable plates",
10
+ "4": "glass",
11
+ "5": "hard plastic",
12
+ "6": "paper",
13
+ "7": "paper towel",
14
+ "8": "polystyrene",
15
+ "9": "soft plastics",
16
+ "10": "takeaway cups"
17
+ },
18
+ "initializer_factor": 1.0,
19
+ "label2id": {
20
+ "aluminium": 0,
21
+ "batteries": 1,
22
+ "cardboard": 2,
23
+ "disposable plates": 3,
24
+ "glass": 4,
25
+ "hard plastic": 5,
26
+ "paper": 6,
27
+ "paper towel": 7,
28
+ "polystyrene": 8,
29
+ "soft plastics": 9,
30
+ "takeaway cups": 10
31
+ },
32
+ "model_type": "siglip",
33
+ "problem_type": "single_label_classification",
34
+ "text_config": {
35
+ "attention_dropout": 0.0,
36
+ "hidden_act": "gelu_pytorch_tanh",
37
+ "hidden_size": 768,
38
+ "intermediate_size": 3072,
39
+ "layer_norm_eps": 1e-06,
40
+ "max_position_embeddings": 64,
41
+ "model_type": "siglip_text_model",
42
+ "num_attention_heads": 12,
43
+ "num_hidden_layers": 12,
44
+ "projection_size": 768,
45
+ "torch_dtype": "float32",
46
+ "vocab_size": 256000
47
+ },
48
+ "torch_dtype": "float32",
49
+ "transformers_version": "4.50.3",
50
+ "vision_config": {
51
+ "attention_dropout": 0.0,
52
+ "hidden_act": "gelu_pytorch_tanh",
53
+ "hidden_size": 768,
54
+ "image_size": 224,
55
+ "intermediate_size": 3072,
56
+ "layer_norm_eps": 1e-06,
57
+ "model_type": "siglip_vision_model",
58
+ "num_attention_heads": 12,
59
+ "num_channels": 3,
60
+ "num_hidden_layers": 12,
61
+ "patch_size": 16,
62
+ "torch_dtype": "float32"
63
+ }
64
+ }
checkpoint-196/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:154600f2551ebc11251d99697e55897eb819b9329ae87c6a4c7c13f69aa49f47
3
+ size 371595684
checkpoint-196/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbe56d0ed466998df88e81328a4ad1abe88c3c454ab8b7039d04d7b26dc8790c
3
+ size 686611066
checkpoint-196/preprocessor_config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.5,
8
+ 0.5,
9
+ 0.5
10
+ ],
11
+ "image_processor_type": "SiglipImageProcessor",
12
+ "image_std": [
13
+ 0.5,
14
+ 0.5,
15
+ 0.5
16
+ ],
17
+ "processor_class": "SiglipProcessor",
18
+ "resample": 2,
19
+ "rescale_factor": 0.00392156862745098,
20
+ "size": {
21
+ "height": 224,
22
+ "width": 224
23
+ }
24
+ }
checkpoint-196/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:004df34a3ecda54f9e180f234d7b9fd9161a3667a67b4f4f0e55ad064c2dcf82
3
+ size 14244
checkpoint-196/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a94901d94091b9ea4ab6e97b1f3d86887fe7646a15815e28ca5383ef10a6e3d5
3
+ size 1064
checkpoint-196/trainer_state.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 196,
3
+ "best_metric": 1.1491700410842896,
4
+ "best_model_checkpoint": "siglip2-finetune-full/checkpoint-196",
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 196,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "eval_accuracy": 0.5371741229481816,
15
+ "eval_loss": 1.4491065740585327,
16
+ "eval_model_preparation_time": 0.0042,
17
+ "eval_runtime": 43.5111,
18
+ "eval_samples_per_second": 71.407,
19
+ "eval_steps_per_second": 8.94,
20
+ "step": 98
21
+ },
22
+ {
23
+ "epoch": 2.0,
24
+ "eval_accuracy": 0.6253620856131317,
25
+ "eval_loss": 1.1491700410842896,
26
+ "eval_model_preparation_time": 0.0042,
27
+ "eval_runtime": 43.8413,
28
+ "eval_samples_per_second": 70.869,
29
+ "eval_steps_per_second": 8.873,
30
+ "step": 196
31
+ }
32
+ ],
33
+ "logging_steps": 500,
34
+ "max_steps": 392,
35
+ "num_input_tokens_seen": 0,
36
+ "num_train_epochs": 4,
37
+ "save_steps": 500,
38
+ "stateful_callbacks": {
39
+ "TrainerControl": {
40
+ "args": {
41
+ "should_epoch_stop": false,
42
+ "should_evaluate": false,
43
+ "should_log": false,
44
+ "should_save": true,
45
+ "should_training_stop": false
46
+ },
47
+ "attributes": {}
48
+ }
49
+ },
50
+ "total_flos": 5.2049549366470656e+17,
51
+ "train_batch_size": 32,
52
+ "trial_name": null,
53
+ "trial_params": null
54
+ }
checkpoint-196/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:204006bafac16586eef5c520416ddc231ccdf5a76f9bffe88756655e3676d429
3
+ size 5304
checkpoint-294/config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SiglipForImageClassification"
4
+ ],
5
+ "id2label": {
6
+ "0": "aluminium",
7
+ "1": "batteries",
8
+ "2": "cardboard",
9
+ "3": "disposable plates",
10
+ "4": "glass",
11
+ "5": "hard plastic",
12
+ "6": "paper",
13
+ "7": "paper towel",
14
+ "8": "polystyrene",
15
+ "9": "soft plastics",
16
+ "10": "takeaway cups"
17
+ },
18
+ "initializer_factor": 1.0,
19
+ "label2id": {
20
+ "aluminium": 0,
21
+ "batteries": 1,
22
+ "cardboard": 2,
23
+ "disposable plates": 3,
24
+ "glass": 4,
25
+ "hard plastic": 5,
26
+ "paper": 6,
27
+ "paper towel": 7,
28
+ "polystyrene": 8,
29
+ "soft plastics": 9,
30
+ "takeaway cups": 10
31
+ },
32
+ "model_type": "siglip",
33
+ "problem_type": "single_label_classification",
34
+ "text_config": {
35
+ "attention_dropout": 0.0,
36
+ "hidden_act": "gelu_pytorch_tanh",
37
+ "hidden_size": 768,
38
+ "intermediate_size": 3072,
39
+ "layer_norm_eps": 1e-06,
40
+ "max_position_embeddings": 64,
41
+ "model_type": "siglip_text_model",
42
+ "num_attention_heads": 12,
43
+ "num_hidden_layers": 12,
44
+ "projection_size": 768,
45
+ "torch_dtype": "float32",
46
+ "vocab_size": 256000
47
+ },
48
+ "torch_dtype": "float32",
49
+ "transformers_version": "4.50.3",
50
+ "vision_config": {
51
+ "attention_dropout": 0.0,
52
+ "hidden_act": "gelu_pytorch_tanh",
53
+ "hidden_size": 768,
54
+ "image_size": 224,
55
+ "intermediate_size": 3072,
56
+ "layer_norm_eps": 1e-06,
57
+ "model_type": "siglip_vision_model",
58
+ "num_attention_heads": 12,
59
+ "num_channels": 3,
60
+ "num_hidden_layers": 12,
61
+ "patch_size": 16,
62
+ "torch_dtype": "float32"
63
+ }
64
+ }
checkpoint-294/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b56c237b9c504b568083ae1484108a9a771ff16319fe8af90331fbe54aab4305
3
+ size 371595684
checkpoint-294/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1732aca89f62654193d006c34bb9144cc357d8d4f230b36523c1e5e547755c2
3
+ size 686611066
checkpoint-294/preprocessor_config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.5,
8
+ 0.5,
9
+ 0.5
10
+ ],
11
+ "image_processor_type": "SiglipImageProcessor",
12
+ "image_std": [
13
+ 0.5,
14
+ 0.5,
15
+ 0.5
16
+ ],
17
+ "processor_class": "SiglipProcessor",
18
+ "resample": 2,
19
+ "rescale_factor": 0.00392156862745098,
20
+ "size": {
21
+ "height": 224,
22
+ "width": 224
23
+ }
24
+ }
checkpoint-294/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ff04b848d3c1e2961cec59e4d9a5a7b4656d7a2f267d542bb569b704754a776
3
+ size 14244
checkpoint-294/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02d9f93682dbd755459f28157591564ea442a2e9a0b89dee57afac34b9ed7433
3
+ size 1064
checkpoint-294/trainer_state.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 294,
3
+ "best_metric": 0.6238726377487183,
4
+ "best_model_checkpoint": "siglip2-finetune-full/checkpoint-294",
5
+ "epoch": 3.0,
6
+ "eval_steps": 500,
7
+ "global_step": 294,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "eval_accuracy": 0.5371741229481816,
15
+ "eval_loss": 1.4491065740585327,
16
+ "eval_model_preparation_time": 0.0042,
17
+ "eval_runtime": 43.5111,
18
+ "eval_samples_per_second": 71.407,
19
+ "eval_steps_per_second": 8.94,
20
+ "step": 98
21
+ },
22
+ {
23
+ "epoch": 2.0,
24
+ "eval_accuracy": 0.6253620856131317,
25
+ "eval_loss": 1.1491700410842896,
26
+ "eval_model_preparation_time": 0.0042,
27
+ "eval_runtime": 43.8413,
28
+ "eval_samples_per_second": 70.869,
29
+ "eval_steps_per_second": 8.873,
30
+ "step": 196
31
+ },
32
+ {
33
+ "epoch": 3.0,
34
+ "eval_accuracy": 0.7911168329578372,
35
+ "eval_loss": 0.6238726377487183,
36
+ "eval_model_preparation_time": 0.0042,
37
+ "eval_runtime": 44.3331,
38
+ "eval_samples_per_second": 70.083,
39
+ "eval_steps_per_second": 8.774,
40
+ "step": 294
41
+ }
42
+ ],
43
+ "logging_steps": 500,
44
+ "max_steps": 392,
45
+ "num_input_tokens_seen": 0,
46
+ "num_train_epochs": 4,
47
+ "save_steps": 500,
48
+ "stateful_callbacks": {
49
+ "TrainerControl": {
50
+ "args": {
51
+ "should_epoch_stop": false,
52
+ "should_evaluate": false,
53
+ "should_log": false,
54
+ "should_save": true,
55
+ "should_training_stop": false
56
+ },
57
+ "attributes": {}
58
+ }
59
+ },
60
+ "total_flos": 7.807432404970598e+17,
61
+ "train_batch_size": 32,
62
+ "trial_name": null,
63
+ "trial_params": null
64
+ }
checkpoint-294/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:204006bafac16586eef5c520416ddc231ccdf5a76f9bffe88756655e3676d429
3
+ size 5304
checkpoint-392/config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SiglipForImageClassification"
4
+ ],
5
+ "id2label": {
6
+ "0": "aluminium",
7
+ "1": "batteries",
8
+ "2": "cardboard",
9
+ "3": "disposable plates",
10
+ "4": "glass",
11
+ "5": "hard plastic",
12
+ "6": "paper",
13
+ "7": "paper towel",
14
+ "8": "polystyrene",
15
+ "9": "soft plastics",
16
+ "10": "takeaway cups"
17
+ },
18
+ "initializer_factor": 1.0,
19
+ "label2id": {
20
+ "aluminium": 0,
21
+ "batteries": 1,
22
+ "cardboard": 2,
23
+ "disposable plates": 3,
24
+ "glass": 4,
25
+ "hard plastic": 5,
26
+ "paper": 6,
27
+ "paper towel": 7,
28
+ "polystyrene": 8,
29
+ "soft plastics": 9,
30
+ "takeaway cups": 10
31
+ },
32
+ "model_type": "siglip",
33
+ "problem_type": "single_label_classification",
34
+ "text_config": {
35
+ "attention_dropout": 0.0,
36
+ "hidden_act": "gelu_pytorch_tanh",
37
+ "hidden_size": 768,
38
+ "intermediate_size": 3072,
39
+ "layer_norm_eps": 1e-06,
40
+ "max_position_embeddings": 64,
41
+ "model_type": "siglip_text_model",
42
+ "num_attention_heads": 12,
43
+ "num_hidden_layers": 12,
44
+ "projection_size": 768,
45
+ "torch_dtype": "float32",
46
+ "vocab_size": 256000
47
+ },
48
+ "torch_dtype": "float32",
49
+ "transformers_version": "4.50.3",
50
+ "vision_config": {
51
+ "attention_dropout": 0.0,
52
+ "hidden_act": "gelu_pytorch_tanh",
53
+ "hidden_size": 768,
54
+ "image_size": 224,
55
+ "intermediate_size": 3072,
56
+ "layer_norm_eps": 1e-06,
57
+ "model_type": "siglip_vision_model",
58
+ "num_attention_heads": 12,
59
+ "num_channels": 3,
60
+ "num_hidden_layers": 12,
61
+ "patch_size": 16,
62
+ "torch_dtype": "float32"
63
+ }
64
+ }
checkpoint-392/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ead668220b943e855bc43c571442d7d3b5be7be906159f261e2b23b01794747
3
+ size 371595684
checkpoint-392/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f36c03232783a4af944afea24a8f7aab78d509f419b98d6d4ec26c2547f385d6
3
+ size 686611066
checkpoint-392/preprocessor_config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.5,
8
+ 0.5,
9
+ 0.5
10
+ ],
11
+ "image_processor_type": "SiglipImageProcessor",
12
+ "image_std": [
13
+ 0.5,
14
+ 0.5,
15
+ 0.5
16
+ ],
17
+ "processor_class": "SiglipProcessor",
18
+ "resample": 2,
19
+ "rescale_factor": 0.00392156862745098,
20
+ "size": {
21
+ "height": 224,
22
+ "width": 224
23
+ }
24
+ }
checkpoint-392/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ac1c9185ec49f91ee47c195c62597302ab943b0d057e0b6986f3a41a4ca036b
3
+ size 14244
checkpoint-392/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5d1575ba343743d6d12fe46d632856e634c19206b18e40b4358d6721f92f161
3
+ size 1064
checkpoint-392/trainer_state.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 392,
3
+ "best_metric": 0.2776739001274109,
4
+ "best_model_checkpoint": "siglip2-finetune-full/checkpoint-392",
5
+ "epoch": 4.0,
6
+ "eval_steps": 500,
7
+ "global_step": 392,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "eval_accuracy": 0.5371741229481816,
15
+ "eval_loss": 1.4491065740585327,
16
+ "eval_model_preparation_time": 0.0042,
17
+ "eval_runtime": 43.5111,
18
+ "eval_samples_per_second": 71.407,
19
+ "eval_steps_per_second": 8.94,
20
+ "step": 98
21
+ },
22
+ {
23
+ "epoch": 2.0,
24
+ "eval_accuracy": 0.6253620856131317,
25
+ "eval_loss": 1.1491700410842896,
26
+ "eval_model_preparation_time": 0.0042,
27
+ "eval_runtime": 43.8413,
28
+ "eval_samples_per_second": 70.869,
29
+ "eval_steps_per_second": 8.873,
30
+ "step": 196
31
+ },
32
+ {
33
+ "epoch": 3.0,
34
+ "eval_accuracy": 0.7911168329578372,
35
+ "eval_loss": 0.6238726377487183,
36
+ "eval_model_preparation_time": 0.0042,
37
+ "eval_runtime": 44.3331,
38
+ "eval_samples_per_second": 70.083,
39
+ "eval_steps_per_second": 8.774,
40
+ "step": 294
41
+ },
42
+ {
43
+ "epoch": 4.0,
44
+ "eval_accuracy": 0.9127775989700676,
45
+ "eval_loss": 0.2776739001274109,
46
+ "eval_model_preparation_time": 0.0042,
47
+ "eval_runtime": 44.1936,
48
+ "eval_samples_per_second": 70.304,
49
+ "eval_steps_per_second": 8.802,
50
+ "step": 392
51
+ }
52
+ ],
53
+ "logging_steps": 500,
54
+ "max_steps": 392,
55
+ "num_input_tokens_seen": 0,
56
+ "num_train_epochs": 4,
57
+ "save_steps": 500,
58
+ "stateful_callbacks": {
59
+ "TrainerControl": {
60
+ "args": {
61
+ "should_epoch_stop": false,
62
+ "should_evaluate": false,
63
+ "should_log": false,
64
+ "should_save": true,
65
+ "should_training_stop": true
66
+ },
67
+ "attributes": {}
68
+ }
69
+ },
70
+ "total_flos": 1.0409909873294131e+18,
71
+ "train_batch_size": 32,
72
+ "trial_name": null,
73
+ "trial_params": null
74
+ }
checkpoint-392/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:204006bafac16586eef5c520416ddc231ccdf5a76f9bffe88756655e3676d429
3
+ size 5304
checkpoint-98/config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SiglipForImageClassification"
4
+ ],
5
+ "id2label": {
6
+ "0": "aluminium",
7
+ "1": "batteries",
8
+ "2": "cardboard",
9
+ "3": "disposable plates",
10
+ "4": "glass",
11
+ "5": "hard plastic",
12
+ "6": "paper",
13
+ "7": "paper towel",
14
+ "8": "polystyrene",
15
+ "9": "soft plastics",
16
+ "10": "takeaway cups"
17
+ },
18
+ "initializer_factor": 1.0,
19
+ "label2id": {
20
+ "aluminium": 0,
21
+ "batteries": 1,
22
+ "cardboard": 2,
23
+ "disposable plates": 3,
24
+ "glass": 4,
25
+ "hard plastic": 5,
26
+ "paper": 6,
27
+ "paper towel": 7,
28
+ "polystyrene": 8,
29
+ "soft plastics": 9,
30
+ "takeaway cups": 10
31
+ },
32
+ "model_type": "siglip",
33
+ "problem_type": "single_label_classification",
34
+ "text_config": {
35
+ "attention_dropout": 0.0,
36
+ "hidden_act": "gelu_pytorch_tanh",
37
+ "hidden_size": 768,
38
+ "intermediate_size": 3072,
39
+ "layer_norm_eps": 1e-06,
40
+ "max_position_embeddings": 64,
41
+ "model_type": "siglip_text_model",
42
+ "num_attention_heads": 12,
43
+ "num_hidden_layers": 12,
44
+ "projection_size": 768,
45
+ "torch_dtype": "float32",
46
+ "vocab_size": 256000
47
+ },
48
+ "torch_dtype": "float32",
49
+ "transformers_version": "4.50.3",
50
+ "vision_config": {
51
+ "attention_dropout": 0.0,
52
+ "hidden_act": "gelu_pytorch_tanh",
53
+ "hidden_size": 768,
54
+ "image_size": 224,
55
+ "intermediate_size": 3072,
56
+ "layer_norm_eps": 1e-06,
57
+ "model_type": "siglip_vision_model",
58
+ "num_attention_heads": 12,
59
+ "num_channels": 3,
60
+ "num_hidden_layers": 12,
61
+ "patch_size": 16,
62
+ "torch_dtype": "float32"
63
+ }
64
+ }
checkpoint-98/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:702039b63fb5c1dd75b9a2313f7112309611886b89a56c3a6f530d206f905699
3
+ size 371595684
checkpoint-98/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b8d249e216fadfd52d9b29e1ddf3ed832940e9eec2709ca17120114d80f38d0
3
+ size 686611066
checkpoint-98/preprocessor_config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.5,
8
+ 0.5,
9
+ 0.5
10
+ ],
11
+ "image_processor_type": "SiglipImageProcessor",
12
+ "image_std": [
13
+ 0.5,
14
+ 0.5,
15
+ 0.5
16
+ ],
17
+ "processor_class": "SiglipProcessor",
18
+ "resample": 2,
19
+ "rescale_factor": 0.00392156862745098,
20
+ "size": {
21
+ "height": 224,
22
+ "width": 224
23
+ }
24
+ }
checkpoint-98/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae860a1d1076ef7373890c1a99b317f6e7ac7f9868059808b708672e7dd008ab
3
+ size 14244
checkpoint-98/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd6be97bbe4897efead06f234be9c9358deb73c8c35054366d143ad19872b778
3
+ size 1064
checkpoint-98/trainer_state.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 98,
3
+ "best_metric": 1.4491065740585327,
4
+ "best_model_checkpoint": "siglip2-finetune-full/checkpoint-98",
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 98,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "eval_accuracy": 0.5371741229481816,
15
+ "eval_loss": 1.4491065740585327,
16
+ "eval_model_preparation_time": 0.0042,
17
+ "eval_runtime": 43.5111,
18
+ "eval_samples_per_second": 71.407,
19
+ "eval_steps_per_second": 8.94,
20
+ "step": 98
21
+ }
22
+ ],
23
+ "logging_steps": 500,
24
+ "max_steps": 392,
25
+ "num_input_tokens_seen": 0,
26
+ "num_train_epochs": 4,
27
+ "save_steps": 500,
28
+ "stateful_callbacks": {
29
+ "TrainerControl": {
30
+ "args": {
31
+ "should_epoch_stop": false,
32
+ "should_evaluate": false,
33
+ "should_log": false,
34
+ "should_save": true,
35
+ "should_training_stop": false
36
+ },
37
+ "attributes": {}
38
+ }
39
+ },
40
+ "total_flos": 2.6024774683235328e+17,
41
+ "train_batch_size": 32,
42
+ "trial_name": null,
43
+ "trial_params": null
44
+ }
checkpoint-98/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:204006bafac16586eef5c520416ddc231ccdf5a76f9bffe88756655e3676d429
3
+ size 5304
config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SiglipForImageClassification"
4
+ ],
5
+ "id2label": {
6
+ "0": "aluminium",
7
+ "1": "batteries",
8
+ "2": "cardboard",
9
+ "3": "disposable plates",
10
+ "4": "glass",
11
+ "5": "hard plastic",
12
+ "6": "paper",
13
+ "7": "paper towel",
14
+ "8": "polystyrene",
15
+ "9": "soft plastics",
16
+ "10": "takeaway cups"
17
+ },
18
+ "initializer_factor": 1.0,
19
+ "label2id": {
20
+ "aluminium": 0,
21
+ "batteries": 1,
22
+ "cardboard": 2,
23
+ "disposable plates": 3,
24
+ "glass": 4,
25
+ "hard plastic": 5,
26
+ "paper": 6,
27
+ "paper towel": 7,
28
+ "polystyrene": 8,
29
+ "soft plastics": 9,
30
+ "takeaway cups": 10
31
+ },
32
+ "model_type": "siglip",
33
+ "problem_type": "single_label_classification",
34
+ "text_config": {
35
+ "attention_dropout": 0.0,
36
+ "hidden_act": "gelu_pytorch_tanh",
37
+ "hidden_size": 768,
38
+ "intermediate_size": 3072,
39
+ "layer_norm_eps": 1e-06,
40
+ "max_position_embeddings": 64,
41
+ "model_type": "siglip_text_model",
42
+ "num_attention_heads": 12,
43
+ "num_hidden_layers": 12,
44
+ "projection_size": 768,
45
+ "torch_dtype": "float32",
46
+ "vocab_size": 256000
47
+ },
48
+ "torch_dtype": "float32",
49
+ "transformers_version": "4.50.3",
50
+ "vision_config": {
51
+ "attention_dropout": 0.0,
52
+ "hidden_act": "gelu_pytorch_tanh",
53
+ "hidden_size": 768,
54
+ "image_size": 224,
55
+ "intermediate_size": 3072,
56
+ "layer_norm_eps": 1e-06,
57
+ "model_type": "siglip_vision_model",
58
+ "num_attention_heads": 12,
59
+ "num_channels": 3,
60
+ "num_hidden_layers": 12,
61
+ "patch_size": 16,
62
+ "torch_dtype": "float32"
63
+ }
64
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ead668220b943e855bc43c571442d7d3b5be7be906159f261e2b23b01794747
3
+ size 371595684
preprocessor_config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.5,
8
+ 0.5,
9
+ 0.5
10
+ ],
11
+ "image_processor_type": "SiglipImageProcessor",
12
+ "image_std": [
13
+ 0.5,
14
+ 0.5,
15
+ 0.5
16
+ ],
17
+ "processor_class": "SiglipProcessor",
18
+ "resample": 2,
19
+ "rescale_factor": 0.00392156862745098,
20
+ "size": {
21
+ "height": 224,
22
+ "width": 224
23
+ }
24
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:204006bafac16586eef5c520416ddc231ccdf5a76f9bffe88756655e3676d429
3
+ size 5304