Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- checkpoint-1148/config.json +58 -0
- checkpoint-1148/model.safetensors +3 -0
- checkpoint-1148/optimizer.pt +3 -0
- checkpoint-1148/preprocessor_config.json +24 -0
- checkpoint-1148/rng_state.pth +3 -0
- checkpoint-1148/scheduler.pt +3 -0
- checkpoint-1148/trainer_state.json +88 -0
- checkpoint-1148/training_args.bin +3 -0
- checkpoint-1435/config.json +58 -0
- checkpoint-1435/model.safetensors +3 -0
- checkpoint-1435/optimizer.pt +3 -0
- checkpoint-1435/preprocessor_config.json +24 -0
- checkpoint-1435/rng_state.pth +3 -0
- checkpoint-1435/scheduler.pt +3 -0
- checkpoint-1435/trainer_state.json +98 -0
- checkpoint-1435/training_args.bin +3 -0
- checkpoint-1722/config.json +58 -0
- checkpoint-1722/model.safetensors +3 -0
- checkpoint-1722/optimizer.pt +3 -0
- checkpoint-1722/preprocessor_config.json +24 -0
- checkpoint-1722/rng_state.pth +3 -0
- checkpoint-1722/scheduler.pt +3 -0
- checkpoint-1722/trainer_state.json +115 -0
- checkpoint-1722/training_args.bin +3 -0
- checkpoint-287/config.json +58 -0
- checkpoint-287/model.safetensors +3 -0
- checkpoint-287/optimizer.pt +3 -0
- checkpoint-287/preprocessor_config.json +24 -0
- checkpoint-287/rng_state.pth +3 -0
- checkpoint-287/scheduler.pt +3 -0
- checkpoint-287/trainer_state.json +44 -0
- checkpoint-287/training_args.bin +3 -0
- checkpoint-574/config.json +58 -0
- checkpoint-574/model.safetensors +3 -0
- checkpoint-574/optimizer.pt +3 -0
- checkpoint-574/preprocessor_config.json +24 -0
- checkpoint-574/rng_state.pth +3 -0
- checkpoint-574/scheduler.pt +3 -0
- checkpoint-574/trainer_state.json +61 -0
- checkpoint-574/training_args.bin +3 -0
- checkpoint-861/config.json +58 -0
- checkpoint-861/model.safetensors +3 -0
- checkpoint-861/optimizer.pt +3 -0
- checkpoint-861/preprocessor_config.json +24 -0
- checkpoint-861/rng_state.pth +3 -0
- checkpoint-861/scheduler.pt +3 -0
- checkpoint-861/trainer_state.json +71 -0
- checkpoint-861/training_args.bin +3 -0
- config.json +58 -0
- model.safetensors +3 -0
checkpoint-1148/config.json
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"SiglipForImageClassification"
|
4 |
+
],
|
5 |
+
"id2label": {
|
6 |
+
"0": "age 01-10",
|
7 |
+
"1": "age 11-20",
|
8 |
+
"2": "age 21-30",
|
9 |
+
"3": "age 31-40",
|
10 |
+
"4": "age 41-55",
|
11 |
+
"5": "age 56-65",
|
12 |
+
"6": "age 66-80",
|
13 |
+
"7": "age 80 +"
|
14 |
+
},
|
15 |
+
"initializer_factor": 1.0,
|
16 |
+
"label2id": {
|
17 |
+
"age 01-10": 0,
|
18 |
+
"age 11-20": 1,
|
19 |
+
"age 21-30": 2,
|
20 |
+
"age 31-40": 3,
|
21 |
+
"age 41-55": 4,
|
22 |
+
"age 56-65": 5,
|
23 |
+
"age 66-80": 6,
|
24 |
+
"age 80 +": 7
|
25 |
+
},
|
26 |
+
"model_type": "siglip",
|
27 |
+
"problem_type": "single_label_classification",
|
28 |
+
"text_config": {
|
29 |
+
"attention_dropout": 0.0,
|
30 |
+
"hidden_act": "gelu_pytorch_tanh",
|
31 |
+
"hidden_size": 768,
|
32 |
+
"intermediate_size": 3072,
|
33 |
+
"layer_norm_eps": 1e-06,
|
34 |
+
"max_position_embeddings": 64,
|
35 |
+
"model_type": "siglip_text_model",
|
36 |
+
"num_attention_heads": 12,
|
37 |
+
"num_hidden_layers": 12,
|
38 |
+
"projection_size": 768,
|
39 |
+
"torch_dtype": "float32",
|
40 |
+
"vocab_size": 256000
|
41 |
+
},
|
42 |
+
"torch_dtype": "float32",
|
43 |
+
"transformers_version": "4.50.0",
|
44 |
+
"vision_config": {
|
45 |
+
"attention_dropout": 0.0,
|
46 |
+
"hidden_act": "gelu_pytorch_tanh",
|
47 |
+
"hidden_size": 768,
|
48 |
+
"image_size": 224,
|
49 |
+
"intermediate_size": 3072,
|
50 |
+
"layer_norm_eps": 1e-06,
|
51 |
+
"model_type": "siglip_vision_model",
|
52 |
+
"num_attention_heads": 12,
|
53 |
+
"num_channels": 3,
|
54 |
+
"num_hidden_layers": 12,
|
55 |
+
"patch_size": 16,
|
56 |
+
"torch_dtype": "float32"
|
57 |
+
}
|
58 |
+
}
|
checkpoint-1148/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c91bad396704fe24816ee281e79890103b88ce05912af2f156526b6c09180c34
|
3 |
+
size 371586448
|
checkpoint-1148/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2509c41bb16bc61731b63da74c2a8e0bc8e15cea341c661146ec8fc610af4c53
|
3 |
+
size 686592634
|
checkpoint-1148/preprocessor_config.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_convert_rgb": null,
|
3 |
+
"do_normalize": true,
|
4 |
+
"do_rescale": true,
|
5 |
+
"do_resize": true,
|
6 |
+
"image_mean": [
|
7 |
+
0.5,
|
8 |
+
0.5,
|
9 |
+
0.5
|
10 |
+
],
|
11 |
+
"image_processor_type": "SiglipImageProcessor",
|
12 |
+
"image_std": [
|
13 |
+
0.5,
|
14 |
+
0.5,
|
15 |
+
0.5
|
16 |
+
],
|
17 |
+
"processor_class": "SiglipProcessor",
|
18 |
+
"resample": 2,
|
19 |
+
"rescale_factor": 0.00392156862745098,
|
20 |
+
"size": {
|
21 |
+
"height": 224,
|
22 |
+
"width": 224
|
23 |
+
}
|
24 |
+
}
|
checkpoint-1148/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c1e2ce22cd85e09a233fa80c476a2af0a599030daa001cacb597feaec8f25b19
|
3 |
+
size 14244
|
checkpoint-1148/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f0f883ead62c8b6e814f3609b8fd803dce1d1f0b759c8956ef06ac0fbefc1f6a
|
3 |
+
size 1064
|
checkpoint-1148/trainer_state.json
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_global_step": 1148,
|
3 |
+
"best_metric": 0.552396297454834,
|
4 |
+
"best_model_checkpoint": "siglip2-finetune-full/checkpoint-1148",
|
5 |
+
"epoch": 4.0,
|
6 |
+
"eval_steps": 500,
|
7 |
+
"global_step": 1148,
|
8 |
+
"is_hyper_param_search": false,
|
9 |
+
"is_local_process_zero": true,
|
10 |
+
"is_world_process_zero": true,
|
11 |
+
"log_history": [
|
12 |
+
{
|
13 |
+
"epoch": 1.0,
|
14 |
+
"eval_accuracy": 0.646481178396072,
|
15 |
+
"eval_loss": 0.8569742441177368,
|
16 |
+
"eval_model_preparation_time": 0.0022,
|
17 |
+
"eval_runtime": 120.9539,
|
18 |
+
"eval_samples_per_second": 75.773,
|
19 |
+
"eval_steps_per_second": 9.475,
|
20 |
+
"step": 287
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"epoch": 1.7421602787456445,
|
24 |
+
"grad_norm": 29.628196716308594,
|
25 |
+
"learning_rate": 1.4617224880382776e-05,
|
26 |
+
"loss": 1.0903,
|
27 |
+
"step": 500
|
28 |
+
},
|
29 |
+
{
|
30 |
+
"epoch": 2.0,
|
31 |
+
"eval_accuracy": 0.6763775231860338,
|
32 |
+
"eval_loss": 0.7689628005027771,
|
33 |
+
"eval_model_preparation_time": 0.0022,
|
34 |
+
"eval_runtime": 120.8745,
|
35 |
+
"eval_samples_per_second": 75.822,
|
36 |
+
"eval_steps_per_second": 9.481,
|
37 |
+
"step": 574
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 3.0,
|
41 |
+
"eval_accuracy": 0.7290780141843972,
|
42 |
+
"eval_loss": 0.6629450917243958,
|
43 |
+
"eval_model_preparation_time": 0.0022,
|
44 |
+
"eval_runtime": 121.8989,
|
45 |
+
"eval_samples_per_second": 75.185,
|
46 |
+
"eval_steps_per_second": 9.401,
|
47 |
+
"step": 861
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"epoch": 3.484320557491289,
|
51 |
+
"grad_norm": 19.66444969177246,
|
52 |
+
"learning_rate": 8.636363636363637e-06,
|
53 |
+
"loss": 0.7613,
|
54 |
+
"step": 1000
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"epoch": 4.0,
|
58 |
+
"eval_accuracy": 0.7742498636115658,
|
59 |
+
"eval_loss": 0.552396297454834,
|
60 |
+
"eval_model_preparation_time": 0.0022,
|
61 |
+
"eval_runtime": 121.0546,
|
62 |
+
"eval_samples_per_second": 75.71,
|
63 |
+
"eval_steps_per_second": 9.467,
|
64 |
+
"step": 1148
|
65 |
+
}
|
66 |
+
],
|
67 |
+
"logging_steps": 500,
|
68 |
+
"max_steps": 1722,
|
69 |
+
"num_input_tokens_seen": 0,
|
70 |
+
"num_train_epochs": 6,
|
71 |
+
"save_steps": 500,
|
72 |
+
"stateful_callbacks": {
|
73 |
+
"TrainerControl": {
|
74 |
+
"args": {
|
75 |
+
"should_epoch_stop": false,
|
76 |
+
"should_evaluate": false,
|
77 |
+
"should_log": false,
|
78 |
+
"should_save": true,
|
79 |
+
"should_training_stop": false
|
80 |
+
},
|
81 |
+
"attributes": {}
|
82 |
+
}
|
83 |
+
},
|
84 |
+
"total_flos": 3.07062924699009e+18,
|
85 |
+
"train_batch_size": 32,
|
86 |
+
"trial_name": null,
|
87 |
+
"trial_params": null
|
88 |
+
}
|
checkpoint-1148/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a497a9eb5e81ae5cb20a2ceefdd1321aa32c6b27b5c4bd8f2816c6cf571b8aed
|
3 |
+
size 5304
|
checkpoint-1435/config.json
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"SiglipForImageClassification"
|
4 |
+
],
|
5 |
+
"id2label": {
|
6 |
+
"0": "age 01-10",
|
7 |
+
"1": "age 11-20",
|
8 |
+
"2": "age 21-30",
|
9 |
+
"3": "age 31-40",
|
10 |
+
"4": "age 41-55",
|
11 |
+
"5": "age 56-65",
|
12 |
+
"6": "age 66-80",
|
13 |
+
"7": "age 80 +"
|
14 |
+
},
|
15 |
+
"initializer_factor": 1.0,
|
16 |
+
"label2id": {
|
17 |
+
"age 01-10": 0,
|
18 |
+
"age 11-20": 1,
|
19 |
+
"age 21-30": 2,
|
20 |
+
"age 31-40": 3,
|
21 |
+
"age 41-55": 4,
|
22 |
+
"age 56-65": 5,
|
23 |
+
"age 66-80": 6,
|
24 |
+
"age 80 +": 7
|
25 |
+
},
|
26 |
+
"model_type": "siglip",
|
27 |
+
"problem_type": "single_label_classification",
|
28 |
+
"text_config": {
|
29 |
+
"attention_dropout": 0.0,
|
30 |
+
"hidden_act": "gelu_pytorch_tanh",
|
31 |
+
"hidden_size": 768,
|
32 |
+
"intermediate_size": 3072,
|
33 |
+
"layer_norm_eps": 1e-06,
|
34 |
+
"max_position_embeddings": 64,
|
35 |
+
"model_type": "siglip_text_model",
|
36 |
+
"num_attention_heads": 12,
|
37 |
+
"num_hidden_layers": 12,
|
38 |
+
"projection_size": 768,
|
39 |
+
"torch_dtype": "float32",
|
40 |
+
"vocab_size": 256000
|
41 |
+
},
|
42 |
+
"torch_dtype": "float32",
|
43 |
+
"transformers_version": "4.50.0",
|
44 |
+
"vision_config": {
|
45 |
+
"attention_dropout": 0.0,
|
46 |
+
"hidden_act": "gelu_pytorch_tanh",
|
47 |
+
"hidden_size": 768,
|
48 |
+
"image_size": 224,
|
49 |
+
"intermediate_size": 3072,
|
50 |
+
"layer_norm_eps": 1e-06,
|
51 |
+
"model_type": "siglip_vision_model",
|
52 |
+
"num_attention_heads": 12,
|
53 |
+
"num_channels": 3,
|
54 |
+
"num_hidden_layers": 12,
|
55 |
+
"patch_size": 16,
|
56 |
+
"torch_dtype": "float32"
|
57 |
+
}
|
58 |
+
}
|
checkpoint-1435/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:60815197d8350ca8988443263b8dd4fd1e0639d78bcf974a34640a0d31338072
|
3 |
+
size 371586448
|
checkpoint-1435/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf242e6a4fbd6c9ed7bcf994732bdaa86cf58f5c1c794f17d57afa61bf35d859
|
3 |
+
size 686592634
|
checkpoint-1435/preprocessor_config.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_convert_rgb": null,
|
3 |
+
"do_normalize": true,
|
4 |
+
"do_rescale": true,
|
5 |
+
"do_resize": true,
|
6 |
+
"image_mean": [
|
7 |
+
0.5,
|
8 |
+
0.5,
|
9 |
+
0.5
|
10 |
+
],
|
11 |
+
"image_processor_type": "SiglipImageProcessor",
|
12 |
+
"image_std": [
|
13 |
+
0.5,
|
14 |
+
0.5,
|
15 |
+
0.5
|
16 |
+
],
|
17 |
+
"processor_class": "SiglipProcessor",
|
18 |
+
"resample": 2,
|
19 |
+
"rescale_factor": 0.00392156862745098,
|
20 |
+
"size": {
|
21 |
+
"height": 224,
|
22 |
+
"width": 224
|
23 |
+
}
|
24 |
+
}
|
checkpoint-1435/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ff1c8e5adc3fe87431248921d26da697ec61f8556617d529523e88556b17fd8
|
3 |
+
size 14244
|
checkpoint-1435/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da2f0a1afe1cb2a189595ba315a20c9a2151481fdc442ba05e79e70f619b0625
|
3 |
+
size 1064
|
checkpoint-1435/trainer_state.json
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_global_step": 1435,
|
3 |
+
"best_metric": 0.4922109544277191,
|
4 |
+
"best_model_checkpoint": "siglip2-finetune-full/checkpoint-1435",
|
5 |
+
"epoch": 5.0,
|
6 |
+
"eval_steps": 500,
|
7 |
+
"global_step": 1435,
|
8 |
+
"is_hyper_param_search": false,
|
9 |
+
"is_local_process_zero": true,
|
10 |
+
"is_world_process_zero": true,
|
11 |
+
"log_history": [
|
12 |
+
{
|
13 |
+
"epoch": 1.0,
|
14 |
+
"eval_accuracy": 0.646481178396072,
|
15 |
+
"eval_loss": 0.8569742441177368,
|
16 |
+
"eval_model_preparation_time": 0.0022,
|
17 |
+
"eval_runtime": 120.9539,
|
18 |
+
"eval_samples_per_second": 75.773,
|
19 |
+
"eval_steps_per_second": 9.475,
|
20 |
+
"step": 287
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"epoch": 1.7421602787456445,
|
24 |
+
"grad_norm": 29.628196716308594,
|
25 |
+
"learning_rate": 1.4617224880382776e-05,
|
26 |
+
"loss": 1.0903,
|
27 |
+
"step": 500
|
28 |
+
},
|
29 |
+
{
|
30 |
+
"epoch": 2.0,
|
31 |
+
"eval_accuracy": 0.6763775231860338,
|
32 |
+
"eval_loss": 0.7689628005027771,
|
33 |
+
"eval_model_preparation_time": 0.0022,
|
34 |
+
"eval_runtime": 120.8745,
|
35 |
+
"eval_samples_per_second": 75.822,
|
36 |
+
"eval_steps_per_second": 9.481,
|
37 |
+
"step": 574
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 3.0,
|
41 |
+
"eval_accuracy": 0.7290780141843972,
|
42 |
+
"eval_loss": 0.6629450917243958,
|
43 |
+
"eval_model_preparation_time": 0.0022,
|
44 |
+
"eval_runtime": 121.8989,
|
45 |
+
"eval_samples_per_second": 75.185,
|
46 |
+
"eval_steps_per_second": 9.401,
|
47 |
+
"step": 861
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"epoch": 3.484320557491289,
|
51 |
+
"grad_norm": 19.66444969177246,
|
52 |
+
"learning_rate": 8.636363636363637e-06,
|
53 |
+
"loss": 0.7613,
|
54 |
+
"step": 1000
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"epoch": 4.0,
|
58 |
+
"eval_accuracy": 0.7742498636115658,
|
59 |
+
"eval_loss": 0.552396297454834,
|
60 |
+
"eval_model_preparation_time": 0.0022,
|
61 |
+
"eval_runtime": 121.0546,
|
62 |
+
"eval_samples_per_second": 75.71,
|
63 |
+
"eval_steps_per_second": 9.467,
|
64 |
+
"step": 1148
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"epoch": 5.0,
|
68 |
+
"eval_accuracy": 0.8007637752318604,
|
69 |
+
"eval_loss": 0.4922109544277191,
|
70 |
+
"eval_model_preparation_time": 0.0022,
|
71 |
+
"eval_runtime": 121.4614,
|
72 |
+
"eval_samples_per_second": 75.456,
|
73 |
+
"eval_steps_per_second": 9.435,
|
74 |
+
"step": 1435
|
75 |
+
}
|
76 |
+
],
|
77 |
+
"logging_steps": 500,
|
78 |
+
"max_steps": 1722,
|
79 |
+
"num_input_tokens_seen": 0,
|
80 |
+
"num_train_epochs": 6,
|
81 |
+
"save_steps": 500,
|
82 |
+
"stateful_callbacks": {
|
83 |
+
"TrainerControl": {
|
84 |
+
"args": {
|
85 |
+
"should_epoch_stop": false,
|
86 |
+
"should_evaluate": false,
|
87 |
+
"should_log": false,
|
88 |
+
"should_save": true,
|
89 |
+
"should_training_stop": false
|
90 |
+
},
|
91 |
+
"attributes": {}
|
92 |
+
}
|
93 |
+
},
|
94 |
+
"total_flos": 3.838286558737613e+18,
|
95 |
+
"train_batch_size": 32,
|
96 |
+
"trial_name": null,
|
97 |
+
"trial_params": null
|
98 |
+
}
|
checkpoint-1435/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a497a9eb5e81ae5cb20a2ceefdd1321aa32c6b27b5c4bd8f2816c6cf571b8aed
|
3 |
+
size 5304
|
checkpoint-1722/config.json
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"SiglipForImageClassification"
|
4 |
+
],
|
5 |
+
"id2label": {
|
6 |
+
"0": "age 01-10",
|
7 |
+
"1": "age 11-20",
|
8 |
+
"2": "age 21-30",
|
9 |
+
"3": "age 31-40",
|
10 |
+
"4": "age 41-55",
|
11 |
+
"5": "age 56-65",
|
12 |
+
"6": "age 66-80",
|
13 |
+
"7": "age 80 +"
|
14 |
+
},
|
15 |
+
"initializer_factor": 1.0,
|
16 |
+
"label2id": {
|
17 |
+
"age 01-10": 0,
|
18 |
+
"age 11-20": 1,
|
19 |
+
"age 21-30": 2,
|
20 |
+
"age 31-40": 3,
|
21 |
+
"age 41-55": 4,
|
22 |
+
"age 56-65": 5,
|
23 |
+
"age 66-80": 6,
|
24 |
+
"age 80 +": 7
|
25 |
+
},
|
26 |
+
"model_type": "siglip",
|
27 |
+
"problem_type": "single_label_classification",
|
28 |
+
"text_config": {
|
29 |
+
"attention_dropout": 0.0,
|
30 |
+
"hidden_act": "gelu_pytorch_tanh",
|
31 |
+
"hidden_size": 768,
|
32 |
+
"intermediate_size": 3072,
|
33 |
+
"layer_norm_eps": 1e-06,
|
34 |
+
"max_position_embeddings": 64,
|
35 |
+
"model_type": "siglip_text_model",
|
36 |
+
"num_attention_heads": 12,
|
37 |
+
"num_hidden_layers": 12,
|
38 |
+
"projection_size": 768,
|
39 |
+
"torch_dtype": "float32",
|
40 |
+
"vocab_size": 256000
|
41 |
+
},
|
42 |
+
"torch_dtype": "float32",
|
43 |
+
"transformers_version": "4.50.0",
|
44 |
+
"vision_config": {
|
45 |
+
"attention_dropout": 0.0,
|
46 |
+
"hidden_act": "gelu_pytorch_tanh",
|
47 |
+
"hidden_size": 768,
|
48 |
+
"image_size": 224,
|
49 |
+
"intermediate_size": 3072,
|
50 |
+
"layer_norm_eps": 1e-06,
|
51 |
+
"model_type": "siglip_vision_model",
|
52 |
+
"num_attention_heads": 12,
|
53 |
+
"num_channels": 3,
|
54 |
+
"num_hidden_layers": 12,
|
55 |
+
"patch_size": 16,
|
56 |
+
"torch_dtype": "float32"
|
57 |
+
}
|
58 |
+
}
|
checkpoint-1722/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fbe2101f827826eba65938ed4e574788e3bf3542a179b1d352c2f8b255452c68
|
3 |
+
size 371586448
|
checkpoint-1722/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b3164c6b6dc3361834336c5e8a99a84eee6b53526535444cf0c2972cb80621d
|
3 |
+
size 686592634
|
checkpoint-1722/preprocessor_config.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_convert_rgb": null,
|
3 |
+
"do_normalize": true,
|
4 |
+
"do_rescale": true,
|
5 |
+
"do_resize": true,
|
6 |
+
"image_mean": [
|
7 |
+
0.5,
|
8 |
+
0.5,
|
9 |
+
0.5
|
10 |
+
],
|
11 |
+
"image_processor_type": "SiglipImageProcessor",
|
12 |
+
"image_std": [
|
13 |
+
0.5,
|
14 |
+
0.5,
|
15 |
+
0.5
|
16 |
+
],
|
17 |
+
"processor_class": "SiglipProcessor",
|
18 |
+
"resample": 2,
|
19 |
+
"rescale_factor": 0.00392156862745098,
|
20 |
+
"size": {
|
21 |
+
"height": 224,
|
22 |
+
"width": 224
|
23 |
+
}
|
24 |
+
}
|
checkpoint-1722/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5f0d1da9544d8d5ff8def792568fe988e57addc62b4849048df7840c76f70476
|
3 |
+
size 14244
|
checkpoint-1722/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ea0234fc2a4b91785bb19aa8cb52ba790ab2917af4d2023a186cdee4611c5571
|
3 |
+
size 1064
|
checkpoint-1722/trainer_state.json
ADDED
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_global_step": 1722,
|
3 |
+
"best_metric": 0.44515112042427063,
|
4 |
+
"best_model_checkpoint": "siglip2-finetune-full/checkpoint-1722",
|
5 |
+
"epoch": 6.0,
|
6 |
+
"eval_steps": 500,
|
7 |
+
"global_step": 1722,
|
8 |
+
"is_hyper_param_search": false,
|
9 |
+
"is_local_process_zero": true,
|
10 |
+
"is_world_process_zero": true,
|
11 |
+
"log_history": [
|
12 |
+
{
|
13 |
+
"epoch": 1.0,
|
14 |
+
"eval_accuracy": 0.646481178396072,
|
15 |
+
"eval_loss": 0.8569742441177368,
|
16 |
+
"eval_model_preparation_time": 0.0022,
|
17 |
+
"eval_runtime": 120.9539,
|
18 |
+
"eval_samples_per_second": 75.773,
|
19 |
+
"eval_steps_per_second": 9.475,
|
20 |
+
"step": 287
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"epoch": 1.7421602787456445,
|
24 |
+
"grad_norm": 29.628196716308594,
|
25 |
+
"learning_rate": 1.4617224880382776e-05,
|
26 |
+
"loss": 1.0903,
|
27 |
+
"step": 500
|
28 |
+
},
|
29 |
+
{
|
30 |
+
"epoch": 2.0,
|
31 |
+
"eval_accuracy": 0.6763775231860338,
|
32 |
+
"eval_loss": 0.7689628005027771,
|
33 |
+
"eval_model_preparation_time": 0.0022,
|
34 |
+
"eval_runtime": 120.8745,
|
35 |
+
"eval_samples_per_second": 75.822,
|
36 |
+
"eval_steps_per_second": 9.481,
|
37 |
+
"step": 574
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 3.0,
|
41 |
+
"eval_accuracy": 0.7290780141843972,
|
42 |
+
"eval_loss": 0.6629450917243958,
|
43 |
+
"eval_model_preparation_time": 0.0022,
|
44 |
+
"eval_runtime": 121.8989,
|
45 |
+
"eval_samples_per_second": 75.185,
|
46 |
+
"eval_steps_per_second": 9.401,
|
47 |
+
"step": 861
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"epoch": 3.484320557491289,
|
51 |
+
"grad_norm": 19.66444969177246,
|
52 |
+
"learning_rate": 8.636363636363637e-06,
|
53 |
+
"loss": 0.7613,
|
54 |
+
"step": 1000
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"epoch": 4.0,
|
58 |
+
"eval_accuracy": 0.7742498636115658,
|
59 |
+
"eval_loss": 0.552396297454834,
|
60 |
+
"eval_model_preparation_time": 0.0022,
|
61 |
+
"eval_runtime": 121.0546,
|
62 |
+
"eval_samples_per_second": 75.71,
|
63 |
+
"eval_steps_per_second": 9.467,
|
64 |
+
"step": 1148
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"epoch": 5.0,
|
68 |
+
"eval_accuracy": 0.8007637752318604,
|
69 |
+
"eval_loss": 0.4922109544277191,
|
70 |
+
"eval_model_preparation_time": 0.0022,
|
71 |
+
"eval_runtime": 121.4614,
|
72 |
+
"eval_samples_per_second": 75.456,
|
73 |
+
"eval_steps_per_second": 9.435,
|
74 |
+
"step": 1435
|
75 |
+
},
|
76 |
+
{
|
77 |
+
"epoch": 5.2264808362369335,
|
78 |
+
"grad_norm": 19.047819137573242,
|
79 |
+
"learning_rate": 2.6555023923444976e-06,
|
80 |
+
"loss": 0.5947,
|
81 |
+
"step": 1500
|
82 |
+
},
|
83 |
+
{
|
84 |
+
"epoch": 6.0,
|
85 |
+
"eval_accuracy": 0.8224768139661757,
|
86 |
+
"eval_loss": 0.44515112042427063,
|
87 |
+
"eval_model_preparation_time": 0.0022,
|
88 |
+
"eval_runtime": 121.6668,
|
89 |
+
"eval_samples_per_second": 75.329,
|
90 |
+
"eval_steps_per_second": 9.419,
|
91 |
+
"step": 1722
|
92 |
+
}
|
93 |
+
],
|
94 |
+
"logging_steps": 500,
|
95 |
+
"max_steps": 1722,
|
96 |
+
"num_input_tokens_seen": 0,
|
97 |
+
"num_train_epochs": 6,
|
98 |
+
"save_steps": 500,
|
99 |
+
"stateful_callbacks": {
|
100 |
+
"TrainerControl": {
|
101 |
+
"args": {
|
102 |
+
"should_epoch_stop": false,
|
103 |
+
"should_evaluate": false,
|
104 |
+
"should_log": false,
|
105 |
+
"should_save": true,
|
106 |
+
"should_training_stop": true
|
107 |
+
},
|
108 |
+
"attributes": {}
|
109 |
+
}
|
110 |
+
},
|
111 |
+
"total_flos": 4.6059438704851354e+18,
|
112 |
+
"train_batch_size": 32,
|
113 |
+
"trial_name": null,
|
114 |
+
"trial_params": null
|
115 |
+
}
|
checkpoint-1722/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a497a9eb5e81ae5cb20a2ceefdd1321aa32c6b27b5c4bd8f2816c6cf571b8aed
|
3 |
+
size 5304
|
checkpoint-287/config.json
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"SiglipForImageClassification"
|
4 |
+
],
|
5 |
+
"id2label": {
|
6 |
+
"0": "age 01-10",
|
7 |
+
"1": "age 11-20",
|
8 |
+
"2": "age 21-30",
|
9 |
+
"3": "age 31-40",
|
10 |
+
"4": "age 41-55",
|
11 |
+
"5": "age 56-65",
|
12 |
+
"6": "age 66-80",
|
13 |
+
"7": "age 80 +"
|
14 |
+
},
|
15 |
+
"initializer_factor": 1.0,
|
16 |
+
"label2id": {
|
17 |
+
"age 01-10": 0,
|
18 |
+
"age 11-20": 1,
|
19 |
+
"age 21-30": 2,
|
20 |
+
"age 31-40": 3,
|
21 |
+
"age 41-55": 4,
|
22 |
+
"age 56-65": 5,
|
23 |
+
"age 66-80": 6,
|
24 |
+
"age 80 +": 7
|
25 |
+
},
|
26 |
+
"model_type": "siglip",
|
27 |
+
"problem_type": "single_label_classification",
|
28 |
+
"text_config": {
|
29 |
+
"attention_dropout": 0.0,
|
30 |
+
"hidden_act": "gelu_pytorch_tanh",
|
31 |
+
"hidden_size": 768,
|
32 |
+
"intermediate_size": 3072,
|
33 |
+
"layer_norm_eps": 1e-06,
|
34 |
+
"max_position_embeddings": 64,
|
35 |
+
"model_type": "siglip_text_model",
|
36 |
+
"num_attention_heads": 12,
|
37 |
+
"num_hidden_layers": 12,
|
38 |
+
"projection_size": 768,
|
39 |
+
"torch_dtype": "float32",
|
40 |
+
"vocab_size": 256000
|
41 |
+
},
|
42 |
+
"torch_dtype": "float32",
|
43 |
+
"transformers_version": "4.50.0",
|
44 |
+
"vision_config": {
|
45 |
+
"attention_dropout": 0.0,
|
46 |
+
"hidden_act": "gelu_pytorch_tanh",
|
47 |
+
"hidden_size": 768,
|
48 |
+
"image_size": 224,
|
49 |
+
"intermediate_size": 3072,
|
50 |
+
"layer_norm_eps": 1e-06,
|
51 |
+
"model_type": "siglip_vision_model",
|
52 |
+
"num_attention_heads": 12,
|
53 |
+
"num_channels": 3,
|
54 |
+
"num_hidden_layers": 12,
|
55 |
+
"patch_size": 16,
|
56 |
+
"torch_dtype": "float32"
|
57 |
+
}
|
58 |
+
}
|
checkpoint-287/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a0b35c5f7124fb8546a6c7ae1c7b6f6083f2d7e1e9c16d3a329713224394019
|
3 |
+
size 371586448
|
checkpoint-287/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ae3443c600a9856e1f0b8786275ac05174ed1e7ada730b00ea5fe1318ebd52f
|
3 |
+
size 686592634
|
checkpoint-287/preprocessor_config.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_convert_rgb": null,
|
3 |
+
"do_normalize": true,
|
4 |
+
"do_rescale": true,
|
5 |
+
"do_resize": true,
|
6 |
+
"image_mean": [
|
7 |
+
0.5,
|
8 |
+
0.5,
|
9 |
+
0.5
|
10 |
+
],
|
11 |
+
"image_processor_type": "SiglipImageProcessor",
|
12 |
+
"image_std": [
|
13 |
+
0.5,
|
14 |
+
0.5,
|
15 |
+
0.5
|
16 |
+
],
|
17 |
+
"processor_class": "SiglipProcessor",
|
18 |
+
"resample": 2,
|
19 |
+
"rescale_factor": 0.00392156862745098,
|
20 |
+
"size": {
|
21 |
+
"height": 224,
|
22 |
+
"width": 224
|
23 |
+
}
|
24 |
+
}
|
checkpoint-287/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:94d4167eb9695efadf5fae0f6733f49c4d2e0f8b027357dff8baf53ea791bcd3
|
3 |
+
size 14244
|
checkpoint-287/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a0f22126888db25140d8ca37d26bccded9119e4cfd672c9f9f6201bb8f6f1c5
|
3 |
+
size 1064
|
checkpoint-287/trainer_state.json
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_global_step": 287,
|
3 |
+
"best_metric": 0.8569742441177368,
|
4 |
+
"best_model_checkpoint": "siglip2-finetune-full/checkpoint-287",
|
5 |
+
"epoch": 1.0,
|
6 |
+
"eval_steps": 500,
|
7 |
+
"global_step": 287,
|
8 |
+
"is_hyper_param_search": false,
|
9 |
+
"is_local_process_zero": true,
|
10 |
+
"is_world_process_zero": true,
|
11 |
+
"log_history": [
|
12 |
+
{
|
13 |
+
"epoch": 1.0,
|
14 |
+
"eval_accuracy": 0.646481178396072,
|
15 |
+
"eval_loss": 0.8569742441177368,
|
16 |
+
"eval_model_preparation_time": 0.0022,
|
17 |
+
"eval_runtime": 120.9539,
|
18 |
+
"eval_samples_per_second": 75.773,
|
19 |
+
"eval_steps_per_second": 9.475,
|
20 |
+
"step": 287
|
21 |
+
}
|
22 |
+
],
|
23 |
+
"logging_steps": 500,
|
24 |
+
"max_steps": 1722,
|
25 |
+
"num_input_tokens_seen": 0,
|
26 |
+
"num_train_epochs": 6,
|
27 |
+
"save_steps": 500,
|
28 |
+
"stateful_callbacks": {
|
29 |
+
"TrainerControl": {
|
30 |
+
"args": {
|
31 |
+
"should_epoch_stop": false,
|
32 |
+
"should_evaluate": false,
|
33 |
+
"should_log": false,
|
34 |
+
"should_save": true,
|
35 |
+
"should_training_stop": false
|
36 |
+
},
|
37 |
+
"attributes": {}
|
38 |
+
}
|
39 |
+
},
|
40 |
+
"total_flos": 7.676573117475226e+17,
|
41 |
+
"train_batch_size": 32,
|
42 |
+
"trial_name": null,
|
43 |
+
"trial_params": null
|
44 |
+
}
|
checkpoint-287/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a497a9eb5e81ae5cb20a2ceefdd1321aa32c6b27b5c4bd8f2816c6cf571b8aed
|
3 |
+
size 5304
|
checkpoint-574/config.json
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"SiglipForImageClassification"
|
4 |
+
],
|
5 |
+
"id2label": {
|
6 |
+
"0": "age 01-10",
|
7 |
+
"1": "age 11-20",
|
8 |
+
"2": "age 21-30",
|
9 |
+
"3": "age 31-40",
|
10 |
+
"4": "age 41-55",
|
11 |
+
"5": "age 56-65",
|
12 |
+
"6": "age 66-80",
|
13 |
+
"7": "age 80 +"
|
14 |
+
},
|
15 |
+
"initializer_factor": 1.0,
|
16 |
+
"label2id": {
|
17 |
+
"age 01-10": 0,
|
18 |
+
"age 11-20": 1,
|
19 |
+
"age 21-30": 2,
|
20 |
+
"age 31-40": 3,
|
21 |
+
"age 41-55": 4,
|
22 |
+
"age 56-65": 5,
|
23 |
+
"age 66-80": 6,
|
24 |
+
"age 80 +": 7
|
25 |
+
},
|
26 |
+
"model_type": "siglip",
|
27 |
+
"problem_type": "single_label_classification",
|
28 |
+
"text_config": {
|
29 |
+
"attention_dropout": 0.0,
|
30 |
+
"hidden_act": "gelu_pytorch_tanh",
|
31 |
+
"hidden_size": 768,
|
32 |
+
"intermediate_size": 3072,
|
33 |
+
"layer_norm_eps": 1e-06,
|
34 |
+
"max_position_embeddings": 64,
|
35 |
+
"model_type": "siglip_text_model",
|
36 |
+
"num_attention_heads": 12,
|
37 |
+
"num_hidden_layers": 12,
|
38 |
+
"projection_size": 768,
|
39 |
+
"torch_dtype": "float32",
|
40 |
+
"vocab_size": 256000
|
41 |
+
},
|
42 |
+
"torch_dtype": "float32",
|
43 |
+
"transformers_version": "4.50.0",
|
44 |
+
"vision_config": {
|
45 |
+
"attention_dropout": 0.0,
|
46 |
+
"hidden_act": "gelu_pytorch_tanh",
|
47 |
+
"hidden_size": 768,
|
48 |
+
"image_size": 224,
|
49 |
+
"intermediate_size": 3072,
|
50 |
+
"layer_norm_eps": 1e-06,
|
51 |
+
"model_type": "siglip_vision_model",
|
52 |
+
"num_attention_heads": 12,
|
53 |
+
"num_channels": 3,
|
54 |
+
"num_hidden_layers": 12,
|
55 |
+
"patch_size": 16,
|
56 |
+
"torch_dtype": "float32"
|
57 |
+
}
|
58 |
+
}
|
checkpoint-574/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc0953884c77b7a37881be4dd541352f2cad1945464a43364873cc076e0474ee
|
3 |
+
size 371586448
|
checkpoint-574/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f62b0dfe1f1e8bcd7b731e9369c877643cac03453ab9ce228856819b7e3ada1
|
3 |
+
size 686592634
|
checkpoint-574/preprocessor_config.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_convert_rgb": null,
|
3 |
+
"do_normalize": true,
|
4 |
+
"do_rescale": true,
|
5 |
+
"do_resize": true,
|
6 |
+
"image_mean": [
|
7 |
+
0.5,
|
8 |
+
0.5,
|
9 |
+
0.5
|
10 |
+
],
|
11 |
+
"image_processor_type": "SiglipImageProcessor",
|
12 |
+
"image_std": [
|
13 |
+
0.5,
|
14 |
+
0.5,
|
15 |
+
0.5
|
16 |
+
],
|
17 |
+
"processor_class": "SiglipProcessor",
|
18 |
+
"resample": 2,
|
19 |
+
"rescale_factor": 0.00392156862745098,
|
20 |
+
"size": {
|
21 |
+
"height": 224,
|
22 |
+
"width": 224
|
23 |
+
}
|
24 |
+
}
|
checkpoint-574/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf6fe4a70f9d3cd69383e5df6382774b34cffad85ee54931a76fb679e1b60fb1
|
3 |
+
size 14244
|
checkpoint-574/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8341dbc20e8d09dcd9180a96b9ad1abbf0b1b531833e452432c4e8c332dd8f7
|
3 |
+
size 1064
|
checkpoint-574/trainer_state.json
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_global_step": 574,
|
3 |
+
"best_metric": 0.7689628005027771,
|
4 |
+
"best_model_checkpoint": "siglip2-finetune-full/checkpoint-574",
|
5 |
+
"epoch": 2.0,
|
6 |
+
"eval_steps": 500,
|
7 |
+
"global_step": 574,
|
8 |
+
"is_hyper_param_search": false,
|
9 |
+
"is_local_process_zero": true,
|
10 |
+
"is_world_process_zero": true,
|
11 |
+
"log_history": [
|
12 |
+
{
|
13 |
+
"epoch": 1.0,
|
14 |
+
"eval_accuracy": 0.646481178396072,
|
15 |
+
"eval_loss": 0.8569742441177368,
|
16 |
+
"eval_model_preparation_time": 0.0022,
|
17 |
+
"eval_runtime": 120.9539,
|
18 |
+
"eval_samples_per_second": 75.773,
|
19 |
+
"eval_steps_per_second": 9.475,
|
20 |
+
"step": 287
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"epoch": 1.7421602787456445,
|
24 |
+
"grad_norm": 29.628196716308594,
|
25 |
+
"learning_rate": 1.4617224880382776e-05,
|
26 |
+
"loss": 1.0903,
|
27 |
+
"step": 500
|
28 |
+
},
|
29 |
+
{
|
30 |
+
"epoch": 2.0,
|
31 |
+
"eval_accuracy": 0.6763775231860338,
|
32 |
+
"eval_loss": 0.7689628005027771,
|
33 |
+
"eval_model_preparation_time": 0.0022,
|
34 |
+
"eval_runtime": 120.8745,
|
35 |
+
"eval_samples_per_second": 75.822,
|
36 |
+
"eval_steps_per_second": 9.481,
|
37 |
+
"step": 574
|
38 |
+
}
|
39 |
+
],
|
40 |
+
"logging_steps": 500,
|
41 |
+
"max_steps": 1722,
|
42 |
+
"num_input_tokens_seen": 0,
|
43 |
+
"num_train_epochs": 6,
|
44 |
+
"save_steps": 500,
|
45 |
+
"stateful_callbacks": {
|
46 |
+
"TrainerControl": {
|
47 |
+
"args": {
|
48 |
+
"should_epoch_stop": false,
|
49 |
+
"should_evaluate": false,
|
50 |
+
"should_log": false,
|
51 |
+
"should_save": true,
|
52 |
+
"should_training_stop": false
|
53 |
+
},
|
54 |
+
"attributes": {}
|
55 |
+
}
|
56 |
+
},
|
57 |
+
"total_flos": 1.535314623495045e+18,
|
58 |
+
"train_batch_size": 32,
|
59 |
+
"trial_name": null,
|
60 |
+
"trial_params": null
|
61 |
+
}
|
checkpoint-574/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a497a9eb5e81ae5cb20a2ceefdd1321aa32c6b27b5c4bd8f2816c6cf571b8aed
|
3 |
+
size 5304
|
checkpoint-861/config.json
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"SiglipForImageClassification"
|
4 |
+
],
|
5 |
+
"id2label": {
|
6 |
+
"0": "age 01-10",
|
7 |
+
"1": "age 11-20",
|
8 |
+
"2": "age 21-30",
|
9 |
+
"3": "age 31-40",
|
10 |
+
"4": "age 41-55",
|
11 |
+
"5": "age 56-65",
|
12 |
+
"6": "age 66-80",
|
13 |
+
"7": "age 80 +"
|
14 |
+
},
|
15 |
+
"initializer_factor": 1.0,
|
16 |
+
"label2id": {
|
17 |
+
"age 01-10": 0,
|
18 |
+
"age 11-20": 1,
|
19 |
+
"age 21-30": 2,
|
20 |
+
"age 31-40": 3,
|
21 |
+
"age 41-55": 4,
|
22 |
+
"age 56-65": 5,
|
23 |
+
"age 66-80": 6,
|
24 |
+
"age 80 +": 7
|
25 |
+
},
|
26 |
+
"model_type": "siglip",
|
27 |
+
"problem_type": "single_label_classification",
|
28 |
+
"text_config": {
|
29 |
+
"attention_dropout": 0.0,
|
30 |
+
"hidden_act": "gelu_pytorch_tanh",
|
31 |
+
"hidden_size": 768,
|
32 |
+
"intermediate_size": 3072,
|
33 |
+
"layer_norm_eps": 1e-06,
|
34 |
+
"max_position_embeddings": 64,
|
35 |
+
"model_type": "siglip_text_model",
|
36 |
+
"num_attention_heads": 12,
|
37 |
+
"num_hidden_layers": 12,
|
38 |
+
"projection_size": 768,
|
39 |
+
"torch_dtype": "float32",
|
40 |
+
"vocab_size": 256000
|
41 |
+
},
|
42 |
+
"torch_dtype": "float32",
|
43 |
+
"transformers_version": "4.50.0",
|
44 |
+
"vision_config": {
|
45 |
+
"attention_dropout": 0.0,
|
46 |
+
"hidden_act": "gelu_pytorch_tanh",
|
47 |
+
"hidden_size": 768,
|
48 |
+
"image_size": 224,
|
49 |
+
"intermediate_size": 3072,
|
50 |
+
"layer_norm_eps": 1e-06,
|
51 |
+
"model_type": "siglip_vision_model",
|
52 |
+
"num_attention_heads": 12,
|
53 |
+
"num_channels": 3,
|
54 |
+
"num_hidden_layers": 12,
|
55 |
+
"patch_size": 16,
|
56 |
+
"torch_dtype": "float32"
|
57 |
+
}
|
58 |
+
}
|
checkpoint-861/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bed26da0481a45b099c675ad3c774564a30c28e5299a5e1d5102d95f29b8ca4f
|
3 |
+
size 371586448
|
checkpoint-861/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e397966ce338e6ad068b1dc8ce09b7e326a0238b9fb608004e68168720fd89b7
|
3 |
+
size 686592634
|
checkpoint-861/preprocessor_config.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_convert_rgb": null,
|
3 |
+
"do_normalize": true,
|
4 |
+
"do_rescale": true,
|
5 |
+
"do_resize": true,
|
6 |
+
"image_mean": [
|
7 |
+
0.5,
|
8 |
+
0.5,
|
9 |
+
0.5
|
10 |
+
],
|
11 |
+
"image_processor_type": "SiglipImageProcessor",
|
12 |
+
"image_std": [
|
13 |
+
0.5,
|
14 |
+
0.5,
|
15 |
+
0.5
|
16 |
+
],
|
17 |
+
"processor_class": "SiglipProcessor",
|
18 |
+
"resample": 2,
|
19 |
+
"rescale_factor": 0.00392156862745098,
|
20 |
+
"size": {
|
21 |
+
"height": 224,
|
22 |
+
"width": 224
|
23 |
+
}
|
24 |
+
}
|
checkpoint-861/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a303624aee1ee47e399d7a0dc5ab60456d7cbea78de10f3b58c1e0fc1ac8f39b
|
3 |
+
size 14244
|
checkpoint-861/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0aa40d74352b0a1a5efcc83cd3091bb039d649e19ab7a1d651fd00c984213270
|
3 |
+
size 1064
|
checkpoint-861/trainer_state.json
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_global_step": 861,
|
3 |
+
"best_metric": 0.6629450917243958,
|
4 |
+
"best_model_checkpoint": "siglip2-finetune-full/checkpoint-861",
|
5 |
+
"epoch": 3.0,
|
6 |
+
"eval_steps": 500,
|
7 |
+
"global_step": 861,
|
8 |
+
"is_hyper_param_search": false,
|
9 |
+
"is_local_process_zero": true,
|
10 |
+
"is_world_process_zero": true,
|
11 |
+
"log_history": [
|
12 |
+
{
|
13 |
+
"epoch": 1.0,
|
14 |
+
"eval_accuracy": 0.646481178396072,
|
15 |
+
"eval_loss": 0.8569742441177368,
|
16 |
+
"eval_model_preparation_time": 0.0022,
|
17 |
+
"eval_runtime": 120.9539,
|
18 |
+
"eval_samples_per_second": 75.773,
|
19 |
+
"eval_steps_per_second": 9.475,
|
20 |
+
"step": 287
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"epoch": 1.7421602787456445,
|
24 |
+
"grad_norm": 29.628196716308594,
|
25 |
+
"learning_rate": 1.4617224880382776e-05,
|
26 |
+
"loss": 1.0903,
|
27 |
+
"step": 500
|
28 |
+
},
|
29 |
+
{
|
30 |
+
"epoch": 2.0,
|
31 |
+
"eval_accuracy": 0.6763775231860338,
|
32 |
+
"eval_loss": 0.7689628005027771,
|
33 |
+
"eval_model_preparation_time": 0.0022,
|
34 |
+
"eval_runtime": 120.8745,
|
35 |
+
"eval_samples_per_second": 75.822,
|
36 |
+
"eval_steps_per_second": 9.481,
|
37 |
+
"step": 574
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 3.0,
|
41 |
+
"eval_accuracy": 0.7290780141843972,
|
42 |
+
"eval_loss": 0.6629450917243958,
|
43 |
+
"eval_model_preparation_time": 0.0022,
|
44 |
+
"eval_runtime": 121.8989,
|
45 |
+
"eval_samples_per_second": 75.185,
|
46 |
+
"eval_steps_per_second": 9.401,
|
47 |
+
"step": 861
|
48 |
+
}
|
49 |
+
],
|
50 |
+
"logging_steps": 500,
|
51 |
+
"max_steps": 1722,
|
52 |
+
"num_input_tokens_seen": 0,
|
53 |
+
"num_train_epochs": 6,
|
54 |
+
"save_steps": 500,
|
55 |
+
"stateful_callbacks": {
|
56 |
+
"TrainerControl": {
|
57 |
+
"args": {
|
58 |
+
"should_epoch_stop": false,
|
59 |
+
"should_evaluate": false,
|
60 |
+
"should_log": false,
|
61 |
+
"should_save": true,
|
62 |
+
"should_training_stop": false
|
63 |
+
},
|
64 |
+
"attributes": {}
|
65 |
+
}
|
66 |
+
},
|
67 |
+
"total_flos": 2.3029719352425677e+18,
|
68 |
+
"train_batch_size": 32,
|
69 |
+
"trial_name": null,
|
70 |
+
"trial_params": null
|
71 |
+
}
|
checkpoint-861/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a497a9eb5e81ae5cb20a2ceefdd1321aa32c6b27b5c4bd8f2816c6cf571b8aed
|
3 |
+
size 5304
|
config.json
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"SiglipForImageClassification"
|
4 |
+
],
|
5 |
+
"id2label": {
|
6 |
+
"0": "age 01-10",
|
7 |
+
"1": "age 11-20",
|
8 |
+
"2": "age 21-30",
|
9 |
+
"3": "age 31-40",
|
10 |
+
"4": "age 41-55",
|
11 |
+
"5": "age 56-65",
|
12 |
+
"6": "age 66-80",
|
13 |
+
"7": "age 80 +"
|
14 |
+
},
|
15 |
+
"initializer_factor": 1.0,
|
16 |
+
"label2id": {
|
17 |
+
"age 01-10": 0,
|
18 |
+
"age 11-20": 1,
|
19 |
+
"age 21-30": 2,
|
20 |
+
"age 31-40": 3,
|
21 |
+
"age 41-55": 4,
|
22 |
+
"age 56-65": 5,
|
23 |
+
"age 66-80": 6,
|
24 |
+
"age 80 +": 7
|
25 |
+
},
|
26 |
+
"model_type": "siglip",
|
27 |
+
"problem_type": "single_label_classification",
|
28 |
+
"text_config": {
|
29 |
+
"attention_dropout": 0.0,
|
30 |
+
"hidden_act": "gelu_pytorch_tanh",
|
31 |
+
"hidden_size": 768,
|
32 |
+
"intermediate_size": 3072,
|
33 |
+
"layer_norm_eps": 1e-06,
|
34 |
+
"max_position_embeddings": 64,
|
35 |
+
"model_type": "siglip_text_model",
|
36 |
+
"num_attention_heads": 12,
|
37 |
+
"num_hidden_layers": 12,
|
38 |
+
"projection_size": 768,
|
39 |
+
"torch_dtype": "float32",
|
40 |
+
"vocab_size": 256000
|
41 |
+
},
|
42 |
+
"torch_dtype": "float32",
|
43 |
+
"transformers_version": "4.50.0",
|
44 |
+
"vision_config": {
|
45 |
+
"attention_dropout": 0.0,
|
46 |
+
"hidden_act": "gelu_pytorch_tanh",
|
47 |
+
"hidden_size": 768,
|
48 |
+
"image_size": 224,
|
49 |
+
"intermediate_size": 3072,
|
50 |
+
"layer_norm_eps": 1e-06,
|
51 |
+
"model_type": "siglip_vision_model",
|
52 |
+
"num_attention_heads": 12,
|
53 |
+
"num_channels": 3,
|
54 |
+
"num_hidden_layers": 12,
|
55 |
+
"patch_size": 16,
|
56 |
+
"torch_dtype": "float32"
|
57 |
+
}
|
58 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fbe2101f827826eba65938ed4e574788e3bf3542a179b1d352c2f8b255452c68
|
3 |
+
size 371586448
|