alyzbane committed on
Commit
9162f7d
·
verified ·
1 Parent(s): e0c2aba

End of training

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ integrated_gradients_grid.jpg filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -4,8 +4,6 @@ license: apache-2.0
4
  base_model: facebook/convnext-tiny-224
5
  tags:
6
  - generated_from_trainer
7
- datasets:
8
- - imagefolder
9
  metrics:
10
  - precision
11
  - recall
@@ -13,29 +11,7 @@ metrics:
13
  - accuracy
14
  model-index:
15
  - name: convnext-tiny-224-finetuned-barkley
16
- results:
17
- - task:
18
- name: Image Classification
19
- type: image-classification
20
- dataset:
21
- name: imagefolder
22
- type: imagefolder
23
- config: default
24
- split: train
25
- args: default
26
- metrics:
27
- - name: Precision
28
- type: precision
29
- value: 0.9936145510835913
30
- - name: Recall
31
- type: recall
32
- value: 0.993421052631579
33
- - name: F1
34
- type: f1
35
- value: 0.993419541966282
36
- - name: Accuracy
37
- type: accuracy
38
- value: 0.9939393939393939
39
  ---
40
 
41
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -43,15 +19,15 @@ should probably proofread and complete it, then remove this comment. -->
43
 
44
  # convnext-tiny-224-finetuned-barkley
45
 
46
- This model is a fine-tuned version of [facebook/convnext-tiny-224](https://huggingface.co/facebook/convnext-tiny-224) on the imagefolder dataset.
47
  It achieves the following results on the evaluation set:
48
- - Loss: 0.0266
49
- - Precision: 0.9936
50
- - Recall: 0.9934
51
- - F1: 0.9934
52
- - Accuracy: 0.9939
53
- - Top1 Accuracy: 0.9934
54
- - Error Rate: 0.0061
55
 
56
  ## Model description
57
 
@@ -71,9 +47,11 @@ More information needed
71
 
72
  The following hyperparameters were used during training:
73
  - learning_rate: 0.0002
74
- - train_batch_size: 32
75
- - eval_batch_size: 32
76
  - seed: 42
 
 
77
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
78
  - lr_scheduler_type: linear
79
  - lr_scheduler_warmup_ratio: 0.1
@@ -82,29 +60,43 @@ The following hyperparameters were used during training:
82
 
83
  ### Training results
84
 
85
- | Training Loss | Epoch | Step | Accuracy | Error Rate | F1 | Validation Loss | Precision | Recall | Top1 Accuracy |
86
- |:-------------:|:-----:|:----:|:--------:|:----------:|:------:|:---------------:|:---------:|:------:|:-------------:|
87
- | 1.576 | 1.0 | 38 | 0.3479 | 0.6521 | 0.2952 | 1.5660 | 0.3007 | 0.3684 | 0.3684 |
88
- | 1.5469 | 2.0 | 76 | 0.3854 | 0.6146 | 0.3215 | 1.5353 | 0.3141 | 0.4079 | 0.4079 |
89
- | 1.5081 | 3.0 | 114 | 0.4436 | 0.5564 | 0.3961 | 1.4782 | 0.5684 | 0.4671 | 0.4671 |
90
- | 1.4278 | 4.0 | 152 | 0.5866 | 0.4134 | 0.5840 | 1.3718 | 0.7088 | 0.6053 | 0.6053 |
91
- | 1.2938 | 5.0 | 190 | 0.8290 | 0.1710 | 0.8378 | 1.1909 | 0.8582 | 0.8355 | 0.8355 |
92
- | 1.0696 | 6.0 | 228 | 0.9205 | 0.0795 | 0.9215 | 0.9353 | 0.9243 | 0.9211 | 0.9211 |
93
- | 0.789 | 7.0 | 266 | 0.9691 | 0.0309 | 0.9673 | 0.6347 | 0.9680 | 0.9671 | 0.9671 |
94
- | 0.506 | 8.0 | 304 | 0.9752 | 0.0248 | 0.9739 | 0.3910 | 0.9750 | 0.9737 | 0.9737 |
95
- | 0.2876 | 9.0 | 342 | 0.9814 | 0.0186 | 0.9802 | 0.2126 | 0.9808 | 0.9803 | 0.9803 |
96
- | 0.1722 | 10.0 | 380 | 0.9818 | 0.0182 | 0.9799 | 0.1409 | 0.9809 | 0.9803 | 0.9803 |
97
- | 0.1082 | 11.0 | 418 | 0.9939 | 0.0061 | 0.9934 | 0.0794 | 0.9936 | 0.9934 | 0.9934 |
98
- | 0.0715 | 12.0 | 456 | 0.9939 | 0.0061 | 0.9934 | 0.0577 | 0.9936 | 0.9934 | 0.9934 |
99
- | 0.0492 | 13.0 | 494 | 0.9879 | 0.0121 | 0.9867 | 0.0440 | 0.9872 | 0.9868 | 0.9868 |
100
- | 0.0375 | 14.0 | 532 | 0.9939 | 0.0061 | 0.9934 | 0.0266 | 0.9936 | 0.9934 | 0.9934 |
101
- | 0.029 | 15.0 | 570 | 0.9939 | 0.0061 | 0.9934 | 0.0313 | 0.9936 | 0.9934 | 0.9934 |
102
- | 0.0158 | 16.0 | 608 | 0.9879 | 0.0121 | 0.9867 | 0.0408 | 0.9872 | 0.9868 | 0.9868 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
 
105
  ### Framework versions
106
 
107
- - Transformers 4.45.2
108
  - Pytorch 2.3.1+cu121
109
  - Datasets 3.0.1
110
- - Tokenizers 0.20.1
 
4
  base_model: facebook/convnext-tiny-224
5
  tags:
6
  - generated_from_trainer
 
 
7
  metrics:
8
  - precision
9
  - recall
 
11
  - accuracy
12
  model-index:
13
  - name: convnext-tiny-224-finetuned-barkley
14
+ results: []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  ---
16
 
17
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
19
 
20
  # convnext-tiny-224-finetuned-barkley
21
 
22
+ This model is a fine-tuned version of [facebook/convnext-tiny-224](https://huggingface.co/facebook/convnext-tiny-224) on an unknown dataset.
23
  It achieves the following results on the evaluation set:
24
+ - Loss: 0.0128
25
+ - Precision: 1.0
26
+ - Recall: 1.0
27
+ - F1: 1.0
28
+ - Accuracy: 1.0
29
+ - Top1 Accuracy: 1.0
30
+ - Error Rate: 0.0
31
 
32
  ## Model description
33
 
 
47
 
48
  The following hyperparameters were used during training:
49
  - learning_rate: 0.0002
50
+ - train_batch_size: 8
51
+ - eval_batch_size: 8
52
  - seed: 42
53
+ - gradient_accumulation_steps: 4
54
+ - total_train_batch_size: 32
55
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
56
  - lr_scheduler_type: linear
57
  - lr_scheduler_warmup_ratio: 0.1
 
60
 
61
  ### Training results
62
 
63
+ | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy | Top1 Accuracy | Error Rate |
64
+ |:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|:-------------:|:----------:|
65
+ | 1.6288 | 1.0 | 38 | 1.6005 | 0.2133 | 0.2697 | 0.2043 | 0.2371 | 0.2697 | 0.7629 |
66
+ | 1.6059 | 2.0 | 76 | 1.5802 | 0.2384 | 0.2763 | 0.2243 | 0.2473 | 0.2763 | 0.7527 |
67
+ | 1.5808 | 3.0 | 114 | 1.5570 | 0.2778 | 0.3026 | 0.2595 | 0.2744 | 0.3026 | 0.7256 |
68
+ | 1.5555 | 4.0 | 152 | 1.5291 | 0.3831 | 0.375 | 0.3491 | 0.3511 | 0.375 | 0.6489 |
69
+ | 1.5232 | 5.0 | 190 | 1.4933 | 0.4252 | 0.4408 | 0.4154 | 0.4147 | 0.4408 | 0.5853 |
70
+ | 1.4784 | 6.0 | 228 | 1.4484 | 0.5076 | 0.5197 | 0.4926 | 0.4972 | 0.5197 | 0.5028 |
71
+ | 1.4242 | 7.0 | 266 | 1.3902 | 0.6857 | 0.6382 | 0.6307 | 0.6249 | 0.6382 | 0.3751 |
72
+ | 1.3586 | 8.0 | 304 | 1.3186 | 0.7728 | 0.7171 | 0.7166 | 0.7134 | 0.7171 | 0.2866 |
73
+ | 1.276 | 9.0 | 342 | 1.2236 | 0.8547 | 0.8026 | 0.8109 | 0.8060 | 0.8026 | 0.1940 |
74
+ | 1.1778 | 10.0 | 380 | 1.1122 | 0.8899 | 0.8553 | 0.8609 | 0.8601 | 0.8553 | 0.1399 |
75
+ | 1.0543 | 11.0 | 418 | 0.9839 | 0.9064 | 0.8947 | 0.8958 | 0.9005 | 0.8947 | 0.0995 |
76
+ | 0.921 | 12.0 | 456 | 0.8418 | 0.9541 | 0.9539 | 0.9537 | 0.9575 | 0.9539 | 0.0425 |
77
+ | 0.773 | 13.0 | 494 | 0.6935 | 0.9624 | 0.9605 | 0.9605 | 0.9652 | 0.9605 | 0.0348 |
78
+ | 0.6204 | 14.0 | 532 | 0.5515 | 0.9688 | 0.9671 | 0.9672 | 0.9708 | 0.9671 | 0.0292 |
79
+ | 0.4835 | 15.0 | 570 | 0.4146 | 0.9704 | 0.9671 | 0.9676 | 0.9697 | 0.9671 | 0.0303 |
80
+ | 0.3641 | 16.0 | 608 | 0.3043 | 0.9805 | 0.9803 | 0.9802 | 0.9830 | 0.9803 | 0.0170 |
81
+ | 0.2706 | 17.0 | 646 | 0.2247 | 0.9805 | 0.9803 | 0.9802 | 0.9830 | 0.9803 | 0.0170 |
82
+ | 0.1998 | 18.0 | 684 | 0.1705 | 0.9873 | 0.9868 | 0.9868 | 0.9889 | 0.9868 | 0.0111 |
83
+ | 0.1446 | 19.0 | 722 | 0.1271 | 0.9937 | 0.9934 | 0.9934 | 0.9944 | 0.9934 | 0.0056 |
84
+ | 0.1106 | 20.0 | 760 | 0.1047 | 0.9873 | 0.9868 | 0.9868 | 0.9889 | 0.9868 | 0.0111 |
85
+ | 0.0872 | 21.0 | 798 | 0.0780 | 0.9937 | 0.9934 | 0.9934 | 0.9944 | 0.9934 | 0.0056 |
86
+ | 0.0614 | 22.0 | 836 | 0.0739 | 0.9873 | 0.9868 | 0.9868 | 0.9889 | 0.9868 | 0.0111 |
87
+ | 0.0491 | 23.0 | 874 | 0.0517 | 0.9937 | 0.9934 | 0.9934 | 0.9944 | 0.9934 | 0.0056 |
88
+ | 0.0365 | 24.0 | 912 | 0.0401 | 0.9871 | 0.9868 | 0.9868 | 0.9878 | 0.9868 | 0.0122 |
89
+ | 0.0255 | 25.0 | 950 | 0.0336 | 0.9937 | 0.9934 | 0.9934 | 0.9944 | 0.9934 | 0.0056 |
90
+ | 0.0212 | 26.0 | 988 | 0.0377 | 0.9873 | 0.9868 | 0.9868 | 0.9889 | 0.9868 | 0.0111 |
91
+ | 0.0175 | 27.0 | 1026 | 0.0195 | 0.9937 | 0.9934 | 0.9934 | 0.9944 | 0.9934 | 0.0056 |
92
+ | 0.0125 | 28.0 | 1064 | 0.0214 | 0.9936 | 0.9934 | 0.9934 | 0.9933 | 0.9934 | 0.0067 |
93
+ | 0.0155 | 29.0 | 1102 | 0.0128 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 |
94
+ | 0.0104 | 30.0 | 1140 | 0.0159 | 0.9937 | 0.9934 | 0.9934 | 0.9944 | 0.9934 | 0.0056 |
95
 
96
 
97
  ### Framework versions
98
 
99
+ - Transformers 4.44.2
100
  - Pytorch 2.3.1+cu121
101
  - Datasets 3.0.1
102
+ - Tokenizers 0.19.1
all_results.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
- "epoch": 16.0,
3
- "eval_accuracy": 0.9939393939393939,
4
- "eval_error_rate": 0.0060606060606061,
5
- "eval_f1": 0.993419541966282,
6
- "eval_loss": 0.02658209018409252,
7
- "eval_precision": 0.9936145510835913,
8
- "eval_recall": 0.993421052631579,
9
- "eval_runtime": 48.6228,
10
- "eval_samples_per_second": 3.126,
11
- "eval_steps_per_second": 0.103,
12
- "eval_top1_accuracy": 0.993421052631579,
13
- "total_flos": 4.889238721360036e+17,
14
- "train_loss": 0.005142551405649436,
15
- "train_runtime": 1686.6977,
16
- "train_samples_per_second": 21.628,
17
- "train_steps_per_second": 0.676
18
  }
 
1
  {
2
+ "epoch": 30.0,
3
+ "eval_accuracy": 1.0,
4
+ "eval_error_rate": 0.0,
5
+ "eval_f1": 1.0,
6
+ "eval_loss": 0.012786287814378738,
7
+ "eval_precision": 1.0,
8
+ "eval_recall": 1.0,
9
+ "eval_runtime": 55.4352,
10
+ "eval_samples_per_second": 2.742,
11
+ "eval_steps_per_second": 0.343,
12
+ "eval_top1_accuracy": 1.0,
13
+ "total_flos": 9.167322602550067e+17,
14
+ "train_loss": 0.6629255272840199,
15
+ "train_runtime": 13519.4546,
16
+ "train_samples_per_second": 2.698,
17
+ "train_steps_per_second": 0.084
18
  }
classification_report.png ADDED
config.json CHANGED
@@ -54,5 +54,5 @@
54
  "stage4"
55
  ],
56
  "torch_dtype": "float32",
57
- "transformers_version": "4.45.2"
58
  }
 
54
  "stage4"
55
  ],
56
  "torch_dtype": "float32",
57
+ "transformers_version": "4.44.2"
58
  }
eval_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 16.0,
3
- "eval_accuracy": 0.9939393939393939,
4
- "eval_error_rate": 0.0060606060606061,
5
- "eval_f1": 0.993419541966282,
6
- "eval_loss": 0.02658209018409252,
7
- "eval_precision": 0.9936145510835913,
8
- "eval_recall": 0.993421052631579,
9
- "eval_runtime": 48.6228,
10
- "eval_samples_per_second": 3.126,
11
- "eval_steps_per_second": 0.103,
12
- "eval_top1_accuracy": 0.993421052631579
13
  }
 
1
  {
2
+ "epoch": 30.0,
3
+ "eval_accuracy": 1.0,
4
+ "eval_error_rate": 0.0,
5
+ "eval_f1": 1.0,
6
+ "eval_loss": 0.012786287814378738,
7
+ "eval_precision": 1.0,
8
+ "eval_recall": 1.0,
9
+ "eval_runtime": 55.4352,
10
+ "eval_samples_per_second": 2.742,
11
+ "eval_steps_per_second": 0.343,
12
+ "eval_top1_accuracy": 1.0
13
  }
integrated_gradients_grid.jpg ADDED

Git LFS Details

  • SHA256: de66330406018098f0414bd70e934e1c1c987cd9354484224b78335678475532
  • Pointer size: 132 Bytes
  • Size of remote file: 1.75 MB
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62798162e3f57ffa189d6ee7c57129c330720ace9dcf029005622f20bf4eb7dd
3
  size 111317164
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e5faa27c00aa42aa187e40f60ad05ba472a4242928fe76fc51a447cebef11be
3
  size 111317164
train_and_eval.jpg ADDED
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 16.0,
3
- "total_flos": 4.889238721360036e+17,
4
- "train_loss": 0.005142551405649436,
5
- "train_runtime": 1686.6977,
6
- "train_samples_per_second": 21.628,
7
- "train_steps_per_second": 0.676
8
  }
 
1
  {
2
+ "epoch": 30.0,
3
+ "total_flos": 9.167322602550067e+17,
4
+ "train_loss": 0.6629255272840199,
5
+ "train_runtime": 13519.4546,
6
+ "train_samples_per_second": 2.698,
7
+ "train_steps_per_second": 0.084
8
  }
trainer_state.json CHANGED
@@ -1,424 +1,774 @@
1
  {
2
- "best_metric": 0.02658209018409252,
3
- "best_model_checkpoint": "convnext-tiny-224-finetuned-barkley\\checkpoint-532",
4
- "epoch": 16.0,
5
  "eval_steps": 500,
6
- "global_step": 608,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "train_accuracy": 0.3092105263157895
14
  },
15
  {
16
  "epoch": 1.0,
17
- "grad_norm": 2.351605176925659,
18
- "learning_rate": 1.1659108139509125e-06,
19
- "loss": 1.576,
20
  "step": 38
21
  },
22
  {
23
  "epoch": 1.0,
24
- "eval_accuracy": 0.34791666666666665,
25
- "eval_error_rate": 0.6520833333333333,
26
- "eval_f1": 0.2952478867653256,
27
- "eval_loss": 1.5659886598587036,
28
- "eval_precision": 0.30074270516946977,
29
- "eval_recall": 0.3684210526315789,
30
- "eval_runtime": 51.2455,
31
- "eval_samples_per_second": 2.966,
32
- "eval_steps_per_second": 0.098,
33
- "eval_top1_accuracy": 0.3684210526315789,
34
  "step": 38
35
  },
36
  {
37
  "epoch": 2.0,
38
- "train_accuracy": 0.36622807017543857
39
  },
40
  {
41
  "epoch": 2.0,
42
- "grad_norm": 2.0893232822418213,
43
- "learning_rate": 2.23574935501902e-06,
44
- "loss": 1.5469,
45
  "step": 76
46
  },
47
  {
48
  "epoch": 2.0,
49
- "eval_accuracy": 0.38541666666666663,
50
- "eval_error_rate": 0.6145833333333334,
51
- "eval_f1": 0.32146190433952665,
52
- "eval_loss": 1.5353103876113892,
53
- "eval_precision": 0.3140966869404633,
54
- "eval_recall": 0.40789473684210525,
55
- "eval_runtime": 52.9551,
56
- "eval_samples_per_second": 2.87,
57
- "eval_steps_per_second": 0.094,
58
- "eval_top1_accuracy": 0.40789473684210525,
59
  "step": 76
60
  },
61
  {
62
  "epoch": 3.0,
63
- "train_accuracy": 0.4137426900584795
64
  },
65
  {
66
  "epoch": 3.0,
67
- "grad_norm": 2.353696823120117,
68
- "learning_rate": 3.927960312504679e-06,
69
- "loss": 1.5081,
70
  "step": 114
71
  },
72
  {
73
  "epoch": 3.0,
74
- "eval_accuracy": 0.44356060606060604,
75
- "eval_error_rate": 0.5564393939393939,
76
- "eval_f1": 0.3961300506250762,
77
- "eval_loss": 1.4782265424728394,
78
- "eval_precision": 0.5683927222362125,
79
- "eval_recall": 0.46710526315789475,
80
- "eval_runtime": 51.3887,
81
- "eval_samples_per_second": 2.958,
82
- "eval_steps_per_second": 0.097,
83
- "eval_top1_accuracy": 0.46710526315789475,
84
  "step": 114
85
  },
86
  {
87
  "epoch": 4.0,
88
- "train_accuracy": 0.4861111111111111
89
  },
90
  {
91
  "epoch": 4.0,
92
- "grad_norm": 2.854907512664795,
93
- "learning_rate": 6.113544042901594e-06,
94
- "loss": 1.4278,
95
  "step": 152
96
  },
97
  {
98
  "epoch": 4.0,
99
- "eval_accuracy": 0.5865530303030304,
100
- "eval_error_rate": 0.4134469696969696,
101
- "eval_f1": 0.5840194800037495,
102
- "eval_loss": 1.3718132972717285,
103
- "eval_precision": 0.7087537646637603,
104
- "eval_recall": 0.6052631578947368,
105
- "eval_runtime": 53.3959,
106
- "eval_samples_per_second": 2.847,
107
- "eval_steps_per_second": 0.094,
108
- "eval_top1_accuracy": 0.6052631578947368,
109
  "step": 152
110
  },
111
  {
112
  "epoch": 5.0,
113
- "train_accuracy": 0.6827485380116959
114
  },
115
  {
116
  "epoch": 5.0,
117
- "grad_norm": 3.946742296218872,
118
- "learning_rate": 8.62589039584572e-06,
119
- "loss": 1.2938,
120
  "step": 190
121
  },
122
  {
123
  "epoch": 5.0,
124
- "eval_accuracy": 0.8289772727272728,
125
- "eval_error_rate": 0.17102272727272716,
126
- "eval_f1": 0.8378200475239326,
127
- "eval_loss": 1.1908537149429321,
128
- "eval_precision": 0.8581657632453227,
129
- "eval_recall": 0.8355263157894737,
130
- "eval_runtime": 52.0457,
131
- "eval_samples_per_second": 2.921,
132
- "eval_steps_per_second": 0.096,
133
- "eval_top1_accuracy": 0.8355263157894737,
134
  "step": 190
135
  },
136
  {
137
  "epoch": 6.0,
138
- "train_accuracy": 0.8713450292397661
139
  },
140
  {
141
  "epoch": 6.0,
142
- "grad_norm": 2.741647720336914,
143
- "learning_rate": 1.1273479642392808e-05,
144
- "loss": 1.0696,
145
  "step": 228
146
  },
147
  {
148
  "epoch": 6.0,
149
- "eval_accuracy": 0.9204545454545455,
150
- "eval_error_rate": 0.07954545454545447,
151
- "eval_f1": 0.9214891548724753,
152
- "eval_loss": 0.9352867007255554,
153
- "eval_precision": 0.9242831541218637,
154
- "eval_recall": 0.9210526315789473,
155
- "eval_runtime": 53.2027,
156
- "eval_samples_per_second": 2.857,
157
- "eval_steps_per_second": 0.094,
158
- "eval_top1_accuracy": 0.9210526315789473,
159
  "step": 228
160
  },
161
  {
162
  "epoch": 7.0,
163
- "train_accuracy": 0.9407894736842105
164
  },
165
  {
166
  "epoch": 7.0,
167
- "grad_norm": 3.6590983867645264,
168
- "learning_rate": 1.3854482295832083e-05,
169
- "loss": 0.789,
170
  "step": 266
171
  },
172
  {
173
  "epoch": 7.0,
174
- "eval_accuracy": 0.9691287878787879,
175
- "eval_error_rate": 0.030871212121212133,
176
- "eval_f1": 0.9672834045899062,
177
- "eval_loss": 0.6346580386161804,
178
- "eval_precision": 0.9680208585981083,
179
- "eval_recall": 0.9671052631578947,
180
- "eval_runtime": 52.1891,
181
- "eval_samples_per_second": 2.912,
182
- "eval_steps_per_second": 0.096,
183
- "eval_top1_accuracy": 0.9671052631578947,
184
  "step": 266
185
  },
186
  {
187
  "epoch": 8.0,
188
- "train_accuracy": 0.9634502923976608
189
  },
190
  {
191
  "epoch": 8.0,
192
- "grad_norm": 3.982485055923462,
193
- "learning_rate": 1.6172144859969913e-05,
194
- "loss": 0.506,
195
  "step": 304
196
  },
197
  {
198
  "epoch": 8.0,
199
- "eval_accuracy": 0.975189393939394,
200
- "eval_error_rate": 0.024810606060606033,
201
- "eval_f1": 0.9738558660758309,
202
- "eval_loss": 0.3909807801246643,
203
- "eval_precision": 0.9750055285272005,
204
- "eval_recall": 0.9736842105263158,
205
- "eval_runtime": 50.6533,
206
- "eval_samples_per_second": 3.001,
207
- "eval_steps_per_second": 0.099,
208
- "eval_top1_accuracy": 0.9736842105263158,
209
  "step": 304
210
  },
211
  {
212
  "epoch": 9.0,
213
- "train_accuracy": 0.9780701754385965
214
  },
215
  {
216
  "epoch": 9.0,
217
- "grad_norm": 2.4651834964752197,
218
- "learning_rate": 1.8049788627450628e-05,
219
- "loss": 0.2876,
220
  "step": 342
221
  },
222
  {
223
  "epoch": 9.0,
224
- "eval_accuracy": 0.981439393939394,
225
- "eval_error_rate": 0.018560606060606055,
226
- "eval_f1": 0.9802473202746875,
227
- "eval_loss": 0.21257419884204865,
228
- "eval_precision": 0.980843653250774,
229
- "eval_recall": 0.9802631578947368,
230
- "eval_runtime": 50.1498,
231
- "eval_samples_per_second": 3.031,
232
- "eval_steps_per_second": 0.1,
233
- "eval_top1_accuracy": 0.9802631578947368,
234
  "step": 342
235
  },
236
  {
237
  "epoch": 10.0,
238
- "train_accuracy": 0.9780701754385965
239
  },
240
  {
241
  "epoch": 10.0,
242
- "grad_norm": 3.773145914077759,
243
- "learning_rate": 1.9318622999689343e-05,
244
- "loss": 0.1722,
245
  "step": 380
246
  },
247
  {
248
  "epoch": 10.0,
249
- "eval_accuracy": 0.9818181818181818,
250
- "eval_error_rate": 0.018181818181818188,
251
- "eval_f1": 0.979943544279758,
252
- "eval_loss": 0.14089564979076385,
253
- "eval_precision": 0.9808553804296839,
254
- "eval_recall": 0.9802631578947368,
255
- "eval_runtime": 50.1171,
256
- "eval_samples_per_second": 3.033,
257
- "eval_steps_per_second": 0.1,
258
- "eval_top1_accuracy": 0.9802631578947368,
259
  "step": 380
260
  },
261
  {
262
  "epoch": 11.0,
263
- "train_accuracy": 0.9875730994152047
264
  },
265
  {
266
  "epoch": 11.0,
267
- "grad_norm": 2.976818561553955,
268
- "learning_rate": 1.995005803798479e-05,
269
- "loss": 0.1082,
270
  "step": 418
271
  },
272
  {
273
  "epoch": 11.0,
274
- "eval_accuracy": 0.9939393939393939,
275
- "eval_error_rate": 0.0060606060606061,
276
- "eval_f1": 0.993419541966282,
277
- "eval_loss": 0.0794038251042366,
278
- "eval_precision": 0.9936145510835913,
279
- "eval_recall": 0.993421052631579,
280
- "eval_runtime": 52.5912,
281
- "eval_samples_per_second": 2.89,
282
- "eval_steps_per_second": 0.095,
283
- "eval_top1_accuracy": 0.993421052631579,
284
  "step": 418
285
  },
286
  {
287
  "epoch": 12.0,
288
- "train_accuracy": 0.9912280701754386
289
  },
290
  {
291
  "epoch": 12.0,
292
- "grad_norm": 5.014571189880371,
293
- "learning_rate": 1.9972038083659915e-05,
294
- "loss": 0.0715,
295
  "step": 456
296
  },
297
  {
298
  "epoch": 12.0,
299
- "eval_accuracy": 0.9939393939393939,
300
- "eval_error_rate": 0.0060606060606061,
301
- "eval_f1": 0.993419541966282,
302
- "eval_loss": 0.057679127901792526,
303
- "eval_precision": 0.9936145510835913,
304
- "eval_recall": 0.993421052631579,
305
- "eval_runtime": 50.9469,
306
- "eval_samples_per_second": 2.984,
307
- "eval_steps_per_second": 0.098,
308
- "eval_top1_accuracy": 0.993421052631579,
309
  "step": 456
310
  },
311
  {
312
  "epoch": 13.0,
313
- "train_accuracy": 0.9912280701754386
314
  },
315
  {
316
  "epoch": 13.0,
317
- "grad_norm": 1.5518372058868408,
318
- "learning_rate": 1.9813886204892037e-05,
319
- "loss": 0.0492,
320
  "step": 494
321
  },
322
  {
323
  "epoch": 13.0,
324
- "eval_accuracy": 0.9878787878787879,
325
- "eval_error_rate": 0.012121212121212088,
326
- "eval_f1": 0.9867362170674966,
327
- "eval_loss": 0.04395502433180809,
328
- "eval_precision": 0.9872349657566376,
329
- "eval_recall": 0.9868421052631579,
330
- "eval_runtime": 54.1085,
331
- "eval_samples_per_second": 2.809,
332
- "eval_steps_per_second": 0.092,
333
- "eval_top1_accuracy": 0.9868421052631579,
334
  "step": 494
335
  },
336
  {
337
  "epoch": 14.0,
338
- "train_accuracy": 0.9950657894736842
339
  },
340
  {
341
  "epoch": 14.0,
342
- "grad_norm": 0.6904532313346863,
343
- "learning_rate": 1.9527753116224055e-05,
344
- "loss": 0.0375,
345
  "step": 532
346
  },
347
  {
348
  "epoch": 14.0,
349
- "eval_accuracy": 0.9939393939393939,
350
- "eval_error_rate": 0.0060606060606061,
351
- "eval_f1": 0.993419541966282,
352
- "eval_loss": 0.02658209018409252,
353
- "eval_precision": 0.9936145510835913,
354
- "eval_recall": 0.993421052631579,
355
- "eval_runtime": 52.868,
356
- "eval_samples_per_second": 2.875,
357
- "eval_steps_per_second": 0.095,
358
- "eval_top1_accuracy": 0.993421052631579,
359
  "step": 532
360
  },
361
  {
362
  "epoch": 15.0,
363
- "train_accuracy": 0.993421052631579
364
  },
365
  {
366
  "epoch": 15.0,
367
- "grad_norm": 1.3837875127792358,
368
- "learning_rate": 1.911506206288264e-05,
369
- "loss": 0.029,
370
  "step": 570
371
  },
372
  {
373
  "epoch": 15.0,
374
- "eval_accuracy": 0.9939393939393939,
375
- "eval_error_rate": 0.0060606060606061,
376
- "eval_f1": 0.993419541966282,
377
- "eval_loss": 0.03128606453537941,
378
- "eval_precision": 0.9936145510835913,
379
- "eval_recall": 0.993421052631579,
380
- "eval_runtime": 52.6477,
381
- "eval_samples_per_second": 2.887,
382
- "eval_steps_per_second": 0.095,
383
- "eval_top1_accuracy": 0.993421052631579,
384
  "step": 570
385
  },
386
  {
387
  "epoch": 16.0,
388
- "train_accuracy": 0.9985380116959064
389
  },
390
  {
391
  "epoch": 16.0,
392
- "grad_norm": 2.590073823928833,
393
- "learning_rate": 1.8565251293796298e-05,
394
- "loss": 0.0158,
395
  "step": 608
396
  },
397
  {
398
  "epoch": 16.0,
399
- "eval_accuracy": 0.9878787878787879,
400
- "eval_error_rate": 0.012121212121212088,
401
- "eval_f1": 0.9867362170674966,
402
- "eval_loss": 0.04083804041147232,
403
- "eval_precision": 0.9872349657566376,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
404
  "eval_recall": 0.9868421052631579,
405
- "eval_runtime": 52.9598,
406
- "eval_samples_per_second": 2.87,
407
- "eval_steps_per_second": 0.094,
408
  "eval_top1_accuracy": 0.9868421052631579,
409
- "step": 608
410
  },
411
  {
412
- "epoch": 16.0,
413
- "step": 608,
414
- "total_flos": 4.889238721360036e+17,
415
- "train_loss": 0.005142551405649436,
416
- "train_runtime": 1686.6977,
417
- "train_samples_per_second": 21.628,
418
- "train_steps_per_second": 0.676
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
419
  }
420
  ],
421
- "logging_steps": 500,
422
  "max_steps": 1140,
423
  "num_input_tokens_seen": 0,
424
  "num_train_epochs": 30,
@@ -430,7 +780,7 @@
430
  "early_stopping_threshold": 0.0
431
  },
432
  "attributes": {
433
- "early_stopping_patience_counter": 2
434
  }
435
  },
436
  "TrainerControl": {
@@ -444,8 +794,8 @@
444
  "attributes": {}
445
  }
446
  },
447
- "total_flos": 4.889238721360036e+17,
448
- "train_batch_size": 32,
449
  "trial_name": null,
450
  "trial_params": null
451
  }
 
1
  {
2
+ "best_metric": 0.012786287814378738,
3
+ "best_model_checkpoint": "convnext-tiny-224-finetuned-barkley\\checkpoint-1102",
4
+ "epoch": 30.0,
5
  "eval_steps": 500,
6
+ "global_step": 1140,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "train_accuracy": 0.2138157894736842
14
  },
15
  {
16
  "epoch": 1.0,
17
+ "grad_norm": 2.7230629920959473,
18
+ "learning_rate": 8.365843306988109e-07,
19
+ "loss": 1.6288,
20
  "step": 38
21
  },
22
  {
23
  "epoch": 1.0,
24
+ "eval_accuracy": 0.2371292106586224,
25
+ "eval_error_rate": 0.7628707893413775,
26
+ "eval_f1": 0.20430842832369756,
27
+ "eval_loss": 1.6004831790924072,
28
+ "eval_precision": 0.2132792246940375,
29
+ "eval_recall": 0.26973684210526316,
30
+ "eval_runtime": 41.666,
31
+ "eval_samples_per_second": 3.648,
32
+ "eval_steps_per_second": 0.456,
33
+ "eval_top1_accuracy": 0.26973684210526316,
34
  "step": 38
35
  },
36
  {
37
  "epoch": 2.0,
38
+ "train_accuracy": 0.25
39
  },
40
  {
41
  "epoch": 2.0,
42
+ "grad_norm": 2.8333628177642822,
43
+ "learning_rate": 9.460584867009364e-07,
44
+ "loss": 1.6059,
45
  "step": 76
46
  },
47
  {
48
  "epoch": 2.0,
49
+ "eval_accuracy": 0.24728506787330323,
50
+ "eval_error_rate": 0.7527149321266968,
51
+ "eval_f1": 0.2243479907737179,
52
+ "eval_loss": 1.5801552534103394,
53
+ "eval_precision": 0.2383676582761251,
54
+ "eval_recall": 0.27631578947368424,
55
+ "eval_runtime": 40.3741,
56
+ "eval_samples_per_second": 3.765,
57
+ "eval_steps_per_second": 0.471,
58
+ "eval_top1_accuracy": 0.27631578947368424,
59
  "step": 76
60
  },
61
  {
62
  "epoch": 3.0,
63
+ "train_accuracy": 0.2894736842105263
64
  },
65
  {
66
  "epoch": 3.0,
67
+ "grad_norm": 2.8583333492279053,
68
+ "learning_rate": 1.1275880849384341e-06,
69
+ "loss": 1.5808,
70
  "step": 114
71
  },
72
  {
73
  "epoch": 3.0,
74
+ "eval_accuracy": 0.2744343891402715,
75
+ "eval_error_rate": 0.7255656108597285,
76
+ "eval_f1": 0.25945030547058157,
77
+ "eval_loss": 1.5570068359375,
78
+ "eval_precision": 0.2778035204289794,
79
+ "eval_recall": 0.3026315789473684,
80
+ "eval_runtime": 25.2172,
81
+ "eval_samples_per_second": 6.028,
82
+ "eval_steps_per_second": 0.753,
83
+ "eval_top1_accuracy": 0.3026315789473684,
84
  "step": 114
85
  },
86
  {
87
  "epoch": 4.0,
88
+ "train_accuracy": 0.32383040935672514
89
  },
90
  {
91
  "epoch": 4.0,
92
+ "grad_norm": 2.166118860244751,
93
+ "learning_rate": 1.3797895548168056e-06,
94
+ "loss": 1.5555,
95
  "step": 152
96
  },
97
  {
98
  "epoch": 4.0,
99
+ "eval_accuracy": 0.3510859728506787,
100
+ "eval_error_rate": 0.6489140271493212,
101
+ "eval_f1": 0.3490548373858623,
102
+ "eval_loss": 1.5290976762771606,
103
+ "eval_precision": 0.3831372574793627,
104
+ "eval_recall": 0.375,
105
+ "eval_runtime": 28.1947,
106
+ "eval_samples_per_second": 5.391,
107
+ "eval_steps_per_second": 0.674,
108
+ "eval_top1_accuracy": 0.375,
109
  "step": 152
110
  },
111
  {
112
  "epoch": 5.0,
113
+ "train_accuracy": 0.38742690058479534
114
  },
115
  {
116
  "epoch": 5.0,
117
+ "grad_norm": 2.677858591079712,
118
+ "learning_rate": 1.7007406834242322e-06,
119
+ "loss": 1.5232,
120
  "step": 190
121
  },
122
  {
123
  "epoch": 5.0,
124
+ "eval_accuracy": 0.41472599296128704,
125
+ "eval_error_rate": 0.585274007038713,
126
+ "eval_f1": 0.41544045031957555,
127
+ "eval_loss": 1.4932990074157715,
128
+ "eval_precision": 0.4251577933805101,
129
+ "eval_recall": 0.4407894736842105,
130
+ "eval_runtime": 25.5314,
131
+ "eval_samples_per_second": 5.953,
132
+ "eval_steps_per_second": 0.744,
133
+ "eval_top1_accuracy": 0.4407894736842105,
134
  "step": 190
135
  },
136
  {
137
  "epoch": 6.0,
138
+ "train_accuracy": 0.47076023391812866
139
  },
140
  {
141
  "epoch": 6.0,
142
+ "grad_norm": 2.4222543239593506,
143
+ "learning_rate": 2.087995266130065e-06,
144
+ "loss": 1.4784,
145
  "step": 228
146
  },
147
  {
148
  "epoch": 6.0,
149
+ "eval_accuracy": 0.49721468074409253,
150
+ "eval_error_rate": 0.5027853192559075,
151
+ "eval_f1": 0.4925881060058483,
152
+ "eval_loss": 1.4484349489212036,
153
+ "eval_precision": 0.5076020884114232,
154
+ "eval_recall": 0.5197368421052632,
155
+ "eval_runtime": 56.6448,
156
+ "eval_samples_per_second": 2.683,
157
+ "eval_steps_per_second": 0.335,
158
+ "eval_top1_accuracy": 0.5197368421052632,
159
  "step": 228
160
  },
161
  {
162
  "epoch": 7.0,
163
+ "train_accuracy": 0.5701754385964912
164
  },
165
  {
166
  "epoch": 7.0,
167
+ "grad_norm": 3.0168983936309814,
168
+ "learning_rate": 2.5386017509096417e-06,
169
+ "loss": 1.4242,
170
  "step": 266
171
  },
172
  {
173
  "epoch": 7.0,
174
+ "eval_accuracy": 0.6248768225238813,
175
+ "eval_error_rate": 0.3751231774761187,
176
+ "eval_f1": 0.6307132735050356,
177
+ "eval_loss": 1.390175223350525,
178
+ "eval_precision": 0.6856800923548139,
179
+ "eval_recall": 0.6381578947368421,
180
+ "eval_runtime": 59.0359,
181
+ "eval_samples_per_second": 2.575,
182
+ "eval_steps_per_second": 0.322,
183
+ "eval_top1_accuracy": 0.6381578947368421,
184
  "step": 266
185
  },
186
  {
187
  "epoch": 8.0,
188
+ "train_accuracy": 0.6498538011695907
189
  },
190
  {
191
  "epoch": 8.0,
192
+ "grad_norm": 2.808101177215576,
193
+ "learning_rate": 3.049125734293339e-06,
194
+ "loss": 1.3586,
195
  "step": 304
196
  },
197
  {
198
  "epoch": 8.0,
199
+ "eval_accuracy": 0.7134087481146305,
200
+ "eval_error_rate": 0.2865912518853695,
201
+ "eval_f1": 0.7165677244674166,
202
+ "eval_loss": 1.318568229675293,
203
+ "eval_precision": 0.7728468899521532,
204
+ "eval_recall": 0.7171052631578947,
205
+ "eval_runtime": 55.4154,
206
+ "eval_samples_per_second": 2.743,
207
+ "eval_steps_per_second": 0.343,
208
+ "eval_top1_accuracy": 0.7171052631578947,
209
  "step": 304
210
  },
211
  {
212
  "epoch": 9.0,
213
+ "train_accuracy": 0.7149122807017544
214
  },
215
  {
216
  "epoch": 9.0,
217
+ "grad_norm": 2.8706772327423096,
218
+ "learning_rate": 3.6156761374816205e-06,
219
+ "loss": 1.276,
220
  "step": 342
221
  },
222
  {
223
  "epoch": 9.0,
224
+ "eval_accuracy": 0.805972850678733,
225
+ "eval_error_rate": 0.19402714932126697,
226
+ "eval_f1": 0.8108964691671459,
227
+ "eval_loss": 1.223597526550293,
228
+ "eval_precision": 0.8547017577806745,
229
+ "eval_recall": 0.8026315789473685,
230
+ "eval_runtime": 55.5305,
231
+ "eval_samples_per_second": 2.737,
232
+ "eval_steps_per_second": 0.342,
233
+ "eval_top1_accuracy": 0.8026315789473685,
234
  "step": 342
235
  },
236
  {
237
  "epoch": 10.0,
238
+ "train_accuracy": 0.8033625730994152
239
  },
240
  {
241
  "epoch": 10.0,
242
+ "grad_norm": 3.2001399993896484,
243
+ "learning_rate": 4.233934863118696e-06,
244
+ "loss": 1.1778,
245
  "step": 380
246
  },
247
  {
248
  "epoch": 10.0,
249
+ "eval_accuracy": 0.8600754147812971,
250
+ "eval_error_rate": 0.13992458521870288,
251
+ "eval_f1": 0.8609113468479812,
252
+ "eval_loss": 1.1122019290924072,
253
+ "eval_precision": 0.8898553355968974,
254
+ "eval_recall": 0.8552631578947368,
255
+ "eval_runtime": 53.869,
256
+ "eval_samples_per_second": 2.822,
257
+ "eval_steps_per_second": 0.353,
258
+ "eval_top1_accuracy": 0.8552631578947368,
259
  "step": 380
260
  },
261
  {
262
  "epoch": 11.0,
263
+ "train_accuracy": 0.868421052631579
264
  },
265
  {
266
  "epoch": 11.0,
267
+ "grad_norm": 3.1884777545928955,
268
+ "learning_rate": 4.899189706688707e-06,
269
+ "loss": 1.0543,
270
  "step": 418
271
  },
272
  {
273
  "epoch": 11.0,
274
+ "eval_accuracy": 0.9004725992961287,
275
+ "eval_error_rate": 0.09952740070387134,
276
+ "eval_f1": 0.8958336111023342,
277
+ "eval_loss": 0.9839252829551697,
278
+ "eval_precision": 0.9063624095203042,
279
+ "eval_recall": 0.8947368421052632,
280
+ "eval_runtime": 54.1079,
281
+ "eval_samples_per_second": 2.809,
282
+ "eval_steps_per_second": 0.351,
283
+ "eval_top1_accuracy": 0.8947368421052632,
284
  "step": 418
285
  },
286
  {
287
  "epoch": 12.0,
288
+ "train_accuracy": 0.902046783625731
289
  },
290
  {
291
  "epoch": 12.0,
292
+ "grad_norm": 3.692692279815674,
293
+ "learning_rate": 5.6063702716924525e-06,
294
+ "loss": 0.921,
295
  "step": 456
296
  },
297
  {
298
  "epoch": 12.0,
299
+ "eval_accuracy": 0.9575364504776269,
300
+ "eval_error_rate": 0.04246354952237308,
301
+ "eval_f1": 0.9537314326645565,
302
+ "eval_loss": 0.8418065309524536,
303
+ "eval_precision": 0.9540851522650184,
304
+ "eval_recall": 0.9539473684210527,
305
+ "eval_runtime": 54.166,
306
+ "eval_samples_per_second": 2.806,
307
+ "eval_steps_per_second": 0.351,
308
+ "eval_top1_accuracy": 0.9539473684210527,
309
  "step": 456
310
  },
311
  {
312
  "epoch": 13.0,
313
+ "train_accuracy": 0.9371345029239766
314
  },
315
  {
316
  "epoch": 13.0,
317
+ "grad_norm": 3.067211389541626,
318
+ "learning_rate": 6.350086614868708e-06,
319
+ "loss": 0.773,
320
  "step": 494
321
  },
322
  {
323
  "epoch": 13.0,
324
+ "eval_accuracy": 0.9652287581699346,
325
+ "eval_error_rate": 0.034771241830065414,
326
+ "eval_f1": 0.9604621710987601,
327
+ "eval_loss": 0.6935167908668518,
328
+ "eval_precision": 0.9623944201691405,
329
+ "eval_recall": 0.9605263157894737,
330
+ "eval_runtime": 67.8683,
331
+ "eval_samples_per_second": 2.24,
332
+ "eval_steps_per_second": 0.28,
333
+ "eval_top1_accuracy": 0.9605263157894737,
334
  "step": 494
335
  },
336
  {
337
  "epoch": 14.0,
338
+ "train_accuracy": 0.9576023391812866
339
  },
340
  {
341
  "epoch": 14.0,
342
+ "grad_norm": 2.8400356769561768,
343
+ "learning_rate": 7.124670326916465e-06,
344
+ "loss": 0.6204,
345
  "step": 532
346
  },
347
  {
348
  "epoch": 14.0,
349
+ "eval_accuracy": 0.9707843137254901,
350
+ "eval_error_rate": 0.029215686274509878,
351
+ "eval_f1": 0.96721850419808,
352
+ "eval_loss": 0.5515281558036804,
353
+ "eval_precision": 0.9687600085406214,
354
+ "eval_recall": 0.9671052631578947,
355
+ "eval_runtime": 75.408,
356
+ "eval_samples_per_second": 2.016,
357
+ "eval_steps_per_second": 0.252,
358
+ "eval_top1_accuracy": 0.9671052631578947,
359
  "step": 532
360
  },
361
  {
362
  "epoch": 15.0,
363
+ "train_accuracy": 0.9707602339181286
364
  },
365
  {
366
  "epoch": 15.0,
367
+ "grad_norm": 2.3800432682037354,
368
+ "learning_rate": 7.924217735611663e-06,
369
+ "loss": 0.4835,
370
  "step": 570
371
  },
372
  {
373
  "epoch": 15.0,
374
+ "eval_accuracy": 0.9696732026143791,
375
+ "eval_error_rate": 0.03032679738562094,
376
+ "eval_f1": 0.9676301640599072,
377
+ "eval_loss": 0.4145788848400116,
378
+ "eval_precision": 0.9704260651629072,
379
+ "eval_recall": 0.9671052631578947,
380
+ "eval_runtime": 43.8505,
381
+ "eval_samples_per_second": 3.466,
382
+ "eval_steps_per_second": 0.433,
383
+ "eval_top1_accuracy": 0.9671052631578947,
384
  "step": 570
385
  },
386
  {
387
  "epoch": 16.0,
388
+ "train_accuracy": 0.9736842105263158
389
  },
390
  {
391
  "epoch": 16.0,
392
+ "grad_norm": 3.108427047729492,
393
+ "learning_rate": 8.742634902035743e-06,
394
+ "loss": 0.3641,
395
  "step": 608
396
  },
397
  {
398
  "epoch": 16.0,
399
+ "eval_accuracy": 0.9830065359477125,
400
+ "eval_error_rate": 0.01699346405228752,
401
+ "eval_f1": 0.980191060766086,
402
+ "eval_loss": 0.3042638599872589,
403
+ "eval_precision": 0.9805310767959324,
404
+ "eval_recall": 0.9802631578947368,
405
+ "eval_runtime": 32.0995,
406
+ "eval_samples_per_second": 4.735,
407
+ "eval_steps_per_second": 0.592,
408
+ "eval_top1_accuracy": 0.9802631578947368,
409
+ "step": 608
410
+ },
411
+ {
412
+ "epoch": 17.0,
413
+ "train_accuracy": 0.9788011695906432
414
+ },
415
+ {
416
+ "epoch": 17.0,
417
+ "grad_norm": 2.5407485961914062,
418
+ "learning_rate": 9.573684066966612e-06,
419
+ "loss": 0.2706,
420
+ "step": 646
421
+ },
422
+ {
423
+ "epoch": 17.0,
424
+ "eval_accuracy": 0.9830065359477125,
425
+ "eval_error_rate": 0.01699346405228752,
426
+ "eval_f1": 0.980191060766086,
427
+ "eval_loss": 0.22474148869514465,
428
+ "eval_precision": 0.9805310767959324,
429
+ "eval_recall": 0.9802631578947368,
430
+ "eval_runtime": 44.9328,
431
+ "eval_samples_per_second": 3.383,
432
+ "eval_steps_per_second": 0.423,
433
+ "eval_top1_accuracy": 0.9802631578947368,
434
+ "step": 646
435
+ },
436
+ {
437
+ "epoch": 18.0,
438
+ "train_accuracy": 0.9817251461988304
439
+ },
440
+ {
441
+ "epoch": 18.0,
442
+ "grad_norm": 3.097386598587036,
443
+ "learning_rate": 1.0411031193429937e-05,
444
+ "loss": 0.1998,
445
+ "step": 684
446
+ },
447
+ {
448
+ "epoch": 18.0,
449
+ "eval_accuracy": 0.9888888888888889,
450
+ "eval_error_rate": 0.011111111111111072,
451
+ "eval_f1": 0.9867701266776593,
452
+ "eval_loss": 0.17049287259578705,
453
+ "eval_precision": 0.9872979940891655,
454
  "eval_recall": 0.9868421052631579,
455
+ "eval_runtime": 39.0056,
456
+ "eval_samples_per_second": 3.897,
457
+ "eval_steps_per_second": 0.487,
458
  "eval_top1_accuracy": 0.9868421052631579,
459
+ "step": 684
460
  },
461
  {
462
+ "epoch": 19.0,
463
+ "train_accuracy": 0.9890350877192983
464
+ },
465
+ {
466
+ "epoch": 19.0,
467
+ "grad_norm": 2.5862479209899902,
468
+ "learning_rate": 1.1248294243054004e-05,
469
+ "loss": 0.1446,
470
+ "step": 722
471
+ },
472
+ {
473
+ "epoch": 19.0,
474
+ "eval_accuracy": 0.9944444444444445,
475
+ "eval_error_rate": 0.005555555555555536,
476
+ "eval_f1": 0.99343678755752,
477
+ "eval_loss": 0.12706294655799866,
478
+ "eval_precision": 0.9936647173489279,
479
+ "eval_recall": 0.993421052631579,
480
+ "eval_runtime": 35.4207,
481
+ "eval_samples_per_second": 4.291,
482
+ "eval_steps_per_second": 0.536,
483
+ "eval_top1_accuracy": 0.993421052631579,
484
+ "step": 722
485
+ },
486
+ {
487
+ "epoch": 20.0,
488
+ "train_accuracy": 0.9897660818713451
489
+ },
490
+ {
491
+ "epoch": 20.0,
492
+ "grad_norm": 4.520482540130615,
493
+ "learning_rate": 1.2079091818278531e-05,
494
+ "loss": 0.1106,
495
+ "step": 760
496
+ },
497
+ {
498
+ "epoch": 20.0,
499
+ "eval_accuracy": 0.9888888888888889,
500
+ "eval_error_rate": 0.011111111111111072,
501
+ "eval_f1": 0.9867701266776593,
502
+ "eval_loss": 0.1046518012881279,
503
+ "eval_precision": 0.9872979940891655,
504
+ "eval_recall": 0.9868421052631579,
505
+ "eval_runtime": 41.2892,
506
+ "eval_samples_per_second": 3.681,
507
+ "eval_steps_per_second": 0.46,
508
+ "eval_top1_accuracy": 0.9868421052631579,
509
+ "step": 760
510
+ },
511
+ {
512
+ "epoch": 21.0,
513
+ "train_accuracy": 0.9883040935672515
514
+ },
515
+ {
516
+ "epoch": 21.0,
517
+ "grad_norm": 9.188246726989746,
518
+ "learning_rate": 1.2897091799679402e-05,
519
+ "loss": 0.0872,
520
+ "step": 798
521
+ },
522
+ {
523
+ "epoch": 21.0,
524
+ "eval_accuracy": 0.9944444444444445,
525
+ "eval_error_rate": 0.005555555555555536,
526
+ "eval_f1": 0.99343678755752,
527
+ "eval_loss": 0.0779847577214241,
528
+ "eval_precision": 0.9936647173489279,
529
+ "eval_recall": 0.993421052631579,
530
+ "eval_runtime": 68.0671,
531
+ "eval_samples_per_second": 2.233,
532
+ "eval_steps_per_second": 0.279,
533
+ "eval_top1_accuracy": 0.993421052631579,
534
+ "step": 798
535
+ },
536
+ {
537
+ "epoch": 22.0,
538
+ "train_accuracy": 0.993421052631579
539
+ },
540
+ {
541
+ "epoch": 22.0,
542
+ "grad_norm": 1.4848785400390625,
543
+ "learning_rate": 1.3696059607708444e-05,
544
+ "loss": 0.0614,
545
+ "step": 836
546
+ },
547
+ {
548
+ "epoch": 22.0,
549
+ "eval_accuracy": 0.9888888888888889,
550
+ "eval_error_rate": 0.011111111111111072,
551
+ "eval_f1": 0.9867701266776593,
552
+ "eval_loss": 0.07387673109769821,
553
+ "eval_precision": 0.9872979940891655,
554
+ "eval_recall": 0.9868421052631579,
555
+ "eval_runtime": 57.9913,
556
+ "eval_samples_per_second": 2.621,
557
+ "eval_steps_per_second": 0.328,
558
+ "eval_top1_accuracy": 0.9868421052631579,
559
+ "step": 836
560
+ },
561
+ {
562
+ "epoch": 23.0,
563
+ "train_accuracy": 0.9948830409356725
564
+ },
565
+ {
566
+ "epoch": 23.0,
567
+ "grad_norm": 0.7044534683227539,
568
+ "learning_rate": 1.4469905721010107e-05,
569
+ "loss": 0.0491,
570
+ "step": 874
571
+ },
572
+ {
573
+ "epoch": 23.0,
574
+ "eval_accuracy": 0.9944444444444445,
575
+ "eval_error_rate": 0.005555555555555536,
576
+ "eval_f1": 0.99343678755752,
577
+ "eval_loss": 0.05167735368013382,
578
+ "eval_precision": 0.9936647173489279,
579
+ "eval_recall": 0.993421052631579,
580
+ "eval_runtime": 43.1178,
581
+ "eval_samples_per_second": 3.525,
582
+ "eval_steps_per_second": 0.441,
583
+ "eval_top1_accuracy": 0.993421052631579,
584
+ "step": 874
585
+ },
586
+ {
587
+ "epoch": 24.0,
588
+ "train_accuracy": 0.9956140350877193
589
+ },
590
+ {
591
+ "epoch": 24.0,
592
+ "grad_norm": 4.517460346221924,
593
+ "learning_rate": 1.5212732089142938e-05,
594
+ "loss": 0.0365,
595
+ "step": 912
596
+ },
597
+ {
598
+ "epoch": 24.0,
599
+ "eval_accuracy": 0.9877777777777779,
600
+ "eval_error_rate": 0.012222222222222134,
601
+ "eval_f1": 0.9868484170131115,
602
+ "eval_loss": 0.04010358452796936,
603
+ "eval_precision": 0.9870857699805068,
604
+ "eval_recall": 0.9868421052631579,
605
+ "eval_runtime": 37.7666,
606
+ "eval_samples_per_second": 4.025,
607
+ "eval_steps_per_second": 0.503,
608
+ "eval_top1_accuracy": 0.9868421052631579,
609
+ "step": 912
610
+ },
611
+ {
612
+ "epoch": 25.0,
613
+ "train_accuracy": 0.9956140350877193
614
+ },
615
+ {
616
+ "epoch": 25.0,
617
+ "grad_norm": 0.25067076086997986,
618
+ "learning_rate": 1.590081029331129e-05,
619
+ "loss": 0.0255,
620
+ "step": 950
621
+ },
622
+ {
623
+ "epoch": 25.0,
624
+ "eval_accuracy": 0.9944444444444445,
625
+ "eval_error_rate": 0.005555555555555536,
626
+ "eval_f1": 0.99343678755752,
627
+ "eval_loss": 0.033598948270082474,
628
+ "eval_precision": 0.9936647173489279,
629
+ "eval_recall": 0.993421052631579,
630
+ "eval_runtime": 41.2598,
631
+ "eval_samples_per_second": 3.684,
632
+ "eval_steps_per_second": 0.46,
633
+ "eval_top1_accuracy": 0.993421052631579,
634
+ "step": 950
635
+ },
636
+ {
637
+ "epoch": 26.0,
638
+ "train_accuracy": 0.9963450292397661
639
+ },
640
+ {
641
+ "epoch": 26.0,
642
+ "grad_norm": 3.2572007179260254,
643
+ "learning_rate": 1.65660651368813e-05,
644
+ "loss": 0.0212,
645
+ "step": 988
646
+ },
647
+ {
648
+ "epoch": 26.0,
649
+ "eval_accuracy": 0.9888888888888889,
650
+ "eval_error_rate": 0.011111111111111072,
651
+ "eval_f1": 0.9867701266776593,
652
+ "eval_loss": 0.037725575268268585,
653
+ "eval_precision": 0.9872979940891655,
654
+ "eval_recall": 0.9868421052631579,
655
+ "eval_runtime": 39.6757,
656
+ "eval_samples_per_second": 3.831,
657
+ "eval_steps_per_second": 0.479,
658
+ "eval_top1_accuracy": 0.9868421052631579,
659
+ "step": 988
660
+ },
661
+ {
662
+ "epoch": 27.0,
663
+ "train_accuracy": 0.9956140350877193
664
+ },
665
+ {
666
+ "epoch": 27.0,
667
+ "grad_norm": 0.9294455051422119,
668
+ "learning_rate": 1.7184323862518377e-05,
669
+ "loss": 0.0175,
670
+ "step": 1026
671
+ },
672
+ {
673
+ "epoch": 27.0,
674
+ "eval_accuracy": 0.9944444444444445,
675
+ "eval_error_rate": 0.005555555555555536,
676
+ "eval_f1": 0.99343678755752,
677
+ "eval_loss": 0.019463175907731056,
678
+ "eval_precision": 0.9936647173489279,
679
+ "eval_recall": 0.993421052631579,
680
+ "eval_runtime": 41.079,
681
+ "eval_samples_per_second": 3.7,
682
+ "eval_steps_per_second": 0.463,
683
+ "eval_top1_accuracy": 0.993421052631579,
684
+ "step": 1026
685
+ },
686
+ {
687
+ "epoch": 28.0,
688
+ "train_accuracy": 0.9978070175438597
689
+ },
690
+ {
691
+ "epoch": 28.0,
692
+ "grad_norm": 3.1146209239959717,
693
+ "learning_rate": 1.775087426570666e-05,
694
+ "loss": 0.0125,
695
+ "step": 1064
696
+ },
697
+ {
698
+ "epoch": 28.0,
699
+ "eval_accuracy": 0.9933333333333334,
700
+ "eval_error_rate": 0.006666666666666599,
701
+ "eval_f1": 0.9934103601236665,
702
+ "eval_loss": 0.02142958901822567,
703
+ "eval_precision": 0.9935988620199147,
704
+ "eval_recall": 0.993421052631579,
705
+ "eval_runtime": 71.7993,
706
+ "eval_samples_per_second": 2.117,
707
+ "eval_steps_per_second": 0.265,
708
+ "eval_top1_accuracy": 0.993421052631579,
709
+ "step": 1064
710
+ },
711
+ {
712
+ "epoch": 29.0,
713
+ "train_accuracy": 0.9948830409356725
714
+ },
715
+ {
716
+ "epoch": 29.0,
717
+ "grad_norm": 0.6761703491210938,
718
+ "learning_rate": 1.826139824909036e-05,
719
+ "loss": 0.0155,
720
+ "step": 1102
721
+ },
722
+ {
723
+ "epoch": 29.0,
724
+ "eval_accuracy": 1.0,
725
+ "eval_error_rate": 0.0,
726
+ "eval_f1": 1.0,
727
+ "eval_loss": 0.012786287814378738,
728
+ "eval_precision": 1.0,
729
+ "eval_recall": 1.0,
730
+ "eval_runtime": 70.5861,
731
+ "eval_samples_per_second": 2.153,
732
+ "eval_steps_per_second": 0.269,
733
+ "eval_top1_accuracy": 1.0,
734
+ "step": 1102
735
+ },
736
+ {
737
+ "epoch": 30.0,
738
+ "train_accuracy": 0.9985380116959064
739
+ },
740
+ {
741
+ "epoch": 30.0,
742
+ "grad_norm": 0.24630075693130493,
743
+ "learning_rate": 1.8712004733869936e-05,
744
+ "loss": 0.0104,
745
+ "step": 1140
746
+ },
747
+ {
748
+ "epoch": 30.0,
749
+ "eval_accuracy": 0.9944444444444445,
750
+ "eval_error_rate": 0.005555555555555536,
751
+ "eval_f1": 0.99343678755752,
752
+ "eval_loss": 0.0159451961517334,
753
+ "eval_precision": 0.9936647173489279,
754
+ "eval_recall": 0.993421052631579,
755
+ "eval_runtime": 68.8635,
756
+ "eval_samples_per_second": 2.207,
757
+ "eval_steps_per_second": 0.276,
758
+ "eval_top1_accuracy": 0.993421052631579,
759
+ "step": 1140
760
+ },
761
+ {
762
+ "epoch": 30.0,
763
+ "step": 1140,
764
+ "total_flos": 9.167322602550067e+17,
765
+ "train_loss": 0.6629255272840199,
766
+ "train_runtime": 13519.4546,
767
+ "train_samples_per_second": 2.698,
768
+ "train_steps_per_second": 0.084
769
  }
770
  ],
771
+ "logging_steps": 10,
772
  "max_steps": 1140,
773
  "num_input_tokens_seen": 0,
774
  "num_train_epochs": 30,
 
780
  "early_stopping_threshold": 0.0
781
  },
782
  "attributes": {
783
+ "early_stopping_patience_counter": 0
784
  }
785
  },
786
  "TrainerControl": {
 
794
  "attributes": {}
795
  }
796
  },
797
+ "total_flos": 9.167322602550067e+17,
798
+ "train_batch_size": 8,
799
  "trial_name": null,
800
  "trial_params": null
801
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1b57bacbaaf9003431d769cf77cb4ade538a6e4c81615d2dad23b4a752322a3
3
- size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:591d261e4698fb2aced103c9b4eff40a248d2fd6e9b86b4707a073f3e92348b1
3
+ size 5112