datht committed on
Commit
1786fb6
·
verified ·
1 Parent(s): 7dc99c6

Model save

Browse files
Files changed (4) hide show
  1. README.md +29 -29
  2. all_results.json +5 -5
  3. train_results.json +5 -5
  4. trainer_state.json +0 -0
README.md CHANGED
@@ -3,24 +3,24 @@ library_name: transformers
3
  tags:
4
  - generated_from_trainer
5
  model-index:
6
- - name: vi-modernbert-VLSP2016_SA-ep20
7
  results: []
8
  ---
9
 
10
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
11
  should probably proofread and complete it, then remove this comment. -->
12
 
13
- # vi-modernbert-VLSP2016_SA-ep20
14
 
15
  This model was trained from scratch on the None dataset.
16
  It achieves the following results on the evaluation set:
17
- - Loss: 2.9834
18
- - Micro F1: 74.4762
19
- - Micro Precision: 74.4762
20
- - Micro Recall: 74.4762
21
- - Macro F1: 74.4952
22
- - Macro Precision: 74.5167
23
- - Macro Recall: 74.4762
24
 
25
  ## Model description
26
 
@@ -55,26 +55,26 @@ The following hyperparameters were used during training:
55
 
56
  | Training Loss | Epoch | Step | Validation Loss | Micro F1 | Micro Precision | Micro Recall | Macro F1 | Macro Precision | Macro Recall |
57
  |:-------------:|:-------:|:----:|:---------------:|:--------:|:---------------:|:------------:|:--------:|:---------------:|:------------:|
58
- | 2.2395 | 1.0 | 80 | 0.6337 | 73.1429 | 73.1429 | 73.1429 | 73.2675 | 75.2452 | 73.1429 |
59
- | 1.0835 | 2.0 | 160 | 0.7826 | 73.9048 | 73.9048 | 73.9048 | 73.5690 | 73.7349 | 73.9048 |
60
- | 0.2647 | 3.0 | 240 | 1.3780 | 73.2381 | 73.2381 | 73.2381 | 73.1154 | 73.2290 | 73.2381 |
61
- | 0.3228 | 4.0 | 320 | 1.7462 | 72.7619 | 72.7619 | 72.7619 | 72.7257 | 72.7147 | 72.7619 |
62
- | 0.0747 | 5.0 | 400 | 1.7594 | 74.0 | 74.0 | 74.0 | 74.1705 | 75.0419 | 74.0000 |
63
- | 0.0868 | 6.0 | 480 | 1.5809 | 73.6190 | 73.6190 | 73.6190 | 73.2438 | 73.6195 | 73.6190 |
64
- | 0.0801 | 7.0 | 560 | 1.7074 | 74.3810 | 74.3810 | 74.3810 | 74.3996 | 74.4326 | 74.3810 |
65
- | 0.1489 | 8.0 | 640 | 3.0942 | 73.5238 | 73.5238 | 73.5238 | 73.7559 | 74.9712 | 73.5238 |
66
- | 0.1558 | 9.0 | 720 | 3.2612 | 72.3810 | 72.3810 | 72.3810 | 72.5636 | 74.4763 | 72.3810 |
67
- | 0.0 | 10.0 | 800 | 3.0012 | 74.8571 | 74.8571 | 74.8571 | 74.9893 | 75.3648 | 74.8571 |
68
- | 0.0 | 11.0 | 880 | 2.9445 | 74.0 | 74.0 | 74.0 | 73.9497 | 73.9142 | 74.0000 |
69
- | 0.0 | 12.0 | 960 | 2.9618 | 74.4762 | 74.4762 | 74.4762 | 74.4845 | 74.4934 | 74.4762 |
70
- | 0.0 | 13.0 | 1040 | 2.9744 | 74.4762 | 74.4762 | 74.4762 | 74.4952 | 74.5167 | 74.4762 |
71
- | 0.0 | 14.0 | 1120 | 2.9787 | 74.3810 | 74.3810 | 74.3810 | 74.3974 | 74.4156 | 74.3810 |
72
- | 0.0 | 15.0 | 1200 | 2.9775 | 74.5714 | 74.5714 | 74.5714 | 74.5929 | 74.6181 | 74.5714 |
73
- | 0.0 | 16.0 | 1280 | 2.9835 | 74.2857 | 74.2857 | 74.2857 | 74.3051 | 74.3270 | 74.2857 |
74
- | 0.0 | 17.0 | 1360 | 2.9836 | 74.4762 | 74.4762 | 74.4762 | 74.4869 | 74.4985 | 74.4762 |
75
- | 0.0 | 18.0 | 1440 | 2.9821 | 74.4762 | 74.4762 | 74.4762 | 74.4944 | 74.5153 | 74.4762 |
76
- | 0.0 | 19.0 | 1520 | 2.9836 | 74.5714 | 74.5714 | 74.5714 | 74.5929 | 74.6181 | 74.5714 |
77
- | 0.0 | 19.7524 | 1580 | 2.9834 | 74.4762 | 74.4762 | 74.4762 | 74.4952 | 74.5167 | 74.4762 |
78
 
79
 
80
  ### Framework versions
 
3
  tags:
4
  - generated_from_trainer
5
  model-index:
6
+ - name: vi-modernbert-ViHSD-ep20
7
  results: []
8
  ---
9
 
10
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
11
  should probably proofread and complete it, then remove this comment. -->
12
 
13
+ # vi-modernbert-ViHSD-ep20
14
 
15
  This model was trained from scratch on the None dataset.
16
  It achieves the following results on the evaluation set:
17
+ - Loss: 1.6519
18
+ - Micro F1: 87.5
19
+ - Micro Precision: 87.5
20
+ - Micro Recall: 87.5
21
+ - Macro F1: 68.1809
22
+ - Macro Precision: 70.5794
23
+ - Macro Recall: 66.4832
24
 
25
  ## Model description
26
 
 
55
 
56
  | Training Loss | Epoch | Step | Validation Loss | Micro F1 | Micro Precision | Micro Recall | Macro F1 | Macro Precision | Macro Recall |
57
  |:-------------:|:-------:|:----:|:---------------:|:--------:|:---------------:|:------------:|:--------:|:---------------:|:------------:|
58
+ | 1.2008 | 0.9980 | 375 | 0.3767 | 86.7141 | 86.7141 | 86.7141 | 59.5450 | 76.5138 | 54.2867 |
59
+ | 1.194 | 1.9980 | 750 | 0.3603 | 87.3503 | 87.3503 | 87.3503 | 68.2151 | 70.6976 | 66.1582 |
60
+ | 0.5185 | 2.9980 | 1125 | 0.5565 | 86.0030 | 86.0030 | 86.0030 | 67.9724 | 66.8065 | 69.3709 |
61
+ | 0.3474 | 3.9980 | 1500 | 0.6241 | 85.5539 | 85.5539 | 85.5539 | 65.7528 | 66.9509 | 66.2314 |
62
+ | 0.2354 | 4.9980 | 1875 | 0.6098 | 86.7889 | 86.7889 | 86.7889 | 64.2029 | 67.9233 | 61.4523 |
63
+ | 0.2857 | 5.9980 | 2250 | 0.8669 | 86.7889 | 86.7889 | 86.7889 | 67.2326 | 68.1950 | 67.1750 |
64
+ | 0.1326 | 6.9980 | 2625 | 0.7455 | 87.1257 | 87.1257 | 87.1257 | 68.0176 | 69.7401 | 67.3727 |
65
+ | 0.4085 | 7.9980 | 3000 | 0.9578 | 88.0240 | 88.0240 | 88.0240 | 67.6255 | 73.9917 | 64.5661 |
66
+ | 0.0273 | 8.9980 | 3375 | 1.5414 | 87.1257 | 87.1257 | 87.1257 | 64.8080 | 70.6655 | 61.2109 |
67
+ | 0.036 | 9.9980 | 3750 | 1.1192 | 87.5374 | 87.5374 | 87.5374 | 67.8021 | 71.5825 | 65.9373 |
68
+ | 0.0748 | 10.9980 | 4125 | 1.2999 | 87.3503 | 87.3503 | 87.3503 | 67.6266 | 70.4011 | 66.5444 |
69
+ | 0.0644 | 11.9980 | 4500 | 1.4459 | 87.5374 | 87.5374 | 87.5374 | 67.6215 | 71.4430 | 65.1796 |
70
+ | 0.0201 | 12.9980 | 4875 | 1.5466 | 87.6497 | 87.6497 | 87.6497 | 67.8941 | 71.5368 | 65.5838 |
71
+ | 0.01 | 13.9980 | 5250 | 1.5540 | 87.3877 | 87.3877 | 87.3877 | 68.4412 | 70.3740 | 67.0800 |
72
+ | 0.0439 | 14.9980 | 5625 | 1.5876 | 87.5749 | 87.5749 | 87.5749 | 68.6453 | 70.7319 | 67.0817 |
73
+ | 0.0628 | 15.9980 | 6000 | 1.6211 | 87.4626 | 87.4626 | 87.4626 | 67.8192 | 70.7271 | 65.8999 |
74
+ | 0.0 | 16.9980 | 6375 | 1.6364 | 87.5749 | 87.5749 | 87.5749 | 68.5724 | 70.7215 | 66.9734 |
75
+ | 0.0431 | 17.9980 | 6750 | 1.6461 | 87.5 | 87.5 | 87.5 | 68.1004 | 70.4974 | 66.3750 |
76
+ | 0.0094 | 18.9980 | 7125 | 1.6505 | 87.4626 | 87.4626 | 87.4626 | 68.0533 | 70.4170 | 66.3597 |
77
+ | 0.0174 | 19.9980 | 7500 | 1.6519 | 87.5 | 87.5 | 87.5 | 68.1809 | 70.5794 | 66.4832 |
78
 
79
 
80
  ### Framework versions
all_results.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "epoch": 19.752351097178682,
3
  "eval_loss": 2.983431816101074,
4
  "eval_macro_f1": 74.495166799055,
5
  "eval_macro_precision": 74.51671347356447,
@@ -20,8 +20,8 @@
20
  "test_runtime": 4.1608,
21
  "test_samples_per_second": 252.353,
22
  "test_steps_per_second": 15.862,
23
- "train_loss": 0.2892922113949427,
24
- "train_runtime": 830.7771,
25
- "train_samples_per_second": 122.777,
26
- "train_steps_per_second": 1.902
27
  }
 
1
  {
2
+ "epoch": 19.998003992015967,
3
  "eval_loss": 2.983431816101074,
4
  "eval_macro_f1": 74.495166799055,
5
  "eval_macro_precision": 74.51671347356447,
 
20
  "test_runtime": 4.1608,
21
  "test_samples_per_second": 252.353,
22
  "test_steps_per_second": 15.862,
23
+ "train_loss": 0.23192051490644613,
24
+ "train_runtime": 3495.581,
25
+ "train_samples_per_second": 137.579,
26
+ "train_steps_per_second": 2.146
27
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 19.752351097178682,
3
- "train_loss": 0.2892922113949427,
4
- "train_runtime": 830.7771,
5
- "train_samples_per_second": 122.777,
6
- "train_steps_per_second": 1.902
7
  }
 
1
  {
2
+ "epoch": 19.998003992015967,
3
+ "train_loss": 0.23192051490644613,
4
+ "train_runtime": 3495.581,
5
+ "train_samples_per_second": 137.579,
6
+ "train_steps_per_second": 2.146
7
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff