datht committed on
Commit
1786fb6
·
verified ·
1 Parent(s): 7dc99c6

Model save

Browse files
Files changed (4) hide show
  1. README.md +29 -29
  2. all_results.json +5 -5
  3. train_results.json +5 -5
  4. trainer_state.json +0 -0
README.md CHANGED
@@ -3,24 +3,24 @@ library_name: transformers
3
  tags:
4
  - generated_from_trainer
5
  model-index:
6
- - name: vi-modernbert-VLSP2016_SA-ep20
7
  results: []
8
  ---
9
 
10
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
11
  should probably proofread and complete it, then remove this comment. -->
12
 
13
- # vi-modernbert-VLSP2016_SA-ep20
14
 
15
  This model was trained from scratch on the None dataset.
16
  It achieves the following results on the evaluation set:
17
- - Loss: 2.9834
18
- - Micro F1: 74.4762
19
- - Micro Precision: 74.4762
20
- - Micro Recall: 74.4762
21
- - Macro F1: 74.4952
22
- - Macro Precision: 74.5167
23
- - Macro Recall: 74.4762
24
 
25
  ## Model description
26
 
@@ -55,26 +55,26 @@ The following hyperparameters were used during training:
55
 
56
  | Training Loss | Epoch | Step | Validation Loss | Micro F1 | Micro Precision | Micro Recall | Macro F1 | Macro Precision | Macro Recall |
57
  |:-------------:|:-------:|:----:|:---------------:|:--------:|:---------------:|:------------:|:--------:|:---------------:|:------------:|
58
- | 2.2395 | 1.0 | 80 | 0.6337 | 73.1429 | 73.1429 | 73.1429 | 73.2675 | 75.2452 | 73.1429 |
59
- | 1.0835 | 2.0 | 160 | 0.7826 | 73.9048 | 73.9048 | 73.9048 | 73.5690 | 73.7349 | 73.9048 |
60
- | 0.2647 | 3.0 | 240 | 1.3780 | 73.2381 | 73.2381 | 73.2381 | 73.1154 | 73.2290 | 73.2381 |
61
- | 0.3228 | 4.0 | 320 | 1.7462 | 72.7619 | 72.7619 | 72.7619 | 72.7257 | 72.7147 | 72.7619 |
62
- | 0.0747 | 5.0 | 400 | 1.7594 | 74.0 | 74.0 | 74.0 | 74.1705 | 75.0419 | 74.0000 |
63
- | 0.0868 | 6.0 | 480 | 1.5809 | 73.6190 | 73.6190 | 73.6190 | 73.2438 | 73.6195 | 73.6190 |
64
- | 0.0801 | 7.0 | 560 | 1.7074 | 74.3810 | 74.3810 | 74.3810 | 74.3996 | 74.4326 | 74.3810 |
65
- | 0.1489 | 8.0 | 640 | 3.0942 | 73.5238 | 73.5238 | 73.5238 | 73.7559 | 74.9712 | 73.5238 |
66
- | 0.1558 | 9.0 | 720 | 3.2612 | 72.3810 | 72.3810 | 72.3810 | 72.5636 | 74.4763 | 72.3810 |
67
- | 0.0 | 10.0 | 800 | 3.0012 | 74.8571 | 74.8571 | 74.8571 | 74.9893 | 75.3648 | 74.8571 |
68
- | 0.0 | 11.0 | 880 | 2.9445 | 74.0 | 74.0 | 74.0 | 73.9497 | 73.9142 | 74.0000 |
69
- | 0.0 | 12.0 | 960 | 2.9618 | 74.4762 | 74.4762 | 74.4762 | 74.4845 | 74.4934 | 74.4762 |
70
- | 0.0 | 13.0 | 1040 | 2.9744 | 74.4762 | 74.4762 | 74.4762 | 74.4952 | 74.5167 | 74.4762 |
71
- | 0.0 | 14.0 | 1120 | 2.9787 | 74.3810 | 74.3810 | 74.3810 | 74.3974 | 74.4156 | 74.3810 |
72
- | 0.0 | 15.0 | 1200 | 2.9775 | 74.5714 | 74.5714 | 74.5714 | 74.5929 | 74.6181 | 74.5714 |
73
- | 0.0 | 16.0 | 1280 | 2.9835 | 74.2857 | 74.2857 | 74.2857 | 74.3051 | 74.3270 | 74.2857 |
74
- | 0.0 | 17.0 | 1360 | 2.9836 | 74.4762 | 74.4762 | 74.4762 | 74.4869 | 74.4985 | 74.4762 |
75
- | 0.0 | 18.0 | 1440 | 2.9821 | 74.4762 | 74.4762 | 74.4762 | 74.4944 | 74.5153 | 74.4762 |
76
- | 0.0 | 19.0 | 1520 | 2.9836 | 74.5714 | 74.5714 | 74.5714 | 74.5929 | 74.6181 | 74.5714 |
77
- | 0.0 | 19.7524 | 1580 | 2.9834 | 74.4762 | 74.4762 | 74.4762 | 74.4952 | 74.5167 | 74.4762 |
78
 
79
 
80
  ### Framework versions
 
3
  tags:
4
  - generated_from_trainer
5
  model-index:
6
+ - name: vi-modernbert-ViHSD-ep20
7
  results: []
8
  ---
9
 
10
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
11
  should probably proofread and complete it, then remove this comment. -->
12
 
13
+ # vi-modernbert-ViHSD-ep20
14
 
15
  This model was trained from scratch on the None dataset.
16
  It achieves the following results on the evaluation set:
17
+ - Loss: 1.6519
18
+ - Micro F1: 87.5
19
+ - Micro Precision: 87.5
20
+ - Micro Recall: 87.5
21
+ - Macro F1: 68.1809
22
+ - Macro Precision: 70.5794
23
+ - Macro Recall: 66.4832
24
 
25
  ## Model description
26
 
 
55
 
56
  | Training Loss | Epoch | Step | Validation Loss | Micro F1 | Micro Precision | Micro Recall | Macro F1 | Macro Precision | Macro Recall |
57
  |:-------------:|:-------:|:----:|:---------------:|:--------:|:---------------:|:------------:|:--------:|:---------------:|:------------:|
58
+ | 1.2008 | 0.9980 | 375 | 0.3767 | 86.7141 | 86.7141 | 86.7141 | 59.5450 | 76.5138 | 54.2867 |
59
+ | 1.194 | 1.9980 | 750 | 0.3603 | 87.3503 | 87.3503 | 87.3503 | 68.2151 | 70.6976 | 66.1582 |
60
+ | 0.5185 | 2.9980 | 1125 | 0.5565 | 86.0030 | 86.0030 | 86.0030 | 67.9724 | 66.8065 | 69.3709 |
61
+ | 0.3474 | 3.9980 | 1500 | 0.6241 | 85.5539 | 85.5539 | 85.5539 | 65.7528 | 66.9509 | 66.2314 |
62
+ | 0.2354 | 4.9980 | 1875 | 0.6098 | 86.7889 | 86.7889 | 86.7889 | 64.2029 | 67.9233 | 61.4523 |
63
+ | 0.2857 | 5.9980 | 2250 | 0.8669 | 86.7889 | 86.7889 | 86.7889 | 67.2326 | 68.1950 | 67.1750 |
64
+ | 0.1326 | 6.9980 | 2625 | 0.7455 | 87.1257 | 87.1257 | 87.1257 | 68.0176 | 69.7401 | 67.3727 |
65
+ | 0.4085 | 7.9980 | 3000 | 0.9578 | 88.0240 | 88.0240 | 88.0240 | 67.6255 | 73.9917 | 64.5661 |
66
+ | 0.0273 | 8.9980 | 3375 | 1.5414 | 87.1257 | 87.1257 | 87.1257 | 64.8080 | 70.6655 | 61.2109 |
67
+ | 0.036 | 9.9980 | 3750 | 1.1192 | 87.5374 | 87.5374 | 87.5374 | 67.8021 | 71.5825 | 65.9373 |
68
+ | 0.0748 | 10.9980 | 4125 | 1.2999 | 87.3503 | 87.3503 | 87.3503 | 67.6266 | 70.4011 | 66.5444 |
69
+ | 0.0644 | 11.9980 | 4500 | 1.4459 | 87.5374 | 87.5374 | 87.5374 | 67.6215 | 71.4430 | 65.1796 |
70
+ | 0.0201 | 12.9980 | 4875 | 1.5466 | 87.6497 | 87.6497 | 87.6497 | 67.8941 | 71.5368 | 65.5838 |
71
+ | 0.01 | 13.9980 | 5250 | 1.5540 | 87.3877 | 87.3877 | 87.3877 | 68.4412 | 70.3740 | 67.0800 |
72
+ | 0.0439 | 14.9980 | 5625 | 1.5876 | 87.5749 | 87.5749 | 87.5749 | 68.6453 | 70.7319 | 67.0817 |
73
+ | 0.0628 | 15.9980 | 6000 | 1.6211 | 87.4626 | 87.4626 | 87.4626 | 67.8192 | 70.7271 | 65.8999 |
74
+ | 0.0 | 16.9980 | 6375 | 1.6364 | 87.5749 | 87.5749 | 87.5749 | 68.5724 | 70.7215 | 66.9734 |
75
+ | 0.0431 | 17.9980 | 6750 | 1.6461 | 87.5 | 87.5 | 87.5 | 68.1004 | 70.4974 | 66.3750 |
76
+ | 0.0094 | 18.9980 | 7125 | 1.6505 | 87.4626 | 87.4626 | 87.4626 | 68.0533 | 70.4170 | 66.3597 |
77
+ | 0.0174 | 19.9980 | 7500 | 1.6519 | 87.5 | 87.5 | 87.5 | 68.1809 | 70.5794 | 66.4832 |
78
 
79
 
80
  ### Framework versions
all_results.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "epoch": 19.752351097178682,
3
  "eval_loss": 2.983431816101074,
4
  "eval_macro_f1": 74.495166799055,
5
  "eval_macro_precision": 74.51671347356447,
@@ -20,8 +20,8 @@
20
  "test_runtime": 4.1608,
21
  "test_samples_per_second": 252.353,
22
  "test_steps_per_second": 15.862,
23
- "train_loss": 0.2892922113949427,
24
- "train_runtime": 830.7771,
25
- "train_samples_per_second": 122.777,
26
- "train_steps_per_second": 1.902
27
  }
 
1
  {
2
+ "epoch": 19.998003992015967,
3
  "eval_loss": 2.983431816101074,
4
  "eval_macro_f1": 74.495166799055,
5
  "eval_macro_precision": 74.51671347356447,
 
20
  "test_runtime": 4.1608,
21
  "test_samples_per_second": 252.353,
22
  "test_steps_per_second": 15.862,
23
+ "train_loss": 0.23192051490644613,
24
+ "train_runtime": 3495.581,
25
+ "train_samples_per_second": 137.579,
26
+ "train_steps_per_second": 2.146
27
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 19.752351097178682,
3
- "train_loss": 0.2892922113949427,
4
- "train_runtime": 830.7771,
5
- "train_samples_per_second": 122.777,
6
- "train_steps_per_second": 1.902
7
  }
 
1
  {
2
+ "epoch": 19.998003992015967,
3
+ "train_loss": 0.23192051490644613,
4
+ "train_runtime": 3495.581,
5
+ "train_samples_per_second": 137.579,
6
+ "train_steps_per_second": 2.146
7
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff