Model save
Browse files
- README.md +29 -29
- all_results.json +5 -5
- train_results.json +5 -5
- trainer_state.json +0 -0
README.md
CHANGED
@@ -3,24 +3,24 @@ library_name: transformers
|
|
3 |
tags:
|
4 |
- generated_from_trainer
|
5 |
model-index:
|
6 |
-
- name: vi-modernbert-
|
7 |
results: []
|
8 |
---
|
9 |
|
10 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
11 |
should probably proofread and complete it, then remove this comment. -->
|
12 |
|
13 |
-
# vi-modernbert-
|
14 |
|
15 |
This model was trained from scratch on an unspecified dataset (the dataset name was recorded as `None` in the auto-generated card).
|
16 |
It achieves the following results on the evaluation set:
|
17 |
-
- Loss:
|
18 |
-
- Micro F1:
|
19 |
-
- Micro Precision:
|
20 |
-
- Micro Recall:
|
21 |
-
- Macro F1:
|
22 |
-
- Macro Precision:
|
23 |
-
- Macro Recall:
|
24 |
|
25 |
## Model description
|
26 |
|
@@ -55,26 +55,26 @@ The following hyperparameters were used during training:
|
|
55 |
|
56 |
| Training Loss | Epoch | Step | Validation Loss | Micro F1 | Micro Precision | Micro Recall | Macro F1 | Macro Precision | Macro Recall |
|
57 |
|:-------------:|:-------:|:----:|:---------------:|:--------:|:---------------:|:------------:|:--------:|:---------------:|:------------:|
|
58 |
-
|
|
59 |
-
| 1.
|
60 |
-
| 0.
|
61 |
-
| 0.
|
62 |
-
| 0.
|
63 |
-
| 0.
|
64 |
-
| 0.
|
65 |
-
| 0.
|
66 |
-
| 0.
|
67 |
-
| 0.
|
68 |
-
| 0.
|
69 |
-
| 0.
|
70 |
-
| 0.
|
71 |
-
| 0.
|
72 |
-
| 0.
|
73 |
-
| 0.
|
74 |
-
| 0.0 |
|
75 |
-
| 0.
|
76 |
-
| 0.
|
77 |
-
| 0.
|
78 |
|
79 |
|
80 |
### Framework versions
|
|
|
3 |
tags:
|
4 |
- generated_from_trainer
|
5 |
model-index:
|
6 |
+
- name: vi-modernbert-ViHSD-ep20
|
7 |
results: []
|
8 |
---
|
9 |
|
10 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
11 |
should probably proofread and complete it, then remove this comment. -->
|
12 |
|
13 |
+
# vi-modernbert-ViHSD-ep20
|
14 |
|
15 |
This model was trained from scratch on an unspecified dataset (the dataset name was recorded as `None` in the auto-generated card).
|
16 |
It achieves the following results on the evaluation set:
|
17 |
+
- Loss: 1.6519
|
18 |
+
- Micro F1: 87.5
|
19 |
+
- Micro Precision: 87.5
|
20 |
+
- Micro Recall: 87.5
|
21 |
+
- Macro F1: 68.1809
|
22 |
+
- Macro Precision: 70.5794
|
23 |
+
- Macro Recall: 66.4832
|
24 |
|
25 |
## Model description
|
26 |
|
|
|
55 |
|
56 |
| Training Loss | Epoch | Step | Validation Loss | Micro F1 | Micro Precision | Micro Recall | Macro F1 | Macro Precision | Macro Recall |
|
57 |
|:-------------:|:-------:|:----:|:---------------:|:--------:|:---------------:|:------------:|:--------:|:---------------:|:------------:|
|
58 |
+
| 1.2008 | 0.9980 | 375 | 0.3767 | 86.7141 | 86.7141 | 86.7141 | 59.5450 | 76.5138 | 54.2867 |
|
59 |
+
| 1.194 | 1.9980 | 750 | 0.3603 | 87.3503 | 87.3503 | 87.3503 | 68.2151 | 70.6976 | 66.1582 |
|
60 |
+
| 0.5185 | 2.9980 | 1125 | 0.5565 | 86.0030 | 86.0030 | 86.0030 | 67.9724 | 66.8065 | 69.3709 |
|
61 |
+
| 0.3474 | 3.9980 | 1500 | 0.6241 | 85.5539 | 85.5539 | 85.5539 | 65.7528 | 66.9509 | 66.2314 |
|
62 |
+
| 0.2354 | 4.9980 | 1875 | 0.6098 | 86.7889 | 86.7889 | 86.7889 | 64.2029 | 67.9233 | 61.4523 |
|
63 |
+
| 0.2857 | 5.9980 | 2250 | 0.8669 | 86.7889 | 86.7889 | 86.7889 | 67.2326 | 68.1950 | 67.1750 |
|
64 |
+
| 0.1326 | 6.9980 | 2625 | 0.7455 | 87.1257 | 87.1257 | 87.1257 | 68.0176 | 69.7401 | 67.3727 |
|
65 |
+
| 0.4085 | 7.9980 | 3000 | 0.9578 | 88.0240 | 88.0240 | 88.0240 | 67.6255 | 73.9917 | 64.5661 |
|
66 |
+
| 0.0273 | 8.9980 | 3375 | 1.5414 | 87.1257 | 87.1257 | 87.1257 | 64.8080 | 70.6655 | 61.2109 |
|
67 |
+
| 0.036 | 9.9980 | 3750 | 1.1192 | 87.5374 | 87.5374 | 87.5374 | 67.8021 | 71.5825 | 65.9373 |
|
68 |
+
| 0.0748 | 10.9980 | 4125 | 1.2999 | 87.3503 | 87.3503 | 87.3503 | 67.6266 | 70.4011 | 66.5444 |
|
69 |
+
| 0.0644 | 11.9980 | 4500 | 1.4459 | 87.5374 | 87.5374 | 87.5374 | 67.6215 | 71.4430 | 65.1796 |
|
70 |
+
| 0.0201 | 12.9980 | 4875 | 1.5466 | 87.6497 | 87.6497 | 87.6497 | 67.8941 | 71.5368 | 65.5838 |
|
71 |
+
| 0.01 | 13.9980 | 5250 | 1.5540 | 87.3877 | 87.3877 | 87.3877 | 68.4412 | 70.3740 | 67.0800 |
|
72 |
+
| 0.0439 | 14.9980 | 5625 | 1.5876 | 87.5749 | 87.5749 | 87.5749 | 68.6453 | 70.7319 | 67.0817 |
|
73 |
+
| 0.0628 | 15.9980 | 6000 | 1.6211 | 87.4626 | 87.4626 | 87.4626 | 67.8192 | 70.7271 | 65.8999 |
|
74 |
+
| 0.0 | 16.9980 | 6375 | 1.6364 | 87.5749 | 87.5749 | 87.5749 | 68.5724 | 70.7215 | 66.9734 |
|
75 |
+
| 0.0431 | 17.9980 | 6750 | 1.6461 | 87.5 | 87.5 | 87.5 | 68.1004 | 70.4974 | 66.3750 |
|
76 |
+
| 0.0094 | 18.9980 | 7125 | 1.6505 | 87.4626 | 87.4626 | 87.4626 | 68.0533 | 70.4170 | 66.3597 |
|
77 |
+
| 0.0174 | 19.9980 | 7500 | 1.6519 | 87.5 | 87.5 | 87.5 | 68.1809 | 70.5794 | 66.4832 |
|
78 |
|
79 |
|
80 |
### Framework versions
|
all_results.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"epoch": 19.
|
3 |
"eval_loss": 2.983431816101074,
|
4 |
"eval_macro_f1": 74.495166799055,
|
5 |
"eval_macro_precision": 74.51671347356447,
|
@@ -20,8 +20,8 @@
|
|
20 |
"test_runtime": 4.1608,
|
21 |
"test_samples_per_second": 252.353,
|
22 |
"test_steps_per_second": 15.862,
|
23 |
-
"train_loss": 0.
|
24 |
-
"train_runtime":
|
25 |
-
"train_samples_per_second":
|
26 |
-
"train_steps_per_second":
|
27 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 19.998003992015967,
|
3 |
"eval_loss": 2.983431816101074,
|
4 |
"eval_macro_f1": 74.495166799055,
|
5 |
"eval_macro_precision": 74.51671347356447,
|
|
|
20 |
"test_runtime": 4.1608,
|
21 |
"test_samples_per_second": 252.353,
|
22 |
"test_steps_per_second": 15.862,
|
23 |
+
"train_loss": 0.23192051490644613,
|
24 |
+
"train_runtime": 3495.581,
|
25 |
+
"train_samples_per_second": 137.579,
|
26 |
+
"train_steps_per_second": 2.146
|
27 |
}
|
train_results.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
-
"epoch": 19.
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
-
"train_samples_per_second":
|
6 |
-
"train_steps_per_second":
|
7 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 19.998003992015967,
|
3 |
+
"train_loss": 0.23192051490644613,
|
4 |
+
"train_runtime": 3495.581,
|
5 |
+
"train_samples_per_second": 137.579,
|
6 |
+
"train_steps_per_second": 2.146
|
7 |
}
|
trainer_state.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|