Upload folder using huggingface_hub
Browse files- README.md +7 -4
- adapter_config.json +4 -4
- adapter_model.safetensors +1 -1
- label_encoder.json +1 -1
- tokenizer.json +2 -2
- training_metrics/training_metrics.json +75 -0
README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
---
|
2 |
-
|
3 |
-
|
4 |
---
|
5 |
|
6 |
# Model Card for Model ID
|
@@ -15,7 +15,7 @@ tags: []
|
|
15 |
|
16 |
<!-- Provide a longer summary of what this model is. -->
|
17 |
|
18 |
-
|
19 |
|
20 |
- **Developed by:** [More Information Needed]
|
21 |
- **Funded by [optional]:** [More Information Needed]
|
@@ -196,4 +196,7 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
|
|
196 |
|
197 |
## Model Card Contact
|
198 |
|
199 |
-
[More Information Needed]
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
|
3 |
+
library_name: peft
|
4 |
---
|
5 |
|
6 |
# Model Card for Model ID
|
|
|
15 |
|
16 |
<!-- Provide a longer summary of what this model is. -->
|
17 |
|
18 |
+
|
19 |
|
20 |
- **Developed by:** [More Information Needed]
|
21 |
- **Funded by [optional]:** [More Information Needed]
|
|
|
196 |
|
197 |
## Model Card Contact
|
198 |
|
199 |
+
[More Information Needed]
|
200 |
+
### Framework versions
|
201 |
+
|
202 |
+
- PEFT 0.14.0
|
adapter_config.json
CHANGED
@@ -12,7 +12,7 @@
|
|
12 |
"layers_pattern": null,
|
13 |
"layers_to_transform": null,
|
14 |
"loftq_config": {},
|
15 |
-
"lora_alpha":
|
16 |
"lora_bias": false,
|
17 |
"lora_dropout": 0,
|
18 |
"megatron_config": null,
|
@@ -26,13 +26,13 @@
|
|
26 |
"rank_pattern": {},
|
27 |
"revision": null,
|
28 |
"target_modules": [
|
|
|
|
|
29 |
"k_proj",
|
30 |
"o_proj",
|
31 |
"q_proj",
|
32 |
-
"gate_proj",
|
33 |
"v_proj",
|
34 |
-
"up_proj"
|
35 |
-
"down_proj"
|
36 |
],
|
37 |
"task_type": "SEQ_CLS",
|
38 |
"use_dora": false,
|
|
|
12 |
"layers_pattern": null,
|
13 |
"layers_to_transform": null,
|
14 |
"loftq_config": {},
|
15 |
+
"lora_alpha": 32,
|
16 |
"lora_bias": false,
|
17 |
"lora_dropout": 0,
|
18 |
"megatron_config": null,
|
|
|
26 |
"rank_pattern": {},
|
27 |
"revision": null,
|
28 |
"target_modules": [
|
29 |
+
"down_proj",
|
30 |
+
"gate_proj",
|
31 |
"k_proj",
|
32 |
"o_proj",
|
33 |
"q_proj",
|
|
|
34 |
"v_proj",
|
35 |
+
"up_proj"
|
|
|
36 |
],
|
37 |
"task_type": "SEQ_CLS",
|
38 |
"use_dora": false,
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 74427312
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:efb07e686804e78f7c5243ee381b674ccf187fbcee1bca549761ebde137ec762
|
3 |
size 74427312
|
label_encoder.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"\u0401\u043c\u043a\u043e\u0441\u0442\u0438, \u0440\u0435\u0437\u0435\u0440\u0432\u0443\u0430\u0440\u044b": 0, "\u0410\u0440\u043c\u0430\u0442\u0443\u0440\u0430 \u0441\u0442\u0440\u043e\u0438\u0442\u0435\u043b\u044c\u043d\u0430\u044f": 1, "\u0410\u0441\u0444\u0430\u043b\u044c\u0442": 2, "\u0411\u0435\u0442\u043e\u043d": 3, "\u0411\u044b\u0442\u043e\u0432\u0430\u044f \u0442\u0435\u0445\u043d\u0438\u043a\u0430, \u043e\u0440\u0433\u0442\u0435\u0445\u0438\u043a\u0430": 4, "\u0412\u0430\u0433\u043e\u043d\u0447\u0438\u043a\u0438, \u043a\u043e\u043d\u0442\u0435\u0439\u043d\u0435\u0440\u044b, \u0431\u043b\u043e\u0447\u043d\u043e-\u043c\u043e\u0434\u0443\u043b\u044c\u043d\u044b\u0435 \u0437\u0434\u0430\u043d\u0438\u044f": 5, "\u0412\u043e\u0437\u0434\u0443\u0445\u043e\u0432\u043e\u0434\u044b": 6, "\u0412\u0441\u043f\u043e\u043c\u043e\u0433\u0430\u0442\u0435\u043b\u044c\u043d\u044b\u0435 \u041c\u0422\u0420": 7, "\u0413\u0430\u0437\u043e\u0445\u043e\u0434\u044b (\u0442)": 8, "\u0413\u0430\u0437\u043e\u0445\u043e\u0434\u044b (\u0448\u0442.)": 9, "\u0413\u0435\u043e\u043c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b": 10, "\u0413\u0435\u043e\u043c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b (\u0448\u0442.)": 11, "\u0414\u0435\u0440\u0435\u0432\u044c\u044f, \u043a\u0443\u0441\u0442\u0430\u0440\u043d\u0438\u043a\u0438, \u0442\u0440\u0430\u0432\u044b": 12, "\u0416\u0411\u0418": 13, "\u0417\u0418\u041f": 14, "\u0417\u0420\u0410": 15, "\u0418\u0437\u0434\u0435\u043b\u0438\u044f \u0438 \u043c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b \u0441\u0438\u0441\u0442\u0435\u043c \u0432\u043e\u0434\u043e\u043e\u0442\u0432\u0435\u0434\u0435\u043d\u0438\u044f": 16, "\u0418\u043d\u0435\u0440\u0442\u043d\u044b\u0435 \u043c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b": 17, "\u0418\u043d\u0441\u0442\u0440\u0443\u043c\u0435\u043d\u0442, \u043c\u0435\u0445\u0430\u043d\u0438\u0437\u043c\u044b": 18, "\u041a\u0418\u041f\u0438\u0410 \u0438 \u043a\u043e\u043c\u043f\u043b\u0435\u043a\u0442\u0443\u044e\u0449\u0438\u0435": 19, "\u041a\u0430\u0431\u0435\u043b\u0435\u043d\u0435\u0441\u0443\u0449\u0438\u0435 \u0441\u0438\u0441\u0442\u0435\u043c\u044b": 20, "\u041a\u0430\u0431\u0435\u043b\u0435\u043d\u0435\u0441\u0443\u0449\u0438\u0435 \u0441\u0438\u0441\u0442\u0435\u043c\u044b (\u043c)": 21, "\u041a\u0430\u0431\u0435\u043b\u044c\u043d\u0430\u044f \u0430\u0440\u043c\u0430\u0442\u0443\u0440\u0430 (\u041a\u0410)": 22, "\u041a\u0430\u0431\u0435\u043b\u044c\u043d\u043e-\u043f\u0440\u043e\u0432\u043e\u0434\u043d\u0438\u043a\u043e\u0432\u0430\u044f \u043f\u0440\u043e\u0434\u0443\u043a\u0446\u0438\u044f": 23, "\u041a\u0438\u0440\u043f\u0438\u0447, \u0433\u0430\u0437\u043e\u0431\u0435\u0442\u043e\u043d\u043d\u044b\u0435, \u043a\u0435\u0440\u0430\u043c\u0437\u0438\u0442\u043e\u0431\u0435\u0442\u043e\u043d\u043d\u044b\u0435 \u0431\u043b\u043e\u043a\u0438": 24, "\u041a\u043e\u043b\u043e\u0434\u0446\u044b \u043e\u0431\u0441\u043b\u0443\u0436\u0438\u0432\u0430\u043d\u0438\u044f, \u043f\u0440\u0438\u044f\u043c\u043a\u0438 \u0438 \u043a\u043e\u043c\u043f\u043b\u0435\u043a\u0442\u0443\u044e\u0449\u0438\u0435": 25, "\u041a\u043e\u043d\u0441\u0442\u0440\u0443\u043a\u0446\u0438\u0438 \u043a\u0440\u043e\u0432\u0435\u043b\u044c\u043d\u043e-\u0444\u0430\u0441\u0430\u0434\u043d\u044b\u0435": 26, "\u041a\u0440\u0435\u043f\u0435\u0436 \u0438 \u043c\u0435\u0442\u0438\u0437\u044b (\u0432\u0435\u0441\u043e\u0432\u044b\u0435)": 27, "\u041a\u0440\u0435\u043f\u0435\u0436 \u0438 \u043c\u0435\u0442\u0438\u0437\u044b (\u0448\u0442.)": 28, "\u041b\u041a\u041c": 29, "\u041c\u0430\u043b\u044b\u0435 \u0430\u0440\u0445\u0438\u0442\u0435\u043a\u0442\u0443\u0440\u043d\u044b\u0435 \u0444\u043e\u0440\u043c\u044b (\u041c\u0410\u0424)": 30, "\u041c\u0430\u0440\u043a\u0438\u0440\u043e\u0432\u043e\u0447\u043d\u044b\u0435 \u0438\u0437\u0434\u0435\u043b\u0438\u044f \u0438 \u043c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b": 31, "\u041c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b \u0410\u041a\u0417, \u041e\u0413\u0417 (\u043a\u0433)": 32, "\u041c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b \u0410\u041a\u0417, \u041e\u0413\u0417 (\u043c2)": 33, "\u041c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b \u0433\u0438\u0434\u0440\u043e-\u043f\u0430\u0440\u043e\u0438\u0437\u043e\u043b\u044f\u0446\u0438\u043e\u043d\u043d\u044b\u0435 (\u043a\u0433)": 34, "\u041c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b \u0433\u0438\u0434\u0440\u043e-\u043f\u0430\u0440\u043e\u0438\u0437\u043e\u043b\u044f\u0446\u0438\u043e\u043d\u043d\u044b\u0435 (\u043c2)": 35, "\u041c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b \u0434\u043b\u044f \u0431\u043b\u0430\u0433\u043e\u0443\u0441\u0442\u0440\u043e\u0439\u0441\u0442\u0432\u0430": 36, "\u041c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b \u0441\u0438\u0441\u0442\u0435\u043c \u041e\u0412\u0438\u041a": 37, "\u041c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b \u0442\u0435\u043f\u043b\u043e\u0438\u0437\u043e\u043b\u044f\u0446\u0438\u043e\u043d\u043d\u044b\u0435 (\u0432\u0435\u0441\u043e\u0432\u044b\u0435)": 38, "\u041c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b \u0442\u0435\u043f\u043b\u043e\u0438\u0437\u043e\u043b\u044f\u0446\u0438\u043e\u043d\u043d\u044b\u0435 (\u043c)": 39, "\u041c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b \u0442\u0435\u043f\u043b\u043e\u0438\u0437\u043e\u043b\u044f\u0446\u0438\u043e\u043d\u043d\u044b\u0435 (\u043c2)": 40, "\u041c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b \u0442\u0435\u043f\u043b\u043e\u0438\u0437\u043e\u043b\u044f\u0446\u0438\u043e\u043d\u043d\u044b\u0435 (\u043c3)": 41, "\u041c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b \u0442\u0435\u043f\u043b\u043e\u0438\u0437\u043e\u043b\u044f\u0446\u0438\u043e\u043d\u043d\u044b\u0435 (\u0448\u0442.)": 42, "\u041c\u0435\u0431\u0435\u043b\u044c \u0438 \u0434\u0435\u043a\u043e\u0440\u0430\u0442\u0438\u0432\u043d\u044b\u0435 \u044d\u043b\u0435\u043c\u0435\u043d\u0442\u044b": 43, "\u041c\u0435\u0431\u0435\u043b\u044c \u043f\u043e \u0434\u0438\u0437\u0430\u0439\u043d \u043f\u0440\u043e\u0435\u043a\u0442\u0443": 44, "\u041c\u0435\u0442\u0430\u043b\u043b\u043e\u0438\u0437\u0434\u0435\u043b\u0438\u044f": 45, "\u041c\u0435\u0442\u0430\u043b\u043b\u043e\u0438\u0437\u0434\u0435\u043b\u0438\u044f (\u043c)": 46, "\u041c\u0435\u0442\u0430\u043b\u043b\u043e\u043a\u043e\u043d\u0441\u0442\u0440\u0443\u043a\u0446\u0438\u0438": 47, "\u041c\u0435\u0442\u0430\u043b\u043b\u043e\u043f\u0440\u043e\u043a\u0430\u0442 (\u043c)": 48, "\u041c\u0435\u0442\u0430\u043b\u043b\u043e\u043f\u0440\u043e\u043a\u0430\u0442 (\u043c2)": 49, "\u041c\u0435\u0442\u0430\u043b\u043b\u043e\u043f\u0440\u043e\u043a\u0430\u0442 \u0432\u0435\u0441\u043e\u0432\u043e\u0439": 50, "\u041e\u0414\u0426\u0418": 51, "\u041e\u0431\u043e\u0440\u0443\u0434\u043e\u0432\u0430\u043d\u0438\u0435": 52, "\u041e\u0431\u043e\u0440\u0443\u0434\u043e\u0432\u0430\u043d\u0438\u0435 \u0438 \u043c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b \u0421\u041a\u0423\u0414 ": 53, "\u041e\u0431\u043e\u0440\u0443\u0434\u043e\u0432\u0430\u043d\u0438\u0435 \u0438 \u043c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b \u043f\u0440\u043e\u0442\u0438\u0432\u043e\u043f\u043e\u0436\u0430\u0440\u043d\u044b\u0435": 54, "\u041e\u0431\u043e\u0440\u0443\u0434\u043e\u0432\u0430\u043d\u0438\u0435 \u0441\u0438\u0441\u0442\u0435\u043c \u041e\u0412\u0438\u041a": 55, "\u041e\u0431\u0449\u0435\u0441\u0442\u0440\u043e\u0438\u0442\u0435\u043b\u044c\u043d\u044b\u0435 \u043c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b": 56, "\u041e\u0433\u0440\u0430\u0436\u0434\u0435\u043d\u0438\u044f \u0438 \u043a\u043e\u043c\u043f\u043b\u0435\u043a\u0442\u0443\u044e\u0449\u0438\u0435": 57, "\u041e\u0433\u0440\u0430\u0436\u0434\u0435\u043d\u0438\u044f \u0438 \u043a\u043e\u043c\u043f\u043b\u0435\u043a\u0442\u0443\u044e\u0449\u0438\u0435 (\u043c)": 58, "\u041e\u043a\u043d\u0430, \u0434\u0432\u0435\u0440\u0438, \u0432\u043e\u0440\u043e\u0442\u0430 \u0438 \u043a\u043e\u043c\u043f\u043b\u0435\u043a\u0442\u0443\u044e\u0449\u0438\u0435": 59, "\u041e\u043f\u043e\u0440\u044b": 60, "\u041e\u0442\u0434\u0435\u043b\u043e\u0447\u043d\u044b\u0435 \u043c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b (\u0432\u0435\u0441\u043e\u0432\u044b\u0435)": 61, "\u041e\u0442\u0434\u0435\u043b\u043e\u0447\u043d\u044b\u0435 \u043c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b (\u043c)": 62, "\u041e\u0442\u0434\u0435\u043b\u043e\u0447\u043d\u044b\u0435 \u043c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b (\u043c2)": 63, "\u041e\u0442\u0434\u0435\u043b\u043e\u0447\u043d\u044b\u0435 \u043c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b (\u0448\u0442.)": 64, "\u041f\u0438\u043b\u043e\u043c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b": 65, "\u041f\u0440\u0435\u0434\u0443\u043f\u0440\u0435\u0436\u0434\u0430\u044e\u0449\u0438\u0435 \u0437\u043d\u0430\u043a\u0438 \u0431\u0435\u0437\u043e\u043f\u0430\u0441\u043d\u043e\u0441\u0442\u0438": 66, "\u0420\u0430\u0441\u0445\u043e\u0434\u043d\u044b\u0435 \u043c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b": 67, "\u0420\u0435\u0437\u0438\u043d\u043e\u0442\u0435\u0445\u043d\u0438\u0447\u0435\u0441\u043a\u0438\u0435 \u0438\u0437\u0434\u0435\u043b\u0438\u044f (\u0420\u0422\u0418)": 68, "\u0421\u0418\u0417": 69, "\u0421\u0430\u043d\u0442\u0435\u0445\u043d\u0438\u0447\u0435\u0441\u043a\u0438\u0435 \u0438\u0437\u0434\u0435\u043b\u0438\u044f \u0438 \u043c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b": 70, "\u0421\u0432\u0430\u0440\u043e\u0447\u043d\u044b\u0435 \u043c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b": 71, "\u0421\u043c\u0435\u0441\u0438, \u0440\u0430\u0441\u0442\u0432\u043e\u0440\u044b \u0441\u0442\u0440\u043e\u0438\u0442\u0435\u043b\u044c\u043d\u044b\u0435, \u043a\u043b\u0435\u0439 (\u0432\u0435\u0441\u043e\u0432\u044b\u0435)": 72, "\u0421\u043c\u0435\u0441\u0438, \u0440\u0430\u0441\u0442\u0432\u043e\u0440\u044b \u0441\u0442\u0440\u043e\u0438\u0442\u0435\u043b\u044c\u043d\u044b\u0435, \u043a\u043b\u0435\u0439 (\u043c3)": 73, "\u0421\u043e\u0435\u0434\u0438\u043d\u0438\u0442\u0435\u043b\u044c\u043d\u044b\u0435 \u0434\u0435\u0442\u0430\u043b\u0438 \u0442\u0440\u0443\u0431\u043e\u043f\u0440\u043e\u0432\u043e\u0434\u043e\u0432 (\u0421\u0414\u0422)": 74, "\u0421\u044d\u043d\u0434\u0432\u0438\u0447-\u043f\u0430\u043d\u0435\u043b\u0438 (\u043c2)": 75, "\u0421\u044d\u043d\u0434\u0432\u0438\u0447-\u043f\u0430\u043d\u0435\u043b\u0438 (\u0448\u0442.)": 76, "\u0422\u0440\u0443\u0431\u0430 \u041f\u0412\u0425, \u041f\u042d, \u041f\u041f, \u041f\u041d\u0414, \u043c\u0435\u0442\u0430\u043b\u043b\u043e\u043f\u043e\u043bu\u043c\u0435\u0440\u043d\u0430\u044f \u0438 \u0442.\u043f.": 77, "\u0422\u0440\u0443\u0431\u0430 \u043f\u0440\u043e\u0447\u0430\u044f (\u043c\u0435\u0434\u043d\u0430\u044f, \u0447\u0443\u0433\u0443\u043d\u043d\u0430\u044f, \u0425\u0426)": 78, "\u0422\u0440\u0443\u0431\u0430 \u0441\u0442\u0430\u043b\u044c\u043d\u0430\u044f (\u043c)": 79, "\u0422\u0440\u0443\u0431\u0430 \u0441\u0442\u0430\u043b\u044c\u043d\u0430\u044f (\u0442)": 80, "\u042d\u043b\u0435\u043a\u0442\u0440\u043e\u043c\u043e\u043d\u0442\u0430\u0436\u043d\u044b\u0435 \u0438\u0437\u0434\u0435\u043b\u0438\u044f \u0438 \u043c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b (\u043c)": 81, "\u042d\u043b\u0435\u043a\u0442\u0440\u043e\u043c\u043e\u043d\u0442\u0430\u0436\u043d\u044b\u0435 \u0438\u0437\u0434\u0435\u043b\u0438\u044f \u0438\u0437\u0434\u0435\u043b\u0438\u044f \u0438 \u043c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b": 82, "\u042d\u043b\u0435\u043a\u0442\u0440\u043e\u0442\u0435\u0445\u043d\u0438\u043a\u0430, \u044d\u043b\u0435\u043a\u0442\u0440\u043e\u0443\u0441\u0442\u0430\u043d\u043e\u0432\u043e\u0447\u043d\u044b\u0435 \u0438\u0437\u0434\u0435\u043b\u0438\u044f \u0438 \u043c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u044b": 83}
|
|
|
1 |
+
null
|
tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893
|
3 |
+
size 11422778
|
training_metrics/training_metrics.json
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"training_sessions": [
|
3 |
+
{
|
4 |
+
"session_id": 1,
|
5 |
+
"epochs": [],
|
6 |
+
"parameters": {
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"batch_size": 4,
|
9 |
+
"num_epochs": 3,
|
10 |
+
"warmup_steps": 0,
|
11 |
+
"gradient_accumulation_steps": 2,
|
12 |
+
"weight_decay": 0.01,
|
13 |
+
"lr_scheduler_type": "cosine",
|
14 |
+
"clip_grad_norm": 1.0,
|
15 |
+
"save_path": "/content/drive/MyDrive/deepseek_txt_classifier"
|
16 |
+
},
|
17 |
+
"training_started": "2025-01-30T19:27:27.458361",
|
18 |
+
"training_ended": null
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"session_id": 2,
|
22 |
+
"epochs": [
|
23 |
+
{
|
24 |
+
"epoch": 1,
|
25 |
+
"timestamp": "2025-01-30T19:33:18.075195",
|
26 |
+
"train_loss": 4.0756168969514865,
|
27 |
+
"val_loss": 3.061457032888708,
|
28 |
+
"val_accuracy": 0.26017699115044246,
|
29 |
+
"f1_weighted": 0.2289007857934706,
|
30 |
+
"precision_weighted": 0.3235188957013108,
|
31 |
+
"recall_weighted": 0.26017699115044246
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"epoch": 2,
|
35 |
+
"timestamp": "2025-01-30T19:33:18.075195",
|
36 |
+
"train_loss": 2.3547961317182544,
|
37 |
+
"val_loss": 2.5218266886724554,
|
38 |
+
"val_accuracy": 0.4088495575221239,
|
39 |
+
"f1_weighted": 0.3899156502168876,
|
40 |
+
"precision_weighted": 0.4584804625070112,
|
41 |
+
"recall_weighted": 0.4088495575221239
|
42 |
+
},
|
43 |
+
{
|
44 |
+
"epoch": 3,
|
45 |
+
"timestamp": "2025-01-30T19:33:18.075195",
|
46 |
+
"train_loss": 1.879219204586888,
|
47 |
+
"val_loss": 2.4548432421936117,
|
48 |
+
"val_accuracy": 0.42123893805309737,
|
49 |
+
"f1_weighted": 0.40484596795481426,
|
50 |
+
"precision_weighted": 0.4437322769455818,
|
51 |
+
"recall_weighted": 0.42123893805309737
|
52 |
+
}
|
53 |
+
],
|
54 |
+
"parameters": {
|
55 |
+
"learning_rate": 0.0002,
|
56 |
+
"batch_size": 4,
|
57 |
+
"num_epochs": 3,
|
58 |
+
"warmup_steps": 0,
|
59 |
+
"gradient_accumulation_steps": 2,
|
60 |
+
"weight_decay": 0.01,
|
61 |
+
"lr_scheduler_type": "cosine",
|
62 |
+
"clip_grad_norm": 1.0,
|
63 |
+
"save_path": "/content/drive/MyDrive/deepseek_txt_classifier"
|
64 |
+
},
|
65 |
+
"training_started": "2025-01-30T19:33:18.075195",
|
66 |
+
"training_ended": "2025-01-30T19:33:18.075195",
|
67 |
+
"final_metrics": {
|
68 |
+
"best_val_loss": 2.4548432421936117,
|
69 |
+
"total_epochs": 3,
|
70 |
+
"early_stopped": false
|
71 |
+
}
|
72 |
+
}
|
73 |
+
],
|
74 |
+
"current_session": null
|
75 |
+
}
|