Upload folder using huggingface_hub
Browse files- checkpoint-50/adapter_config.json +2 -2
- checkpoint-50/adapter_model.safetensors +1 -1
- checkpoint-50/optimizer.pt +1 -1
- checkpoint-50/rng_state_0.pth +2 -2
- checkpoint-50/rng_state_1.pth +2 -2
- checkpoint-50/rng_state_2.pth +1 -1
- checkpoint-50/rng_state_3.pth +1 -1
- checkpoint-50/trainer_state.json +34 -34
- checkpoint-50/training_args.bin +1 -1
checkpoint-50/adapter_config.json
CHANGED
@@ -19,9 +19,9 @@
|
|
19 |
"rank_pattern": {},
|
20 |
"revision": null,
|
21 |
"target_modules": [
|
22 |
-
"q_proj",
|
23 |
-
"k_proj",
|
24 |
"v_proj",
|
|
|
|
|
25 |
"o_proj"
|
26 |
],
|
27 |
"task_type": "CAUSAL_LM",
|
|
|
19 |
"rank_pattern": {},
|
20 |
"revision": null,
|
21 |
"target_modules": [
|
|
|
|
|
22 |
"v_proj",
|
23 |
+
"k_proj",
|
24 |
+
"q_proj",
|
25 |
"o_proj"
|
26 |
],
|
27 |
"task_type": "CAUSAL_LM",
|
checkpoint-50/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 436242776
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8124a081a7ccfa909b9e072bfa0377d8895934427a97f45a78b899294c019006
|
3 |
size 436242776
|
checkpoint-50/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 872568314
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ea58ef1f259cc64213e083069205c6a7c87f3bda280b0cedd8d82aa9ea9a001
|
3 |
size 872568314
|
checkpoint-50/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37f8cf6ff0104f126e962b837e6aadc34920baa4dba5c947d76a4d280d03d435
|
3 |
+
size 15024
|
checkpoint-50/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:522e793c65745183643c72542cf75d1f9178a0262c7be626f122c78da45ba43b
|
3 |
+
size 15024
|
checkpoint-50/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:715a6d2d00c49a8f827f26e430939815ec4b310cc1838bb170e0cd29a335df83
|
3 |
size 15024
|
checkpoint-50/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:56b9eef68233418cf695ee351c740b2435854b7f56a1de55cda0a2cb934925b9
|
3 |
size 15024
|
checkpoint-50/trainer_state.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "./mistral/01-03-24-Weni-ZeroShot-3.3.18-Mistral-7b-Multilanguage-3.2.0_Zeroshot-2_max_steps-100_batch_16_2024-03-01_ppid_7/checkpoint-50",
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 10,
|
6 |
"global_step": 50,
|
7 |
"is_hyper_param_search": false,
|
@@ -9,57 +9,57 @@
|
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
-
"epoch": 0.
|
13 |
-
"eval_loss": 0.
|
14 |
-
"eval_runtime":
|
15 |
-
"eval_samples_per_second":
|
16 |
-
"eval_steps_per_second": 0.
|
17 |
"step": 10
|
18 |
},
|
19 |
{
|
20 |
-
"epoch": 0.
|
21 |
-
"grad_norm": 0.
|
22 |
"learning_rate": 0.00019510565162951537,
|
23 |
-
"loss": 0.
|
24 |
"step": 20
|
25 |
},
|
26 |
{
|
27 |
-
"epoch": 0.
|
28 |
-
"eval_loss": 0.
|
29 |
-
"eval_runtime":
|
30 |
-
"eval_samples_per_second":
|
31 |
-
"eval_steps_per_second": 0.
|
32 |
"step": 20
|
33 |
},
|
34 |
{
|
35 |
-
"epoch": 0.
|
36 |
-
"eval_loss": 0.
|
37 |
-
"eval_runtime":
|
38 |
-
"eval_samples_per_second":
|
39 |
-
"eval_steps_per_second": 0.
|
40 |
"step": 30
|
41 |
},
|
42 |
{
|
43 |
-
"epoch": 0.
|
44 |
-
"grad_norm": 0.
|
45 |
"learning_rate": 0.0001529919264233205,
|
46 |
-
"loss": 0.
|
47 |
"step": 40
|
48 |
},
|
49 |
{
|
50 |
-
"epoch": 0.
|
51 |
-
"eval_loss": 0.
|
52 |
-
"eval_runtime":
|
53 |
-
"eval_samples_per_second":
|
54 |
-
"eval_steps_per_second": 0.
|
55 |
"step": 40
|
56 |
},
|
57 |
{
|
58 |
-
"epoch": 0.
|
59 |
-
"eval_loss": 0.
|
60 |
-
"eval_runtime":
|
61 |
-
"eval_samples_per_second":
|
62 |
-
"eval_steps_per_second": 0.
|
63 |
"step": 50
|
64 |
}
|
65 |
],
|
@@ -68,7 +68,7 @@
|
|
68 |
"num_input_tokens_seen": 0,
|
69 |
"num_train_epochs": 1,
|
70 |
"save_steps": 10,
|
71 |
-
"total_flos":
|
72 |
"train_batch_size": 16,
|
73 |
"trial_name": null,
|
74 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.40609535574913025,
|
3 |
"best_model_checkpoint": "./mistral/01-03-24-Weni-ZeroShot-3.3.18-Mistral-7b-Multilanguage-3.2.0_Zeroshot-2_max_steps-100_batch_16_2024-03-01_ppid_7/checkpoint-50",
|
4 |
+
"epoch": 0.49504950495049505,
|
5 |
"eval_steps": 10,
|
6 |
"global_step": 50,
|
7 |
"is_hyper_param_search": false,
|
|
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
+
"epoch": 0.1,
|
13 |
+
"eval_loss": 0.6292536854743958,
|
14 |
+
"eval_runtime": 93.2576,
|
15 |
+
"eval_samples_per_second": 30.743,
|
16 |
+
"eval_steps_per_second": 0.965,
|
17 |
"step": 10
|
18 |
},
|
19 |
{
|
20 |
+
"epoch": 0.2,
|
21 |
+
"grad_norm": 0.24918170273303986,
|
22 |
"learning_rate": 0.00019510565162951537,
|
23 |
+
"loss": 0.9604,
|
24 |
"step": 20
|
25 |
},
|
26 |
{
|
27 |
+
"epoch": 0.2,
|
28 |
+
"eval_loss": 0.46683359146118164,
|
29 |
+
"eval_runtime": 93.2154,
|
30 |
+
"eval_samples_per_second": 30.757,
|
31 |
+
"eval_steps_per_second": 0.966,
|
32 |
"step": 20
|
33 |
},
|
34 |
{
|
35 |
+
"epoch": 0.3,
|
36 |
+
"eval_loss": 0.4341636002063751,
|
37 |
+
"eval_runtime": 93.1765,
|
38 |
+
"eval_samples_per_second": 30.77,
|
39 |
+
"eval_steps_per_second": 0.966,
|
40 |
"step": 30
|
41 |
},
|
42 |
{
|
43 |
+
"epoch": 0.4,
|
44 |
+
"grad_norm": 0.16474689543247223,
|
45 |
"learning_rate": 0.0001529919264233205,
|
46 |
+
"loss": 0.4381,
|
47 |
"step": 40
|
48 |
},
|
49 |
{
|
50 |
+
"epoch": 0.4,
|
51 |
+
"eval_loss": 0.40781059861183167,
|
52 |
+
"eval_runtime": 93.1933,
|
53 |
+
"eval_samples_per_second": 30.764,
|
54 |
+
"eval_steps_per_second": 0.966,
|
55 |
"step": 40
|
56 |
},
|
57 |
{
|
58 |
+
"epoch": 0.5,
|
59 |
+
"eval_loss": 0.40609535574913025,
|
60 |
+
"eval_runtime": 93.1926,
|
61 |
+
"eval_samples_per_second": 30.764,
|
62 |
+
"eval_steps_per_second": 0.966,
|
63 |
"step": 50
|
64 |
}
|
65 |
],
|
|
|
68 |
"num_input_tokens_seen": 0,
|
69 |
"num_train_epochs": 1,
|
70 |
"save_steps": 10,
|
71 |
+
"total_flos": 4.5255742262450586e+17,
|
72 |
"train_batch_size": 16,
|
73 |
"trial_name": null,
|
74 |
"trial_params": null
|
checkpoint-50/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:daf4ca86cd3cb3688a488e999435cb5aec8a4b0702902c7de81df751b2180e05
|
3 |
size 5176
|