Training in progress, step 3400, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:be5c25a42de283fbf8eb99b28b840ceac13edfa648e54693d969da0ac35b6f90
 size 156926880
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:8c6b46952b2970dd5f940fb5979a93fb84e7c8a0307f250f431a458b33f9ecc5
 size 79968964
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f50b8abfcdca92497b65442d043585c92e790c298a8a67cbb11929dfaf245bdd
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:194f8110cd3619fdd5e51c1ecd5420a9a4610c775caa871ab34cb47613afe4b2
 size 1064
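The four binary files above changed only in their Git LFS pointers: each pointer records the sha256 oid and byte size of the object it tracks. A minimal Python sketch, assuming the checkpoint has been downloaded locally under last-checkpoint/, for checking a downloaded file against the oid recorded in this commit:

import hashlib

# Minimal sketch: recompute a file's sha256 and compare it with the oid in its
# Git LFS pointer. The path assumes a local download of this checkpoint; the
# expected hash is the new adapter_model.safetensors oid from the diff above.
def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "be5c25a42de283fbf8eb99b28b840ceac13edfa648e54693d969da0ac35b6f90"
actual = sha256_of("last-checkpoint/adapter_model.safetensors")
print("OK" if actual == expected else f"hash mismatch: {actual}")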
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 2.630952835083008,
   "best_model_checkpoint": "miner_id_24/checkpoint-2800",
-  "epoch": 0.
+  "epoch": 0.5894590846047156,
   "eval_steps": 200,
-  "global_step": 3200,
+  "global_step": 3400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -22543,6 +22543,1414 @@
       "eval_samples_per_second": 26.169,
       "eval_steps_per_second": 6.542,
       "step": 3200
+    },
+    {
+      "epoch": 0.5549583911234397,
+      "grad_norm": 4.695315361022949,
+      "learning_rate": 0.00018382642193164083,
+      "loss": 2.3062,
+      "step": 3201
+    },
+    {
+      "epoch": 0.555131761442441,
+      "grad_norm": 5.645510673522949,
+      "learning_rate": 0.00018381650391835158,
+      "loss": 2.7282,
+      "step": 3202
+    },
+    {
+      "epoch": 0.5553051317614425,
+      "grad_norm": 5.173983573913574,
+      "learning_rate": 0.0001838065831327417,
+      "loss": 2.7072,
+      "step": 3203
+    },
+    {
+      "epoch": 0.5554785020804438,
+      "grad_norm": 3.9884448051452637,
+      "learning_rate": 0.00018379665957513934,
+      "loss": 2.3285,
+      "step": 3204
+    },
+    {
+      "epoch": 0.5556518723994452,
+      "grad_norm": 4.842416286468506,
+      "learning_rate": 0.0001837867332458727,
+      "loss": 2.8464,
+      "step": 3205
+    },
+    {
+      "epoch": 0.5558252427184466,
+      "grad_norm": 4.65514612197876,
+      "learning_rate": 0.00018377680414527014,
+      "loss": 2.2799,
+      "step": 3206
+    },
+    {
+      "epoch": 0.555998613037448,
+      "grad_norm": 4.60508394241333,
+      "learning_rate": 0.0001837668722736601,
+      "loss": 2.7122,
+      "step": 3207
+    },
+    {
+      "epoch": 0.5561719833564494,
+      "grad_norm": 4.336683750152588,
+      "learning_rate": 0.00018375693763137102,
+      "loss": 2.6092,
+      "step": 3208
+    },
+    {
+      "epoch": 0.5563453536754508,
+      "grad_norm": 5.04818058013916,
+      "learning_rate": 0.00018374700021873158,
+      "loss": 2.9201,
+      "step": 3209
+    },
+    {
+      "epoch": 0.5565187239944521,
+      "grad_norm": 4.844702243804932,
+      "learning_rate": 0.0001837370600360704,
+      "loss": 2.7589,
+      "step": 3210
+    },
+    {
+      "epoch": 0.5566920943134536,
+      "grad_norm": 5.481187343597412,
+      "learning_rate": 0.0001837271170837163,
+      "loss": 2.4531,
+      "step": 3211
+    },
+    {
+      "epoch": 0.5568654646324549,
+      "grad_norm": 4.967885494232178,
+      "learning_rate": 0.00018371717136199812,
+      "loss": 2.7186,
+      "step": 3212
+    },
+    {
+      "epoch": 0.5570388349514563,
+      "grad_norm": 4.850007057189941,
+      "learning_rate": 0.00018370722287124486,
+      "loss": 2.5029,
+      "step": 3213
+    },
+    {
+      "epoch": 0.5572122052704577,
+      "grad_norm": 5.334954738616943,
+      "learning_rate": 0.00018369727161178555,
+      "loss": 2.9814,
+      "step": 3214
+    },
+    {
+      "epoch": 0.5573855755894591,
+      "grad_norm": 5.184202194213867,
+      "learning_rate": 0.0001836873175839494,
+      "loss": 2.8149,
+      "step": 3215
+    },
+    {
+      "epoch": 0.5575589459084604,
+      "grad_norm": 4.444332599639893,
+      "learning_rate": 0.00018367736078806556,
+      "loss": 2.3719,
+      "step": 3216
+    },
+    {
+      "epoch": 0.5577323162274619,
+      "grad_norm": 4.181465148925781,
+      "learning_rate": 0.00018366740122446342,
+      "loss": 2.3888,
+      "step": 3217
+    },
+    {
+      "epoch": 0.5579056865464632,
+      "grad_norm": 4.603883743286133,
+      "learning_rate": 0.00018365743889347237,
+      "loss": 2.3793,
+      "step": 3218
+    },
+    {
+      "epoch": 0.5580790568654647,
+      "grad_norm": 5.169532775878906,
+      "learning_rate": 0.00018364747379542195,
+      "loss": 2.7306,
+      "step": 3219
+    },
+    {
+      "epoch": 0.558252427184466,
+      "grad_norm": 5.546294212341309,
+      "learning_rate": 0.00018363750593064179,
+      "loss": 2.5412,
+      "step": 3220
+    },
+    {
+      "epoch": 0.5584257975034674,
+      "grad_norm": 4.673266410827637,
+      "learning_rate": 0.00018362753529946153,
+      "loss": 2.4176,
+      "step": 3221
+    },
+    {
+      "epoch": 0.5585991678224688,
+      "grad_norm": 4.631234169006348,
+      "learning_rate": 0.000183617561902211,
+      "loss": 2.5067,
+      "step": 3222
+    },
+    {
+      "epoch": 0.5587725381414702,
+      "grad_norm": 4.7319207191467285,
+      "learning_rate": 0.00018360758573922006,
+      "loss": 2.8722,
+      "step": 3223
+    },
+    {
+      "epoch": 0.5589459084604715,
+      "grad_norm": 4.840054988861084,
+      "learning_rate": 0.00018359760681081869,
+      "loss": 2.7877,
+      "step": 3224
+    },
+    {
+      "epoch": 0.559119278779473,
+      "grad_norm": 4.6929612159729,
+      "learning_rate": 0.00018358762511733694,
+      "loss": 2.7362,
+      "step": 3225
+    },
+    {
+      "epoch": 0.5592926490984743,
+      "grad_norm": 4.831787586212158,
+      "learning_rate": 0.00018357764065910498,
+      "loss": 2.6763,
+      "step": 3226
+    },
+    {
+      "epoch": 0.5594660194174758,
+      "grad_norm": 4.793760776519775,
+      "learning_rate": 0.00018356765343645307,
+      "loss": 2.5936,
+      "step": 3227
+    },
+    {
+      "epoch": 0.5596393897364771,
+      "grad_norm": 5.149638652801514,
+      "learning_rate": 0.00018355766344971156,
+      "loss": 2.8524,
+      "step": 3228
+    },
+    {
+      "epoch": 0.5598127600554785,
+      "grad_norm": 4.831232070922852,
+      "learning_rate": 0.0001835476706992108,
+      "loss": 2.8871,
+      "step": 3229
+    },
+    {
+      "epoch": 0.5599861303744799,
+      "grad_norm": 4.116048336029053,
+      "learning_rate": 0.00018353767518528138,
+      "loss": 2.7667,
+      "step": 3230
+    },
+    {
+      "epoch": 0.5601595006934813,
+      "grad_norm": 4.861133575439453,
+      "learning_rate": 0.0001835276769082539,
+      "loss": 2.6169,
+      "step": 3231
+    },
+    {
+      "epoch": 0.5603328710124826,
+      "grad_norm": 4.4876275062561035,
+      "learning_rate": 0.00018351767586845908,
+      "loss": 3.1115,
+      "step": 3232
+    },
+    {
+      "epoch": 0.5605062413314841,
+      "grad_norm": 4.5447821617126465,
+      "learning_rate": 0.00018350767206622766,
+      "loss": 2.9337,
+      "step": 3233
+    },
+    {
+      "epoch": 0.5606796116504854,
+      "grad_norm": 3.921549081802368,
+      "learning_rate": 0.00018349766550189057,
+      "loss": 2.5209,
+      "step": 3234
+    },
+    {
+      "epoch": 0.5608529819694869,
+      "grad_norm": 4.940510272979736,
+      "learning_rate": 0.00018348765617577878,
+      "loss": 2.6354,
+      "step": 3235
+    },
+    {
+      "epoch": 0.5610263522884882,
+      "grad_norm": 5.1189703941345215,
+      "learning_rate": 0.00018347764408822338,
+      "loss": 2.5571,
+      "step": 3236
+    },
+    {
+      "epoch": 0.5611997226074896,
+      "grad_norm": 4.569870948791504,
+      "learning_rate": 0.0001834676292395555,
+      "loss": 2.7492,
+      "step": 3237
+    },
+    {
+      "epoch": 0.561373092926491,
+      "grad_norm": 4.407032489776611,
+      "learning_rate": 0.0001834576116301064,
+      "loss": 2.4574,
+      "step": 3238
+    },
+    {
+      "epoch": 0.5615464632454924,
+      "grad_norm": 5.042418003082275,
+      "learning_rate": 0.00018344759126020743,
+      "loss": 2.8326,
+      "step": 3239
+    },
+    {
+      "epoch": 0.5617198335644937,
+      "grad_norm": 5.140253067016602,
+      "learning_rate": 0.00018343756813019,
+      "loss": 2.7059,
+      "step": 3240
+    },
+    {
+      "epoch": 0.5618932038834952,
+      "grad_norm": 4.837085247039795,
+      "learning_rate": 0.00018342754224038565,
+      "loss": 2.6729,
+      "step": 3241
+    },
+    {
+      "epoch": 0.5620665742024965,
+      "grad_norm": 4.675772666931152,
+      "learning_rate": 0.00018341751359112602,
+      "loss": 2.8393,
+      "step": 3242
+    },
+    {
+      "epoch": 0.562239944521498,
+      "grad_norm": 4.432028293609619,
+      "learning_rate": 0.00018340748218274278,
+      "loss": 2.5541,
+      "step": 3243
+    },
+    {
+      "epoch": 0.5624133148404993,
+      "grad_norm": 4.844389915466309,
+      "learning_rate": 0.00018339744801556775,
+      "loss": 2.6398,
+      "step": 3244
+    },
+    {
+      "epoch": 0.5625866851595007,
+      "grad_norm": 4.452284812927246,
+      "learning_rate": 0.00018338741108993284,
+      "loss": 2.5527,
+      "step": 3245
+    },
+    {
+      "epoch": 0.562760055478502,
+      "grad_norm": 5.607207298278809,
+      "learning_rate": 0.00018337737140617,
+      "loss": 2.9514,
+      "step": 3246
+    },
+    {
+      "epoch": 0.5629334257975035,
+      "grad_norm": 5.297826766967773,
+      "learning_rate": 0.00018336732896461134,
+      "loss": 2.6627,
+      "step": 3247
+    },
+    {
+      "epoch": 0.5631067961165048,
+      "grad_norm": 5.012407302856445,
+      "learning_rate": 0.00018335728376558897,
+      "loss": 2.4119,
+      "step": 3248
+    },
+    {
+      "epoch": 0.5632801664355063,
+      "grad_norm": 4.383134841918945,
+      "learning_rate": 0.00018334723580943518,
+      "loss": 2.4541,
+      "step": 3249
+    },
+    {
+      "epoch": 0.5634535367545076,
+      "grad_norm": 4.921750545501709,
+      "learning_rate": 0.00018333718509648233,
+      "loss": 2.4352,
+      "step": 3250
+    },
+    {
+      "epoch": 0.563626907073509,
+      "grad_norm": 4.321812629699707,
+      "learning_rate": 0.00018332713162706282,
+      "loss": 2.2969,
+      "step": 3251
+    },
+    {
+      "epoch": 0.5638002773925104,
+      "grad_norm": 4.888926029205322,
+      "learning_rate": 0.0001833170754015092,
+      "loss": 2.613,
+      "step": 3252
+    },
+    {
+      "epoch": 0.5639736477115118,
+      "grad_norm": 4.977480888366699,
+      "learning_rate": 0.00018330701642015412,
+      "loss": 2.6904,
+      "step": 3253
+    },
+    {
+      "epoch": 0.5641470180305131,
+      "grad_norm": 4.617575168609619,
+      "learning_rate": 0.00018329695468333022,
+      "loss": 2.5888,
+      "step": 3254
+    },
+    {
+      "epoch": 0.5643203883495146,
+      "grad_norm": 4.387608528137207,
+      "learning_rate": 0.00018328689019137036,
+      "loss": 2.7105,
+      "step": 3255
+    },
+    {
+      "epoch": 0.5644937586685159,
+      "grad_norm": 4.935214519500732,
+      "learning_rate": 0.0001832768229446074,
+      "loss": 2.5602,
+      "step": 3256
+    },
+    {
+      "epoch": 0.5646671289875174,
+      "grad_norm": 5.279615879058838,
+      "learning_rate": 0.0001832667529433744,
+      "loss": 2.963,
+      "step": 3257
+    },
+    {
+      "epoch": 0.5648404993065187,
+      "grad_norm": 4.989308834075928,
+      "learning_rate": 0.00018325668018800433,
+      "loss": 2.4744,
+      "step": 3258
+    },
+    {
+      "epoch": 0.5650138696255201,
+      "grad_norm": 4.5932841300964355,
+      "learning_rate": 0.00018324660467883042,
+      "loss": 2.3349,
+      "step": 3259
+    },
+    {
+      "epoch": 0.5651872399445215,
+      "grad_norm": 4.207271575927734,
+      "learning_rate": 0.0001832365264161859,
+      "loss": 2.5639,
+      "step": 3260
+    },
+    {
+      "epoch": 0.5653606102635229,
+      "grad_norm": 4.205789089202881,
+      "learning_rate": 0.00018322644540040415,
+      "loss": 2.5071,
+      "step": 3261
+    },
+    {
+      "epoch": 0.5655339805825242,
+      "grad_norm": 4.582828998565674,
+      "learning_rate": 0.00018321636163181858,
+      "loss": 2.742,
+      "step": 3262
+    },
+    {
+      "epoch": 0.5657073509015257,
+      "grad_norm": 5.015689849853516,
+      "learning_rate": 0.00018320627511076275,
+      "loss": 2.7602,
+      "step": 3263
+    },
+    {
+      "epoch": 0.565880721220527,
+      "grad_norm": 5.282430648803711,
+      "learning_rate": 0.00018319618583757027,
+      "loss": 3.0663,
+      "step": 3264
+    },
+    {
+      "epoch": 0.5660540915395285,
+      "grad_norm": 4.263000011444092,
+      "learning_rate": 0.00018318609381257485,
+      "loss": 2.847,
+      "step": 3265
+    },
+    {
+      "epoch": 0.5662274618585298,
+      "grad_norm": 4.377638816833496,
+      "learning_rate": 0.00018317599903611032,
+      "loss": 2.6771,
+      "step": 3266
+    },
+    {
+      "epoch": 0.5664008321775312,
+      "grad_norm": 4.68384313583374,
+      "learning_rate": 0.00018316590150851053,
+      "loss": 2.5549,
+      "step": 3267
+    },
+    {
+      "epoch": 0.5665742024965326,
+      "grad_norm": 4.321643352508545,
+      "learning_rate": 0.00018315580123010947,
+      "loss": 2.643,
+      "step": 3268
+    },
+    {
+      "epoch": 0.566747572815534,
+      "grad_norm": 5.782863616943359,
+      "learning_rate": 0.00018314569820124124,
+      "loss": 2.6308,
+      "step": 3269
+    },
+    {
+      "epoch": 0.5669209431345353,
+      "grad_norm": 5.07639741897583,
+      "learning_rate": 0.00018313559242224002,
+      "loss": 2.3119,
+      "step": 3270
+    },
+    {
+      "epoch": 0.5670943134535368,
+      "grad_norm": 4.631782531738281,
+      "learning_rate": 0.00018312548389344007,
+      "loss": 2.5055,
+      "step": 3271
+    },
+    {
+      "epoch": 0.5672676837725381,
+      "grad_norm": 4.796509742736816,
+      "learning_rate": 0.00018311537261517572,
+      "loss": 2.6496,
+      "step": 3272
+    },
+    {
+      "epoch": 0.5674410540915396,
+      "grad_norm": 4.067022800445557,
+      "learning_rate": 0.00018310525858778137,
+      "loss": 2.5025,
+      "step": 3273
+    },
+    {
+      "epoch": 0.5676144244105409,
+      "grad_norm": 4.181182384490967,
+      "learning_rate": 0.00018309514181159164,
+      "loss": 2.692,
+      "step": 3274
+    },
+    {
+      "epoch": 0.5677877947295423,
+      "grad_norm": 5.265228748321533,
+      "learning_rate": 0.00018308502228694108,
+      "loss": 2.8712,
+      "step": 3275
+    },
+    {
+      "epoch": 0.5679611650485437,
+      "grad_norm": 4.782460689544678,
+      "learning_rate": 0.00018307490001416446,
+      "loss": 2.7431,
+      "step": 3276
+    },
+    {
+      "epoch": 0.5681345353675451,
+      "grad_norm": 4.4073662757873535,
+      "learning_rate": 0.00018306477499359657,
+      "loss": 2.3436,
+      "step": 3277
+    },
+    {
+      "epoch": 0.5683079056865464,
+      "grad_norm": 4.7344584465026855,
+      "learning_rate": 0.00018305464722557228,
+      "loss": 2.5161,
+      "step": 3278
+    },
+    {
+      "epoch": 0.5684812760055479,
+      "grad_norm": 4.4923224449157715,
+      "learning_rate": 0.0001830445167104266,
+      "loss": 2.6298,
+      "step": 3279
+    },
+    {
+      "epoch": 0.5686546463245492,
+      "grad_norm": 4.6726884841918945,
+      "learning_rate": 0.0001830343834484946,
+      "loss": 2.7933,
+      "step": 3280
+    },
+    {
+      "epoch": 0.5688280166435506,
+      "grad_norm": 4.8083271980285645,
+      "learning_rate": 0.00018302424744011142,
+      "loss": 2.8509,
+      "step": 3281
+    },
+    {
+      "epoch": 0.569001386962552,
+      "grad_norm": 5.104242324829102,
+      "learning_rate": 0.00018301410868561238,
+      "loss": 2.6193,
+      "step": 3282
+    },
+    {
+      "epoch": 0.5691747572815534,
+      "grad_norm": 5.134989261627197,
+      "learning_rate": 0.0001830039671853328,
+      "loss": 3.0867,
+      "step": 3283
+    },
+    {
+      "epoch": 0.5693481276005548,
+      "grad_norm": 4.670955181121826,
+      "learning_rate": 0.00018299382293960812,
+      "loss": 2.7721,
+      "step": 3284
+    },
+    {
+      "epoch": 0.5695214979195562,
+      "grad_norm": 4.840510368347168,
+      "learning_rate": 0.00018298367594877385,
+      "loss": 2.5497,
+      "step": 3285
+    },
+    {
+      "epoch": 0.5696948682385575,
+      "grad_norm": 4.2508931159973145,
+      "learning_rate": 0.00018297352621316566,
+      "loss": 2.632,
+      "step": 3286
+    },
+    {
+      "epoch": 0.569868238557559,
+      "grad_norm": 4.889342784881592,
+      "learning_rate": 0.00018296337373311923,
+      "loss": 2.8002,
+      "step": 3287
+    },
+    {
+      "epoch": 0.5700416088765603,
+      "grad_norm": 5.174691677093506,
+      "learning_rate": 0.00018295321850897035,
+      "loss": 3.1731,
+      "step": 3288
+    },
+    {
+      "epoch": 0.5702149791955617,
+      "grad_norm": 5.219470500946045,
+      "learning_rate": 0.00018294306054105497,
+      "loss": 2.365,
+      "step": 3289
+    },
+    {
+      "epoch": 0.5703883495145631,
+      "grad_norm": 5.663549423217773,
+      "learning_rate": 0.00018293289982970902,
+      "loss": 2.8535,
+      "step": 3290
+    },
+    {
+      "epoch": 0.5705617198335645,
+      "grad_norm": 5.0478363037109375,
+      "learning_rate": 0.0001829227363752686,
+      "loss": 2.7493,
+      "step": 3291
+    },
+    {
+      "epoch": 0.5707350901525658,
+      "grad_norm": 4.2891716957092285,
+      "learning_rate": 0.00018291257017806988,
+      "loss": 2.7971,
+      "step": 3292
+    },
+    {
+      "epoch": 0.5709084604715673,
+      "grad_norm": 4.707492351531982,
+      "learning_rate": 0.00018290240123844913,
+      "loss": 2.6336,
+      "step": 3293
+    },
+    {
+      "epoch": 0.5710818307905686,
+      "grad_norm": 4.963775157928467,
+      "learning_rate": 0.00018289222955674266,
+      "loss": 2.5244,
+      "step": 3294
+    },
+    {
+      "epoch": 0.5712552011095701,
+      "grad_norm": 4.937522888183594,
+      "learning_rate": 0.00018288205513328692,
+      "loss": 2.8081,
+      "step": 3295
+    },
+    {
+      "epoch": 0.5714285714285714,
+      "grad_norm": 3.659881830215454,
+      "learning_rate": 0.00018287187796841846,
+      "loss": 2.4409,
+      "step": 3296
+    },
+    {
+      "epoch": 0.5716019417475728,
+      "grad_norm": 4.32432746887207,
+      "learning_rate": 0.0001828616980624739,
+      "loss": 2.1267,
+      "step": 3297
+    },
+    {
+      "epoch": 0.5717753120665742,
+      "grad_norm": 4.8663859367370605,
+      "learning_rate": 0.00018285151541578998,
+      "loss": 2.9718,
+      "step": 3298
+    },
+    {
+      "epoch": 0.5719486823855756,
+      "grad_norm": 4.719000816345215,
+      "learning_rate": 0.00018284133002870341,
+      "loss": 2.4447,
+      "step": 3299
+    },
+    {
+      "epoch": 0.5721220527045769,
+      "grad_norm": 5.077669620513916,
+      "learning_rate": 0.00018283114190155116,
+      "loss": 2.6303,
+      "step": 3300
+    },
+    {
+      "epoch": 0.5722954230235784,
+      "grad_norm": 5.435730934143066,
+      "learning_rate": 0.0001828209510346702,
+      "loss": 3.1274,
+      "step": 3301
+    },
+    {
+      "epoch": 0.5724687933425797,
+      "grad_norm": 4.955165863037109,
+      "learning_rate": 0.00018281075742839758,
+      "loss": 2.8451,
+      "step": 3302
+    },
+    {
+      "epoch": 0.5726421636615812,
+      "grad_norm": 5.280594348907471,
+      "learning_rate": 0.00018280056108307047,
+      "loss": 2.3648,
+      "step": 3303
+    },
+    {
+      "epoch": 0.5728155339805825,
+      "grad_norm": 4.6982421875,
+      "learning_rate": 0.00018279036199902616,
+      "loss": 2.8781,
+      "step": 3304
+    },
+    {
+      "epoch": 0.5729889042995839,
+      "grad_norm": 4.744504928588867,
+      "learning_rate": 0.00018278016017660194,
+      "loss": 2.7233,
+      "step": 3305
+    },
+    {
+      "epoch": 0.5731622746185853,
+      "grad_norm": 4.595864295959473,
+      "learning_rate": 0.0001827699556161353,
+      "loss": 2.6739,
+      "step": 3306
+    },
+    {
+      "epoch": 0.5733356449375867,
+      "grad_norm": 5.104191780090332,
+      "learning_rate": 0.00018275974831796372,
+      "loss": 3.1129,
+      "step": 3307
+    },
+    {
+      "epoch": 0.573509015256588,
+      "grad_norm": 4.1947784423828125,
+      "learning_rate": 0.00018274953828242488,
+      "loss": 2.3496,
+      "step": 3308
+    },
+    {
+      "epoch": 0.5736823855755895,
+      "grad_norm": 4.07160758972168,
+      "learning_rate": 0.0001827393255098564,
+      "loss": 2.6475,
+      "step": 3309
+    },
+    {
+      "epoch": 0.5738557558945908,
+      "grad_norm": 4.301163673400879,
+      "learning_rate": 0.00018272911000059616,
+      "loss": 2.2616,
+      "step": 3310
+    },
+    {
+      "epoch": 0.5740291262135923,
+      "grad_norm": 4.440234661102295,
+      "learning_rate": 0.000182718891754982,
+      "loss": 2.4906,
+      "step": 3311
+    },
+    {
+      "epoch": 0.5742024965325936,
+      "grad_norm": 4.867928504943848,
+      "learning_rate": 0.00018270867077335192,
+      "loss": 2.8392,
+      "step": 3312
+    },
+    {
+      "epoch": 0.574375866851595,
+      "grad_norm": 4.4874653816223145,
+      "learning_rate": 0.000182698447056044,
+      "loss": 2.4208,
+      "step": 3313
+    },
+    {
+      "epoch": 0.5745492371705964,
+      "grad_norm": 4.641524791717529,
+      "learning_rate": 0.00018268822060339636,
+      "loss": 2.7429,
+      "step": 3314
+    },
+    {
+      "epoch": 0.5747226074895978,
+      "grad_norm": 4.988749027252197,
+      "learning_rate": 0.00018267799141574728,
+      "loss": 2.9103,
+      "step": 3315
+    },
+    {
+      "epoch": 0.5748959778085991,
+      "grad_norm": 4.886897563934326,
+      "learning_rate": 0.00018266775949343512,
+      "loss": 2.9617,
+      "step": 3316
+    },
+    {
+      "epoch": 0.5750693481276006,
+      "grad_norm": 4.58066987991333,
+      "learning_rate": 0.00018265752483679829,
+      "loss": 2.6433,
+      "step": 3317
+    },
+    {
+      "epoch": 0.5752427184466019,
+      "grad_norm": 4.467398166656494,
+      "learning_rate": 0.00018264728744617526,
+      "loss": 2.354,
+      "step": 3318
+    },
+    {
+      "epoch": 0.5754160887656034,
+      "grad_norm": 4.892213344573975,
+      "learning_rate": 0.00018263704732190473,
+      "loss": 2.9833,
+      "step": 3319
+    },
+    {
+      "epoch": 0.5755894590846047,
+      "grad_norm": 4.0669941902160645,
+      "learning_rate": 0.00018262680446432536,
+      "loss": 2.3943,
+      "step": 3320
+    },
+    {
+      "epoch": 0.5757628294036061,
+      "grad_norm": 4.302504062652588,
+      "learning_rate": 0.00018261655887377595,
+      "loss": 2.2251,
+      "step": 3321
+    },
+    {
+      "epoch": 0.5759361997226075,
+      "grad_norm": 4.474221706390381,
+      "learning_rate": 0.00018260631055059542,
+      "loss": 2.5036,
+      "step": 3322
+    },
+    {
+      "epoch": 0.5761095700416089,
+      "grad_norm": 4.923501968383789,
+      "learning_rate": 0.00018259605949512265,
+      "loss": 3.0561,
+      "step": 3323
+    },
+    {
+      "epoch": 0.5762829403606102,
+      "grad_norm": 4.720697402954102,
+      "learning_rate": 0.0001825858057076968,
+      "loss": 2.5389,
+      "step": 3324
+    },
+    {
+      "epoch": 0.5764563106796117,
+      "grad_norm": 4.73622465133667,
+      "learning_rate": 0.000182575549188657,
+      "loss": 2.6543,
+      "step": 3325
+    },
+    {
+      "epoch": 0.576629680998613,
+      "grad_norm": 4.317144870758057,
+      "learning_rate": 0.00018256528993834244,
+      "loss": 2.4873,
+      "step": 3326
+    },
+    {
+      "epoch": 0.5768030513176144,
+      "grad_norm": 4.784503936767578,
+      "learning_rate": 0.00018255502795709255,
+      "loss": 2.8198,
+      "step": 3327
+    },
+    {
+      "epoch": 0.5769764216366158,
+      "grad_norm": 4.800480842590332,
+      "learning_rate": 0.00018254476324524666,
+      "loss": 2.8276,
+      "step": 3328
+    },
+    {
+      "epoch": 0.5771497919556172,
+      "grad_norm": 4.321253299713135,
+      "learning_rate": 0.00018253449580314435,
+      "loss": 2.559,
+      "step": 3329
+    },
+    {
+      "epoch": 0.5773231622746186,
+      "grad_norm": 5.3969316482543945,
+      "learning_rate": 0.0001825242256311252,
+      "loss": 2.8425,
+      "step": 3330
+    },
+    {
+      "epoch": 0.57749653259362,
+      "grad_norm": 4.852773189544678,
+      "learning_rate": 0.00018251395272952893,
+      "loss": 2.3795,
+      "step": 3331
+    },
+    {
+      "epoch": 0.5776699029126213,
+      "grad_norm": 4.543683052062988,
+      "learning_rate": 0.00018250367709869532,
+      "loss": 2.7424,
+      "step": 3332
+    },
+    {
+      "epoch": 0.5778432732316228,
+      "grad_norm": 4.353267192840576,
+      "learning_rate": 0.00018249339873896423,
+      "loss": 2.6606,
+      "step": 3333
+    },
+    {
+      "epoch": 0.5780166435506241,
+      "grad_norm": 4.531190872192383,
+      "learning_rate": 0.0001824831176506756,
+      "loss": 2.5955,
+      "step": 3334
+    },
+    {
+      "epoch": 0.5781900138696255,
+      "grad_norm": 4.90303373336792,
+      "learning_rate": 0.00018247283383416958,
+      "loss": 2.7576,
+      "step": 3335
+    },
+    {
+      "epoch": 0.5783633841886269,
+      "grad_norm": 4.7935285568237305,
+      "learning_rate": 0.00018246254728978623,
+      "loss": 3.0418,
+      "step": 3336
+    },
+    {
+      "epoch": 0.5785367545076283,
+      "grad_norm": 5.3808274269104,
+      "learning_rate": 0.00018245225801786586,
+      "loss": 2.8938,
+      "step": 3337
+    },
+    {
+      "epoch": 0.5787101248266296,
+      "grad_norm": 4.451288223266602,
+      "learning_rate": 0.00018244196601874873,
+      "loss": 2.7302,
+      "step": 3338
+    },
+    {
+      "epoch": 0.5788834951456311,
+      "grad_norm": 4.647993087768555,
+      "learning_rate": 0.0001824316712927753,
+      "loss": 2.5655,
+      "step": 3339
+    },
+    {
+      "epoch": 0.5790568654646324,
+      "grad_norm": 5.102174282073975,
+      "learning_rate": 0.00018242137384028606,
+      "loss": 2.5658,
+      "step": 3340
+    },
+    {
+      "epoch": 0.5792302357836339,
+      "grad_norm": 4.787209987640381,
+      "learning_rate": 0.00018241107366162161,
+      "loss": 2.4885,
+      "step": 3341
+    },
+    {
+      "epoch": 0.5794036061026352,
+      "grad_norm": 5.380107879638672,
+      "learning_rate": 0.0001824007707571227,
+      "loss": 2.7051,
+      "step": 3342
+    },
+    {
+      "epoch": 0.5795769764216366,
+      "grad_norm": 5.206150531768799,
+      "learning_rate": 0.00018239046512713,
+      "loss": 3.0325,
+      "step": 3343
+    },
+    {
+      "epoch": 0.579750346740638,
+      "grad_norm": 5.208510875701904,
+      "learning_rate": 0.00018238015677198444,
+      "loss": 3.2486,
+      "step": 3344
+    },
+    {
+      "epoch": 0.5799237170596394,
+      "grad_norm": 4.896860599517822,
+      "learning_rate": 0.00018236984569202703,
+      "loss": 2.7862,
+      "step": 3345
+    },
+    {
+      "epoch": 0.5800970873786407,
+      "grad_norm": 4.807587623596191,
+      "learning_rate": 0.0001823595318875987,
+      "loss": 2.4567,
+      "step": 3346
+    },
+    {
+      "epoch": 0.5802704576976422,
+      "grad_norm": 3.840117931365967,
+      "learning_rate": 0.0001823492153590407,
+      "loss": 2.5847,
+      "step": 3347
+    },
+    {
+      "epoch": 0.5804438280166435,
+      "grad_norm": 4.926663875579834,
+      "learning_rate": 0.0001823388961066942,
+      "loss": 2.5562,
+      "step": 3348
+    },
+    {
+      "epoch": 0.580617198335645,
+      "grad_norm": 4.893335342407227,
+      "learning_rate": 0.00018232857413090056,
+      "loss": 2.7554,
+      "step": 3349
+    },
+    {
+      "epoch": 0.5807905686546463,
+      "grad_norm": 4.772489070892334,
+      "learning_rate": 0.00018231824943200118,
+      "loss": 2.9504,
+      "step": 3350
+    },
+    {
+      "epoch": 0.5809639389736477,
+      "grad_norm": 4.29256010055542,
+      "learning_rate": 0.0001823079220103375,
+      "loss": 2.1377,
+      "step": 3351
+    },
+    {
+      "epoch": 0.5811373092926491,
+      "grad_norm": 4.612153053283691,
+      "learning_rate": 0.0001822975918662512,
+      "loss": 2.7174,
+      "step": 3352
+    },
+    {
+      "epoch": 0.5813106796116505,
+      "grad_norm": 4.796567440032959,
+      "learning_rate": 0.00018228725900008388,
+      "loss": 2.9081,
+      "step": 3353
+    },
+    {
+      "epoch": 0.5814840499306518,
+      "grad_norm": 4.893945693969727,
+      "learning_rate": 0.00018227692341217736,
+      "loss": 2.8182,
+      "step": 3354
+    },
+    {
+      "epoch": 0.5816574202496533,
+      "grad_norm": 5.553914546966553,
+      "learning_rate": 0.00018226658510287353,
+      "loss": 2.484,
+      "step": 3355
+    },
+    {
+      "epoch": 0.5818307905686546,
+      "grad_norm": 4.586973190307617,
+      "learning_rate": 0.00018225624407251426,
+      "loss": 2.5138,
+      "step": 3356
+    },
+    {
+      "epoch": 0.582004160887656,
+      "grad_norm": 5.523622035980225,
+      "learning_rate": 0.00018224590032144165,
+      "loss": 2.9678,
+      "step": 3357
+    },
+    {
+      "epoch": 0.5821775312066574,
+      "grad_norm": 4.205085277557373,
+      "learning_rate": 0.00018223555384999778,
+      "loss": 2.3638,
+      "step": 3358
+    },
+    {
+      "epoch": 0.5823509015256588,
+      "grad_norm": 4.149076461791992,
+      "learning_rate": 0.00018222520465852496,
+      "loss": 2.6156,
+      "step": 3359
+    },
+    {
+      "epoch": 0.5825242718446602,
+      "grad_norm": 4.7897868156433105,
+      "learning_rate": 0.00018221485274736544,
+      "loss": 2.5654,
+      "step": 3360
+    },
+    {
+      "epoch": 0.5826976421636616,
+      "grad_norm": 4.5218729972839355,
+      "learning_rate": 0.00018220449811686158,
+      "loss": 2.5656,
+      "step": 3361
+    },
+    {
+      "epoch": 0.5828710124826629,
+      "grad_norm": 4.892624378204346,
+      "learning_rate": 0.00018219414076735594,
+      "loss": 2.798,
+      "step": 3362
+    },
+    {
+      "epoch": 0.5830443828016644,
+      "grad_norm": 4.101902484893799,
+      "learning_rate": 0.00018218378069919107,
+      "loss": 2.7525,
+      "step": 3363
+    },
+    {
+      "epoch": 0.5832177531206657,
+      "grad_norm": 4.455328941345215,
+      "learning_rate": 0.00018217341791270967,
+      "loss": 2.75,
+      "step": 3364
+    },
+    {
+      "epoch": 0.5833911234396671,
+      "grad_norm": 4.281439304351807,
+      "learning_rate": 0.00018216305240825444,
+      "loss": 2.7959,
+      "step": 3365
+    },
+    {
+      "epoch": 0.5835644937586685,
+      "grad_norm": 5.3705010414123535,
+      "learning_rate": 0.0001821526841861683,
+      "loss": 3.1096,
+      "step": 3366
+    },
+    {
+      "epoch": 0.5837378640776699,
+      "grad_norm": 4.449636459350586,
+      "learning_rate": 0.00018214231324679414,
+      "loss": 2.4797,
+      "step": 3367
+    },
+    {
+      "epoch": 0.5839112343966713,
+      "grad_norm": 4.880105972290039,
+      "learning_rate": 0.00018213193959047499,
+      "loss": 2.5145,
+      "step": 3368
+    },
+    {
+      "epoch": 0.5840846047156727,
+      "grad_norm": 4.388774871826172,
+      "learning_rate": 0.00018212156321755402,
+      "loss": 2.6195,
+      "step": 3369
+    },
+    {
+      "epoch": 0.584257975034674,
+      "grad_norm": 4.701444625854492,
+      "learning_rate": 0.00018211118412837438,
+      "loss": 2.7229,
+      "step": 3370
+    },
+    {
+      "epoch": 0.5844313453536755,
+      "grad_norm": 4.844371318817139,
+      "learning_rate": 0.00018210080232327938,
+      "loss": 2.7287,
+      "step": 3371
+    },
+    {
+      "epoch": 0.5846047156726768,
+      "grad_norm": 4.716871738433838,
+      "learning_rate": 0.00018209041780261248,
+      "loss": 2.5342,
+      "step": 3372
+    },
+    {
+      "epoch": 0.5847780859916782,
+      "grad_norm": 4.618688106536865,
+      "learning_rate": 0.00018208003056671706,
+      "loss": 2.5169,
+      "step": 3373
+    },
+    {
+      "epoch": 0.5849514563106796,
+      "grad_norm": 5.963217258453369,
+      "learning_rate": 0.00018206964061593674,
+      "loss": 3.0068,
+      "step": 3374
+    },
+    {
+      "epoch": 0.585124826629681,
+      "grad_norm": 4.944548606872559,
+      "learning_rate": 0.00018205924795061519,
+      "loss": 2.9618,
+      "step": 3375
+    },
+    {
+      "epoch": 0.5852981969486823,
+      "grad_norm": 5.098118305206299,
+      "learning_rate": 0.0001820488525710961,
+      "loss": 2.8263,
+      "step": 3376
+    },
+    {
+      "epoch": 0.5854715672676838,
+      "grad_norm": 5.193764686584473,
+      "learning_rate": 0.0001820384544777234,
+      "loss": 3.0504,
+      "step": 3377
+    },
+    {
+      "epoch": 0.5856449375866851,
+      "grad_norm": 4.885875225067139,
+      "learning_rate": 0.00018202805367084092,
+      "loss": 2.6449,
+      "step": 3378
+    },
+    {
+      "epoch": 0.5858183079056866,
+      "grad_norm": 4.806830883026123,
+      "learning_rate": 0.00018201765015079272,
+      "loss": 2.3542,
+      "step": 3379
+    },
+    {
+      "epoch": 0.5859916782246879,
+      "grad_norm": 5.08026647567749,
+      "learning_rate": 0.00018200724391792298,
+      "loss": 2.6523,
+      "step": 3380
+    },
+    {
+      "epoch": 0.5861650485436893,
+      "grad_norm": 4.419659614562988,
+      "learning_rate": 0.00018199683497257575,
+      "loss": 2.6682,
+      "step": 3381
+    },
+    {
+      "epoch": 0.5863384188626907,
+      "grad_norm": 4.407141208648682,
+      "learning_rate": 0.00018198642331509546,
+      "loss": 2.6352,
+      "step": 3382
+    },
+    {
+      "epoch": 0.5865117891816921,
+      "grad_norm": 5.744163990020752,
+      "learning_rate": 0.00018197600894582637,
+      "loss": 2.6692,
+      "step": 3383
+    },
+    {
+      "epoch": 0.5866851595006934,
+      "grad_norm": 4.836077690124512,
+      "learning_rate": 0.00018196559186511304,
+      "loss": 2.6554,
+      "step": 3384
+    },
+    {
+      "epoch": 0.5868585298196949,
+      "grad_norm": 5.319147109985352,
+      "learning_rate": 0.00018195517207329995,
+      "loss": 2.6856,
+      "step": 3385
+    },
+    {
+      "epoch": 0.5870319001386962,
+      "grad_norm": 6.225612163543701,
+      "learning_rate": 0.0001819447495707318,
+      "loss": 2.894,
+      "step": 3386
+    },
+    {
+      "epoch": 0.5872052704576977,
+      "grad_norm": 4.569421768188477,
+      "learning_rate": 0.00018193432435775333,
+      "loss": 2.8848,
+      "step": 3387
+    },
+    {
+      "epoch": 0.587378640776699,
+      "grad_norm": 4.7119855880737305,
+      "learning_rate": 0.00018192389643470927,
+      "loss": 2.3177,
+      "step": 3388
+    },
+    {
+      "epoch": 0.5875520110957004,
+      "grad_norm": 5.0427045822143555,
+      "learning_rate": 0.0001819134658019447,
+      "loss": 2.4948,
+      "step": 3389
+    },
+    {
+      "epoch": 0.5877253814147018,
+      "grad_norm": 4.58799409866333,
+      "learning_rate": 0.00018190303245980448,
+      "loss": 2.7289,
+      "step": 3390
+    },
+    {
+      "epoch": 0.5878987517337032,
+      "grad_norm": 4.49941349029541,
+      "learning_rate": 0.00018189259640863376,
+      "loss": 2.5666,
+      "step": 3391
+    },
+    {
+      "epoch": 0.5880721220527045,
+      "grad_norm": 5.272158622741699,
+      "learning_rate": 0.0001818821576487777,
+      "loss": 2.6216,
+      "step": 3392
+    },
+    {
+      "epoch": 0.588245492371706,
+      "grad_norm": 5.748251914978027,
+      "learning_rate": 0.00018187171618058164,
+      "loss": 3.1265,
+      "step": 3393
+    },
+    {
+      "epoch": 0.5884188626907073,
+      "grad_norm": 5.338310718536377,
+      "learning_rate": 0.00018186127200439084,
+      "loss": 2.5738,
+      "step": 3394
+    },
+    {
+      "epoch": 0.5885922330097088,
+      "grad_norm": 5.197334289550781,
+      "learning_rate": 0.00018185082512055085,
+      "loss": 3.1567,
+      "step": 3395
+    },
+    {
+      "epoch": 0.5887656033287101,
+      "grad_norm": 4.461606979370117,
+      "learning_rate": 0.00018184037552940714,
+      "loss": 2.6376,
+      "step": 3396
+    },
+    {
+      "epoch": 0.5889389736477115,
+      "grad_norm": 5.124531269073486,
+      "learning_rate": 0.00018182992323130537,
+      "loss": 2.8104,
+      "step": 3397
+    },
+    {
+      "epoch": 0.5891123439667129,
+      "grad_norm": 4.685345649719238,
+      "learning_rate": 0.00018181946822659126,
+      "loss": 2.7045,
+      "step": 3398
+    },
+    {
+      "epoch": 0.5892857142857143,
+      "grad_norm": 4.44515323638916,
+      "learning_rate": 0.00018180901051561063,
+      "loss": 2.4694,
+      "step": 3399
+    },
+    {
+      "epoch": 0.5894590846047156,
+      "grad_norm": 4.914605140686035,
+      "learning_rate": 0.0001817985500987094,
+      "loss": 2.6964,
+      "step": 3400
+    },
+    {
+      "epoch": 0.5894590846047156,
+      "eval_loss": 2.65090012550354,
+      "eval_runtime": 17.7427,
+      "eval_samples_per_second": 26.152,
+      "eval_steps_per_second": 6.538,
+      "step": 3400
     }
   ],
   "logging_steps": 1,
@@ -22557,7 +23965,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
@@ -22566,12 +23974,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
      },
      "attributes": {}
    }
  },
-  "total_flos": 1.
+  "total_flos": 1.688805266423808e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
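The trainer_state.json change records why this is the final checkpoint: eval_loss at step 3400 (2.6509) is still above the best metric from checkpoint-2800 (2.6310), so the early-stopping patience counter reaches 3 and should_training_stop flips to true. A minimal Python sketch, assuming a local copy of the checkpoint, that reads this state back (key names follow the trainer_state.json shown above):

import json

# Minimal sketch: inspect the downloaded checkpoint's trainer_state.json and
# summarize the progress and the final evaluation recorded in this commit.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print("global_step:", state["global_step"])        # 3400
print("epoch:", state["epoch"])                    # 0.5894590846047156
print("best_metric:", state["best_metric"])        # 2.630952835083008 (checkpoint-2800)

# "log_history" holds the per-step entries added in this commit; pick the evals.
evals = [e for e in state["log_history"] if "eval_loss" in e]
last = evals[-1]
print("last eval: step", last["step"], "eval_loss", last["eval_loss"])  # 3400, ~2.6509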