Training in progress, step 966, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 159967880
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48b44ffedd3bcc2378fb4b6ff819562642a49efef4ae3d053c11cf98092a991c
|
3 |
size 159967880
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 81730644
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c65324517999537d1a713978c190754cee80df1b8d07519564e3bdbbe6f6f00
|
3 |
size 81730644
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:901b58ab73b39325bfea8aed8a9f472d920fed35a9e5a296018097c13d84b1ca
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:823c6cdea481d02c5473910d781463e3c449c8632b36bf92b3536a92203bd40d
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.723136305809021,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-900",
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -6387,6 +6387,468 @@
|
|
6387 |
"eval_samples_per_second": 7.139,
|
6388 |
"eval_steps_per_second": 1.785,
|
6389 |
"step": 900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6390 |
}
|
6391 |
],
|
6392 |
"logging_steps": 1,
|
@@ -6410,12 +6872,12 @@
|
|
6410 |
"should_evaluate": false,
|
6411 |
"should_log": false,
|
6412 |
"should_save": true,
|
6413 |
-
"should_training_stop":
|
6414 |
},
|
6415 |
"attributes": {}
|
6416 |
}
|
6417 |
},
|
6418 |
-
"total_flos": 2.
|
6419 |
"train_batch_size": 4,
|
6420 |
"trial_name": null,
|
6421 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.723136305809021,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-900",
|
4 |
+
"epoch": 0.06728190841023855,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 966,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
6387 |
"eval_samples_per_second": 7.139,
|
6388 |
"eval_steps_per_second": 1.785,
|
6389 |
"step": 900
|
6390 |
+
},
|
6391 |
+
{
|
6392 |
+
"epoch": 0.06275465784433223,
|
6393 |
+
"grad_norm": 0.7109830379486084,
|
6394 |
+
"learning_rate": 2.272630321321023e-06,
|
6395 |
+
"loss": 0.704,
|
6396 |
+
"step": 901
|
6397 |
+
},
|
6398 |
+
{
|
6399 |
+
"epoch": 0.06282430785303848,
|
6400 |
+
"grad_norm": 0.4886980950832367,
|
6401 |
+
"learning_rate": 2.20349711463943e-06,
|
6402 |
+
"loss": 0.4915,
|
6403 |
+
"step": 902
|
6404 |
+
},
|
6405 |
+
{
|
6406 |
+
"epoch": 0.06289395786174473,
|
6407 |
+
"grad_norm": 0.6534592509269714,
|
6408 |
+
"learning_rate": 2.135420012462619e-06,
|
6409 |
+
"loss": 0.6073,
|
6410 |
+
"step": 903
|
6411 |
+
},
|
6412 |
+
{
|
6413 |
+
"epoch": 0.06296360787045098,
|
6414 |
+
"grad_norm": 0.5471417903900146,
|
6415 |
+
"learning_rate": 2.0683997499552632e-06,
|
6416 |
+
"loss": 0.6319,
|
6417 |
+
"step": 904
|
6418 |
+
},
|
6419 |
+
{
|
6420 |
+
"epoch": 0.06303325787915723,
|
6421 |
+
"grad_norm": 0.765691876411438,
|
6422 |
+
"learning_rate": 2.0024370508692104e-06,
|
6423 |
+
"loss": 0.9544,
|
6424 |
+
"step": 905
|
6425 |
+
},
|
6426 |
+
{
|
6427 |
+
"epoch": 0.06310290788786349,
|
6428 |
+
"grad_norm": 0.6834742426872253,
|
6429 |
+
"learning_rate": 1.9375326275357208e-06,
|
6430 |
+
"loss": 0.8162,
|
6431 |
+
"step": 906
|
6432 |
+
},
|
6433 |
+
{
|
6434 |
+
"epoch": 0.06317255789656974,
|
6435 |
+
"grad_norm": 0.7233893871307373,
|
6436 |
+
"learning_rate": 1.8736871808576861e-06,
|
6437 |
+
"loss": 1.0311,
|
6438 |
+
"step": 907
|
6439 |
+
},
|
6440 |
+
{
|
6441 |
+
"epoch": 0.06324220790527599,
|
6442 |
+
"grad_norm": 0.6150738000869751,
|
6443 |
+
"learning_rate": 1.8109014003021452e-06,
|
6444 |
+
"loss": 0.9241,
|
6445 |
+
"step": 908
|
6446 |
+
},
|
6447 |
+
{
|
6448 |
+
"epoch": 0.06331185791398224,
|
6449 |
+
"grad_norm": 0.7470687031745911,
|
6450 |
+
"learning_rate": 1.7491759638927686e-06,
|
6451 |
+
"loss": 1.1686,
|
6452 |
+
"step": 909
|
6453 |
+
},
|
6454 |
+
{
|
6455 |
+
"epoch": 0.06338150792268848,
|
6456 |
+
"grad_norm": 0.7098023295402527,
|
6457 |
+
"learning_rate": 1.6885115382026085e-06,
|
6458 |
+
"loss": 1.1531,
|
6459 |
+
"step": 910
|
6460 |
+
},
|
6461 |
+
{
|
6462 |
+
"epoch": 0.06345115793139475,
|
6463 |
+
"grad_norm": 0.6397354006767273,
|
6464 |
+
"learning_rate": 1.628908778346827e-06,
|
6465 |
+
"loss": 0.9153,
|
6466 |
+
"step": 911
|
6467 |
+
},
|
6468 |
+
{
|
6469 |
+
"epoch": 0.063520807940101,
|
6470 |
+
"grad_norm": 0.6609793305397034,
|
6471 |
+
"learning_rate": 1.5703683279756797e-06,
|
6472 |
+
"loss": 0.641,
|
6473 |
+
"step": 912
|
6474 |
+
},
|
6475 |
+
{
|
6476 |
+
"epoch": 0.06359045794880724,
|
6477 |
+
"grad_norm": 0.7062059640884399,
|
6478 |
+
"learning_rate": 1.5128908192675318e-06,
|
6479 |
+
"loss": 0.7182,
|
6480 |
+
"step": 913
|
6481 |
+
},
|
6482 |
+
{
|
6483 |
+
"epoch": 0.06366010795751349,
|
6484 |
+
"grad_norm": 0.6093196272850037,
|
6485 |
+
"learning_rate": 1.4564768729220412e-06,
|
6486 |
+
"loss": 0.6793,
|
6487 |
+
"step": 914
|
6488 |
+
},
|
6489 |
+
{
|
6490 |
+
"epoch": 0.06372975796621974,
|
6491 |
+
"grad_norm": 0.6978054642677307,
|
6492 |
+
"learning_rate": 1.401127098153443e-06,
|
6493 |
+
"loss": 0.7592,
|
6494 |
+
"step": 915
|
6495 |
+
},
|
6496 |
+
{
|
6497 |
+
"epoch": 0.063799407974926,
|
6498 |
+
"grad_norm": 0.5635403394699097,
|
6499 |
+
"learning_rate": 1.3468420926840197e-06,
|
6500 |
+
"loss": 0.869,
|
6501 |
+
"step": 916
|
6502 |
+
},
|
6503 |
+
{
|
6504 |
+
"epoch": 0.06386905798363225,
|
6505 |
+
"grad_norm": 0.6903446912765503,
|
6506 |
+
"learning_rate": 1.2936224427375521e-06,
|
6507 |
+
"loss": 0.7401,
|
6508 |
+
"step": 917
|
6509 |
+
},
|
6510 |
+
{
|
6511 |
+
"epoch": 0.0639387079923385,
|
6512 |
+
"grad_norm": 0.6210869550704956,
|
6513 |
+
"learning_rate": 1.2414687230331123e-06,
|
6514 |
+
"loss": 0.5908,
|
6515 |
+
"step": 918
|
6516 |
+
},
|
6517 |
+
{
|
6518 |
+
"epoch": 0.06400835800104475,
|
6519 |
+
"grad_norm": 0.6113409399986267,
|
6520 |
+
"learning_rate": 1.1903814967787253e-06,
|
6521 |
+
"loss": 0.5493,
|
6522 |
+
"step": 919
|
6523 |
+
},
|
6524 |
+
{
|
6525 |
+
"epoch": 0.064078008009751,
|
6526 |
+
"grad_norm": 0.9400643706321716,
|
6527 |
+
"learning_rate": 1.1403613156654059e-06,
|
6528 |
+
"loss": 1.0418,
|
6529 |
+
"step": 920
|
6530 |
+
},
|
6531 |
+
{
|
6532 |
+
"epoch": 0.06414765801845725,
|
6533 |
+
"grad_norm": 0.683574378490448,
|
6534 |
+
"learning_rate": 1.091408719861109e-06,
|
6535 |
+
"loss": 0.9345,
|
6536 |
+
"step": 921
|
6537 |
+
},
|
6538 |
+
{
|
6539 |
+
"epoch": 0.06421730802716351,
|
6540 |
+
"grad_norm": 0.7595987915992737,
|
6541 |
+
"learning_rate": 1.0435242380049559e-06,
|
6542 |
+
"loss": 0.8716,
|
6543 |
+
"step": 922
|
6544 |
+
},
|
6545 |
+
{
|
6546 |
+
"epoch": 0.06428695803586976,
|
6547 |
+
"grad_norm": 0.6851724982261658,
|
6548 |
+
"learning_rate": 9.967083872015282e-07,
|
6549 |
+
"loss": 0.5158,
|
6550 |
+
"step": 923
|
6551 |
+
},
|
6552 |
+
{
|
6553 |
+
"epoch": 0.064356608044576,
|
6554 |
+
"grad_norm": 0.6724770069122314,
|
6555 |
+
"learning_rate": 9.509616730151827e-07,
|
6556 |
+
"loss": 0.5133,
|
6557 |
+
"step": 924
|
6558 |
+
},
|
6559 |
+
{
|
6560 |
+
"epoch": 0.06442625805328225,
|
6561 |
+
"grad_norm": 0.6596947312355042,
|
6562 |
+
"learning_rate": 9.062845894647676e-07,
|
6563 |
+
"loss": 0.6722,
|
6564 |
+
"step": 925
|
6565 |
+
},
|
6566 |
+
{
|
6567 |
+
"epoch": 0.0644959080619885,
|
6568 |
+
"grad_norm": 0.5619158148765564,
|
6569 |
+
"learning_rate": 8.626776190181041e-07,
|
6570 |
+
"loss": 0.9499,
|
6571 |
+
"step": 926
|
6572 |
+
},
|
6573 |
+
{
|
6574 |
+
"epoch": 0.06456555807069476,
|
6575 |
+
"grad_norm": 0.7573150992393494,
|
6576 |
+
"learning_rate": 8.20141232586924e-07,
|
6577 |
+
"loss": 0.7521,
|
6578 |
+
"step": 927
|
6579 |
+
},
|
6580 |
+
{
|
6581 |
+
"epoch": 0.06463520807940101,
|
6582 |
+
"grad_norm": 0.6126770377159119,
|
6583 |
+
"learning_rate": 7.786758895216629e-07,
|
6584 |
+
"loss": 0.6616,
|
6585 |
+
"step": 928
|
6586 |
+
},
|
6587 |
+
{
|
6588 |
+
"epoch": 0.06470485808810726,
|
6589 |
+
"grad_norm": 0.7481774687767029,
|
6590 |
+
"learning_rate": 7.382820376066302e-07,
|
6591 |
+
"loss": 0.8779,
|
6592 |
+
"step": 929
|
6593 |
+
},
|
6594 |
+
{
|
6595 |
+
"epoch": 0.06477450809681351,
|
6596 |
+
"grad_norm": 0.7029200792312622,
|
6597 |
+
"learning_rate": 6.98960113055025e-07,
|
6598 |
+
"loss": 0.7685,
|
6599 |
+
"step": 930
|
6600 |
+
},
|
6601 |
+
{
|
6602 |
+
"epoch": 0.06484415810551976,
|
6603 |
+
"grad_norm": 0.6455416679382324,
|
6604 |
+
"learning_rate": 6.607105405043612e-07,
|
6605 |
+
"loss": 1.0069,
|
6606 |
+
"step": 931
|
6607 |
+
},
|
6608 |
+
{
|
6609 |
+
"epoch": 0.06491380811422602,
|
6610 |
+
"grad_norm": 0.7011751532554626,
|
6611 |
+
"learning_rate": 6.23533733011783e-07,
|
6612 |
+
"loss": 0.6548,
|
6613 |
+
"step": 932
|
6614 |
+
},
|
6615 |
+
{
|
6616 |
+
"epoch": 0.06498345812293227,
|
6617 |
+
"grad_norm": 0.7533524036407471,
|
6618 |
+
"learning_rate": 5.8743009204969e-07,
|
6619 |
+
"loss": 0.7463,
|
6620 |
+
"step": 933
|
6621 |
+
},
|
6622 |
+
{
|
6623 |
+
"epoch": 0.06505310813163852,
|
6624 |
+
"grad_norm": 0.5586950182914734,
|
6625 |
+
"learning_rate": 5.52400007501297e-07,
|
6626 |
+
"loss": 0.6125,
|
6627 |
+
"step": 934
|
6628 |
+
},
|
6629 |
+
{
|
6630 |
+
"epoch": 0.06512275814034477,
|
6631 |
+
"grad_norm": 0.6539096832275391,
|
6632 |
+
"learning_rate": 5.184438576565253e-07,
|
6633 |
+
"loss": 0.8559,
|
6634 |
+
"step": 935
|
6635 |
+
},
|
6636 |
+
{
|
6637 |
+
"epoch": 0.06519240814905101,
|
6638 |
+
"grad_norm": 0.7584323883056641,
|
6639 |
+
"learning_rate": 4.855620092078627e-07,
|
6640 |
+
"loss": 1.1142,
|
6641 |
+
"step": 936
|
6642 |
+
},
|
6643 |
+
{
|
6644 |
+
"epoch": 0.06526205815775726,
|
6645 |
+
"grad_norm": 0.6609397530555725,
|
6646 |
+
"learning_rate": 4.537548172464101e-07,
|
6647 |
+
"loss": 0.8978,
|
6648 |
+
"step": 937
|
6649 |
+
},
|
6650 |
+
{
|
6651 |
+
"epoch": 0.06533170816646353,
|
6652 |
+
"grad_norm": 0.6159988641738892,
|
6653 |
+
"learning_rate": 4.230226252580516e-07,
|
6654 |
+
"loss": 0.6993,
|
6655 |
+
"step": 938
|
6656 |
+
},
|
6657 |
+
{
|
6658 |
+
"epoch": 0.06540135817516977,
|
6659 |
+
"grad_norm": 0.6153664588928223,
|
6660 |
+
"learning_rate": 3.9336576511976863e-07,
|
6661 |
+
"loss": 0.4574,
|
6662 |
+
"step": 939
|
6663 |
+
},
|
6664 |
+
{
|
6665 |
+
"epoch": 0.06547100818387602,
|
6666 |
+
"grad_norm": 0.6489300727844238,
|
6667 |
+
"learning_rate": 3.6478455709598734e-07,
|
6668 |
+
"loss": 0.7568,
|
6669 |
+
"step": 940
|
6670 |
+
},
|
6671 |
+
{
|
6672 |
+
"epoch": 0.06554065819258227,
|
6673 |
+
"grad_norm": 0.6248874664306641,
|
6674 |
+
"learning_rate": 3.372793098352256e-07,
|
6675 |
+
"loss": 0.6879,
|
6676 |
+
"step": 941
|
6677 |
+
},
|
6678 |
+
{
|
6679 |
+
"epoch": 0.06561030820128852,
|
6680 |
+
"grad_norm": 0.5801978707313538,
|
6681 |
+
"learning_rate": 3.108503203666402e-07,
|
6682 |
+
"loss": 0.7331,
|
6683 |
+
"step": 942
|
6684 |
+
},
|
6685 |
+
{
|
6686 |
+
"epoch": 0.06567995820999478,
|
6687 |
+
"grad_norm": 0.605501115322113,
|
6688 |
+
"learning_rate": 2.8549787409691833e-07,
|
6689 |
+
"loss": 0.6179,
|
6690 |
+
"step": 943
|
6691 |
+
},
|
6692 |
+
{
|
6693 |
+
"epoch": 0.06574960821870103,
|
6694 |
+
"grad_norm": 0.5972608327865601,
|
6695 |
+
"learning_rate": 2.6122224480715775e-07,
|
6696 |
+
"loss": 0.6514,
|
6697 |
+
"step": 944
|
6698 |
+
},
|
6699 |
+
{
|
6700 |
+
"epoch": 0.06581925822740728,
|
6701 |
+
"grad_norm": 0.7556172609329224,
|
6702 |
+
"learning_rate": 2.380236946498693e-07,
|
6703 |
+
"loss": 0.8719,
|
6704 |
+
"step": 945
|
6705 |
+
},
|
6706 |
+
{
|
6707 |
+
"epoch": 0.06588890823611353,
|
6708 |
+
"grad_norm": 0.6486802101135254,
|
6709 |
+
"learning_rate": 2.1590247414624566e-07,
|
6710 |
+
"loss": 0.5719,
|
6711 |
+
"step": 946
|
6712 |
+
},
|
6713 |
+
{
|
6714 |
+
"epoch": 0.06595855824481978,
|
6715 |
+
"grad_norm": 0.638469398021698,
|
6716 |
+
"learning_rate": 1.948588221833303e-07,
|
6717 |
+
"loss": 0.6393,
|
6718 |
+
"step": 947
|
6719 |
+
},
|
6720 |
+
{
|
6721 |
+
"epoch": 0.06602820825352604,
|
6722 |
+
"grad_norm": 0.7082604765892029,
|
6723 |
+
"learning_rate": 1.7489296601156392e-07,
|
6724 |
+
"loss": 1.0018,
|
6725 |
+
"step": 948
|
6726 |
+
},
|
6727 |
+
{
|
6728 |
+
"epoch": 0.06609785826223229,
|
6729 |
+
"grad_norm": 0.6530460119247437,
|
6730 |
+
"learning_rate": 1.5600512124221978e-07,
|
6731 |
+
"loss": 0.7418,
|
6732 |
+
"step": 949
|
6733 |
+
},
|
6734 |
+
{
|
6735 |
+
"epoch": 0.06616750827093854,
|
6736 |
+
"grad_norm": 0.653685986995697,
|
6737 |
+
"learning_rate": 1.3819549184516112e-07,
|
6738 |
+
"loss": 0.9309,
|
6739 |
+
"step": 950
|
6740 |
+
},
|
6741 |
+
{
|
6742 |
+
"epoch": 0.06623715827964478,
|
6743 |
+
"grad_norm": 0.5263675451278687,
|
6744 |
+
"learning_rate": 1.2146427014657625e-07,
|
6745 |
+
"loss": 0.7189,
|
6746 |
+
"step": 951
|
6747 |
+
},
|
6748 |
+
{
|
6749 |
+
"epoch": 0.06630680828835103,
|
6750 |
+
"grad_norm": 0.6783672571182251,
|
6751 |
+
"learning_rate": 1.0581163682695793e-07,
|
6752 |
+
"loss": 0.5871,
|
6753 |
+
"step": 952
|
6754 |
+
},
|
6755 |
+
{
|
6756 |
+
"epoch": 0.06637645829705728,
|
6757 |
+
"grad_norm": 0.4727168083190918,
|
6758 |
+
"learning_rate": 9.123776091908287e-08,
|
6759 |
+
"loss": 0.3484,
|
6760 |
+
"step": 953
|
6761 |
+
},
|
6762 |
+
{
|
6763 |
+
"epoch": 0.06644610830576354,
|
6764 |
+
"grad_norm": 0.5385925769805908,
|
6765 |
+
"learning_rate": 7.774279980626853e-08,
|
6766 |
+
"loss": 0.5899,
|
6767 |
+
"step": 954
|
6768 |
+
},
|
6769 |
+
{
|
6770 |
+
"epoch": 0.06651575831446979,
|
6771 |
+
"grad_norm": 0.6668855547904968,
|
6772 |
+
"learning_rate": 6.532689922059687e-08,
|
6773 |
+
"loss": 1.0131,
|
6774 |
+
"step": 955
|
6775 |
+
},
|
6776 |
+
{
|
6777 |
+
"epoch": 0.06658540832317604,
|
6778 |
+
"grad_norm": 0.6244344115257263,
|
6779 |
+
"learning_rate": 5.3990193241393313e-08,
|
6780 |
+
"loss": 0.7458,
|
6781 |
+
"step": 956
|
6782 |
+
},
|
6783 |
+
{
|
6784 |
+
"epoch": 0.06665505833188229,
|
6785 |
+
"grad_norm": 0.6702743768692017,
|
6786 |
+
"learning_rate": 4.373280429375015e-08,
|
6787 |
+
"loss": 0.8924,
|
6788 |
+
"step": 957
|
6789 |
+
},
|
6790 |
+
{
|
6791 |
+
"epoch": 0.06672470834058854,
|
6792 |
+
"grad_norm": 0.6103947758674622,
|
6793 |
+
"learning_rate": 3.4554843147216464e-08,
|
6794 |
+
"loss": 1.0036,
|
6795 |
+
"step": 958
|
6796 |
+
},
|
6797 |
+
{
|
6798 |
+
"epoch": 0.0667943583492948,
|
6799 |
+
"grad_norm": 0.622797966003418,
|
6800 |
+
"learning_rate": 2.6456408914599108e-08,
|
6801 |
+
"loss": 0.8497,
|
6802 |
+
"step": 959
|
6803 |
+
},
|
6804 |
+
{
|
6805 |
+
"epoch": 0.06686400835800105,
|
6806 |
+
"grad_norm": 0.7076674699783325,
|
6807 |
+
"learning_rate": 1.9437589050907977e-08,
|
6808 |
+
"loss": 0.5629,
|
6809 |
+
"step": 960
|
6810 |
+
},
|
6811 |
+
{
|
6812 |
+
"epoch": 0.0669336583667073,
|
6813 |
+
"grad_norm": 0.7682867050170898,
|
6814 |
+
"learning_rate": 1.3498459352367931e-08,
|
6815 |
+
"loss": 0.7463,
|
6816 |
+
"step": 961
|
6817 |
+
},
|
6818 |
+
{
|
6819 |
+
"epoch": 0.06700330837541355,
|
6820 |
+
"grad_norm": 0.7987236380577087,
|
6821 |
+
"learning_rate": 8.639083955663818e-09,
|
6822 |
+
"loss": 1.1664,
|
6823 |
+
"step": 962
|
6824 |
+
},
|
6825 |
+
{
|
6826 |
+
"epoch": 0.0670729583841198,
|
6827 |
+
"grad_norm": 0.7837391495704651,
|
6828 |
+
"learning_rate": 4.859515337174436e-09,
|
6829 |
+
"loss": 0.6505,
|
6830 |
+
"step": 963
|
6831 |
+
},
|
6832 |
+
{
|
6833 |
+
"epoch": 0.06714260839282606,
|
6834 |
+
"grad_norm": 0.6566223502159119,
|
6835 |
+
"learning_rate": 2.1597943124729292e-09,
|
6836 |
+
"loss": 0.8524,
|
6837 |
+
"step": 964
|
6838 |
+
},
|
6839 |
+
{
|
6840 |
+
"epoch": 0.0672122584015323,
|
6841 |
+
"grad_norm": 0.6998875737190247,
|
6842 |
+
"learning_rate": 5.399500358493903e-10,
|
6843 |
+
"loss": 0.8817,
|
6844 |
+
"step": 965
|
6845 |
+
},
|
6846 |
+
{
|
6847 |
+
"epoch": 0.06728190841023855,
|
6848 |
+
"grad_norm": 0.6083624362945557,
|
6849 |
+
"learning_rate": 0.0,
|
6850 |
+
"loss": 0.8767,
|
6851 |
+
"step": 966
|
6852 |
}
|
6853 |
],
|
6854 |
"logging_steps": 1,
|
|
|
6872 |
"should_evaluate": false,
|
6873 |
"should_log": false,
|
6874 |
"should_save": true,
|
6875 |
+
"should_training_stop": true
|
6876 |
},
|
6877 |
"attributes": {}
|
6878 |
}
|
6879 |
},
|
6880 |
+
"total_flos": 2.628352553502376e+18,
|
6881 |
"train_batch_size": 4,
|
6882 |
"trial_name": null,
|
6883 |
"trial_params": null
|