Training in progress, step 4500, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 141303176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7066ad238f161a22d0683e51ba0e6258706bc9aa90c5787542555b1a4a97128d
|
3 |
size 141303176
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 282653387
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:163c91ff02e2ddb5dacee5972ca6bb5038e19cbb07bed2cea725f9d8a4a6a70f
|
3 |
size 282653387
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14455
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b652c6269b998b96ab924b2734c0818fab436c642524e13fc6cd4d9082e62b5
|
3 |
size 14455
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1465
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77fc95ff7d80af03c17939ca85b9568e652a4cf9d2e9aaf323a942fee4510f31
|
3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -632,6 +632,84 @@
|
|
632 |
"eval_samples_per_second": 10.579,
|
633 |
"eval_steps_per_second": 2.646,
|
634 |
"step": 4000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
635 |
}
|
636 |
],
|
637 |
"logging_steps": 50,
|
@@ -651,7 +729,7 @@
|
|
651 |
"attributes": {}
|
652 |
}
|
653 |
},
|
654 |
-
"total_flos": 1.
|
655 |
"train_batch_size": 4,
|
656 |
"trial_name": null,
|
657 |
"trial_params": null
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.0805199801384049,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 4500,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
632 |
"eval_samples_per_second": 10.579,
|
633 |
"eval_steps_per_second": 2.646,
|
634 |
"step": 4000
|
635 |
+
},
|
636 |
+
{
|
637 |
+
"epoch": 0.07246798212456441,
|
638 |
+
"grad_norm": 0.6943932175636292,
|
639 |
+
"learning_rate": 4.024850894632207e-05,
|
640 |
+
"loss": 0.5381,
|
641 |
+
"step": 4050
|
642 |
+
},
|
643 |
+
{
|
644 |
+
"epoch": 0.07336264857054668,
|
645 |
+
"grad_norm": 0.7221015095710754,
|
646 |
+
"learning_rate": 4.07455268389662e-05,
|
647 |
+
"loss": 0.5521,
|
648 |
+
"step": 4100
|
649 |
+
},
|
650 |
+
{
|
651 |
+
"epoch": 0.07425731501652896,
|
652 |
+
"grad_norm": 0.6903452277183533,
|
653 |
+
"learning_rate": 4.124254473161034e-05,
|
654 |
+
"loss": 0.5389,
|
655 |
+
"step": 4150
|
656 |
+
},
|
657 |
+
{
|
658 |
+
"epoch": 0.07515198146251124,
|
659 |
+
"grad_norm": 0.7749171853065491,
|
660 |
+
"learning_rate": 4.173956262425447e-05,
|
661 |
+
"loss": 0.539,
|
662 |
+
"step": 4200
|
663 |
+
},
|
664 |
+
{
|
665 |
+
"epoch": 0.07604664790849351,
|
666 |
+
"grad_norm": 0.7912935614585876,
|
667 |
+
"learning_rate": 4.223658051689861e-05,
|
668 |
+
"loss": 0.5363,
|
669 |
+
"step": 4250
|
670 |
+
},
|
671 |
+
{
|
672 |
+
"epoch": 0.07694131435447579,
|
673 |
+
"grad_norm": 0.7532743811607361,
|
674 |
+
"learning_rate": 4.273359840954275e-05,
|
675 |
+
"loss": 0.5376,
|
676 |
+
"step": 4300
|
677 |
+
},
|
678 |
+
{
|
679 |
+
"epoch": 0.07783598080045807,
|
680 |
+
"grad_norm": 0.6006184220314026,
|
681 |
+
"learning_rate": 4.323061630218688e-05,
|
682 |
+
"loss": 0.5348,
|
683 |
+
"step": 4350
|
684 |
+
},
|
685 |
+
{
|
686 |
+
"epoch": 0.07873064724644034,
|
687 |
+
"grad_norm": 0.6265826225280762,
|
688 |
+
"learning_rate": 4.372763419483102e-05,
|
689 |
+
"loss": 0.5346,
|
690 |
+
"step": 4400
|
691 |
+
},
|
692 |
+
{
|
693 |
+
"epoch": 0.07962531369242262,
|
694 |
+
"grad_norm": 0.6724209189414978,
|
695 |
+
"learning_rate": 4.422465208747515e-05,
|
696 |
+
"loss": 0.5312,
|
697 |
+
"step": 4450
|
698 |
+
},
|
699 |
+
{
|
700 |
+
"epoch": 0.0805199801384049,
|
701 |
+
"grad_norm": 0.8128370046615601,
|
702 |
+
"learning_rate": 4.472166998011928e-05,
|
703 |
+
"loss": 0.5365,
|
704 |
+
"step": 4500
|
705 |
+
},
|
706 |
+
{
|
707 |
+
"epoch": 0.0805199801384049,
|
708 |
+
"eval_loss": 0.509487509727478,
|
709 |
+
"eval_runtime": 834.3525,
|
710 |
+
"eval_samples_per_second": 10.826,
|
711 |
+
"eval_steps_per_second": 2.707,
|
712 |
+
"step": 4500
|
713 |
}
|
714 |
],
|
715 |
"logging_steps": 50,
|
|
|
729 |
"attributes": {}
|
730 |
}
|
731 |
},
|
732 |
+
"total_flos": 1.6735071633408e+16,
|
733 |
"train_batch_size": 4,
|
734 |
"trial_name": null,
|
735 |
"trial_params": null
|