Training in progress, step 12140, checkpoint
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6ad4ee78050500332e9fe024c0502189ebfaf38f573b0c21a0d3771fabad84e8
 size 3235581408
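The only change to model.safetensors is its LFS oid: the checkpoint overwrote the 3,235,581,408-byte weights blob in place. A minimal sketch (not part of this commit) of loading those weights once the real blob has been fetched locally, e.g. via git lfs pull; the repo-relative path is the one shown above:

```python
# Sketch: load the checkpoint weights tracked by the pointer above.
# Assumes the actual blob (not the LFS pointer stub) is present locally.
from safetensors.torch import load_file

state_dict = load_file("last-checkpoint/model.safetensors")  # name -> torch.Tensor
n_params = sum(t.numel() for t in state_dict.values())
print(f"{len(state_dict)} tensors, {n_params:,} parameters")
```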
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c3c8c19e4ce061f151c2c13a979ffd5d4c49db90ea0cacec043cbd741d2e7c5c
 size 6456162404
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:206b3df34cb1f66beca4bc0e1c535718212418c290c37c48ba6b6232878b0772
 size 14244
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7e7e82867a787951c3c0beaf0d201388f2e0e11500ad9fce0779bc92bd09f5ff
 size 988
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:48e52c9ce0ead365877b084af84d07ca4e997f15d4cc6dbff0207b3b8b5cf92b
 size 1064
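Each of the five files above is stored through Git LFS, so the diff touches only the three-line pointer (version, oid, size): a new checkpoint blob changes the sha256 oid while the byte size can stay identical, as it does for every file here. A minimal sketch, with hypothetical local paths, of verifying a downloaded blob against its pointer:

```python
# Sketch: verify a downloaded LFS blob against the oid/size in its pointer.
# Paths are hypothetical; pointers look like the three-line stubs above.
import hashlib

def verify_lfs_blob(pointer_path: str, blob_path: str) -> bool:
    # Parse "key value" lines of the pointer into a dict.
    with open(pointer_path) as f:
        fields = dict(line.strip().split(" ", 1) for line in f if " " in line)
    expected_oid = fields["oid"].split(":", 1)[1]  # "sha256:<hex>" -> "<hex>"
    expected_size = int(fields["size"])

    # Stream the blob so multi-GB files don't have to fit in memory.
    digest, size = hashlib.sha256(), 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
            digest.update(chunk)
            size += len(chunk)
    return size == expected_size and digest.hexdigest() == expected_oid
```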
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
+  "epoch": 5.0,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 12140,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -84040,6 +84040,986 @@
       "learning_rate": 1.215880893300248e-06,
       "loss": 0.0365,
       "step": 12000
+    },
+    {
+      "epoch": 4.9427512355848435,
+      "grad_norm": 0.8760747909545898,
+      "learning_rate": 1.207609594706369e-06,
+      "loss": 0.0409,
+      "step": 12001
+    },
+    {
+      "epoch": 4.943163097199341,
+      "grad_norm": 0.4808822572231293,
+      "learning_rate": 1.1993382961124897e-06,
+      "loss": 0.0151,
+      "step": 12002
+    },
+    {
+      "epoch": 4.943574958813839,
+      "grad_norm": 0.9200926423072815,
+      "learning_rate": 1.1910669975186105e-06,
+      "loss": 0.0424,
+      "step": 12003
+    },
+    {
+      "epoch": 4.943986820428336,
+      "grad_norm": 0.8151233196258545,
+      "learning_rate": 1.1827956989247313e-06,
+      "loss": 0.0265,
+      "step": 12004
+    },
+    {
+      "epoch": 4.944398682042833,
+      "grad_norm": 1.4278342723846436,
+      "learning_rate": 1.174524400330852e-06,
+      "loss": 0.0557,
+      "step": 12005
+    },
+    {
+      "epoch": 4.944810543657331,
+      "grad_norm": 0.5536609292030334,
+      "learning_rate": 1.1662531017369729e-06,
+      "loss": 0.0214,
+      "step": 12006
+    },
+    {
+      "epoch": 4.945222405271829,
+      "grad_norm": 0.5233038663864136,
+      "learning_rate": 1.1579818031430934e-06,
+      "loss": 0.0204,
+      "step": 12007
+    },
+    {
+      "epoch": 4.9456342668863265,
+      "grad_norm": 0.6543901562690735,
+      "learning_rate": 1.1497105045492142e-06,
+      "loss": 0.0213,
+      "step": 12008
+    },
+    {
+      "epoch": 4.946046128500823,
+      "grad_norm": 1.1069648265838623,
+      "learning_rate": 1.1414392059553352e-06,
+      "loss": 0.0663,
+      "step": 12009
+    },
+    {
+      "epoch": 4.946457990115321,
+      "grad_norm": 0.40918126702308655,
+      "learning_rate": 1.1331679073614558e-06,
+      "loss": 0.0124,
+      "step": 12010
+    },
+    {
+      "epoch": 4.946869851729819,
+      "grad_norm": 1.308690071105957,
+      "learning_rate": 1.1248966087675766e-06,
+      "loss": 0.0677,
+      "step": 12011
+    },
+    {
+      "epoch": 4.947281713344316,
+      "grad_norm": 1.826087474822998,
+      "learning_rate": 1.1166253101736972e-06,
+      "loss": 0.0498,
+      "step": 12012
+    },
+    {
+      "epoch": 4.947693574958814,
+      "grad_norm": 0.9981445074081421,
+      "learning_rate": 1.1083540115798182e-06,
+      "loss": 0.044,
+      "step": 12013
+    },
+    {
+      "epoch": 4.948105436573312,
+      "grad_norm": 0.7710696458816528,
+      "learning_rate": 1.100082712985939e-06,
+      "loss": 0.0331,
+      "step": 12014
+    },
+    {
+      "epoch": 4.948517298187809,
+      "grad_norm": 0.6384889483451843,
+      "learning_rate": 1.0918114143920596e-06,
+      "loss": 0.0272,
+      "step": 12015
+    },
+    {
+      "epoch": 4.948929159802306,
+      "grad_norm": 0.5777890682220459,
+      "learning_rate": 1.0835401157981803e-06,
+      "loss": 0.0228,
+      "step": 12016
+    },
+    {
+      "epoch": 4.949341021416804,
+      "grad_norm": 0.5390148758888245,
+      "learning_rate": 1.0752688172043011e-06,
+      "loss": 0.022,
+      "step": 12017
+    },
+    {
+      "epoch": 4.949752883031302,
+      "grad_norm": 1.009461522102356,
+      "learning_rate": 1.066997518610422e-06,
+      "loss": 0.0558,
+      "step": 12018
+    },
+    {
+      "epoch": 4.950164744645799,
+      "grad_norm": 0.931441068649292,
+      "learning_rate": 1.0587262200165427e-06,
+      "loss": 0.0437,
+      "step": 12019
+    },
+    {
+      "epoch": 4.950576606260297,
+      "grad_norm": 0.8924103379249573,
+      "learning_rate": 1.0504549214226633e-06,
+      "loss": 0.0623,
+      "step": 12020
+    },
+    {
+      "epoch": 4.950988467874794,
+      "grad_norm": 0.8271609544754028,
+      "learning_rate": 1.0421836228287843e-06,
+      "loss": 0.0327,
+      "step": 12021
+    },
+    {
+      "epoch": 4.951400329489291,
+      "grad_norm": 1.104325294494629,
+      "learning_rate": 1.0339123242349049e-06,
+      "loss": 0.0437,
+      "step": 12022
+    },
+    {
+      "epoch": 4.951812191103789,
+      "grad_norm": 0.6963580250740051,
+      "learning_rate": 1.0256410256410257e-06,
+      "loss": 0.0289,
+      "step": 12023
+    },
+    {
+      "epoch": 4.952224052718287,
+      "grad_norm": 1.1273614168167114,
+      "learning_rate": 1.0173697270471465e-06,
+      "loss": 0.0553,
+      "step": 12024
+    },
+    {
+      "epoch": 4.9526359143327845,
+      "grad_norm": 0.8829939961433411,
+      "learning_rate": 1.0090984284532673e-06,
+      "loss": 0.0433,
+      "step": 12025
+    },
+    {
+      "epoch": 4.953047775947281,
+      "grad_norm": 1.4629536867141724,
+      "learning_rate": 1.000827129859388e-06,
+      "loss": 0.0425,
+      "step": 12026
+    },
+    {
+      "epoch": 4.953459637561779,
+      "grad_norm": 0.7288837432861328,
+      "learning_rate": 9.925558312655088e-07,
+      "loss": 0.0339,
+      "step": 12027
+    },
+    {
+      "epoch": 4.953871499176277,
+      "grad_norm": 1.1095249652862549,
+      "learning_rate": 9.842845326716294e-07,
+      "loss": 0.0524,
+      "step": 12028
+    },
+    {
+      "epoch": 4.954283360790774,
+      "grad_norm": 0.9914068579673767,
+      "learning_rate": 9.760132340777504e-07,
+      "loss": 0.0394,
+      "step": 12029
+    },
+    {
+      "epoch": 4.954695222405272,
+      "grad_norm": 1.1212928295135498,
+      "learning_rate": 9.67741935483871e-07,
+      "loss": 0.0427,
+      "step": 12030
+    },
+    {
+      "epoch": 4.95510708401977,
+      "grad_norm": 0.7218902707099915,
+      "learning_rate": 9.594706368899918e-07,
+      "loss": 0.0379,
+      "step": 12031
+    },
+    {
+      "epoch": 4.9555189456342665,
+      "grad_norm": 0.48959654569625854,
+      "learning_rate": 9.511993382961125e-07,
+      "loss": 0.0204,
+      "step": 12032
+    },
+    {
+      "epoch": 4.955930807248764,
+      "grad_norm": 0.5668047070503235,
+      "learning_rate": 9.429280397022334e-07,
+      "loss": 0.0242,
+      "step": 12033
+    },
+    {
+      "epoch": 4.956342668863262,
+      "grad_norm": 1.2100036144256592,
+      "learning_rate": 9.346567411083541e-07,
+      "loss": 0.0261,
+      "step": 12034
+    },
+    {
+      "epoch": 4.95675453047776,
+      "grad_norm": 0.9323378801345825,
+      "learning_rate": 9.263854425144749e-07,
+      "loss": 0.0442,
+      "step": 12035
+    },
+    {
+      "epoch": 4.957166392092257,
+      "grad_norm": 1.1071078777313232,
+      "learning_rate": 9.181141439205955e-07,
+      "loss": 0.0775,
+      "step": 12036
+    },
+    {
+      "epoch": 4.957578253706755,
+      "grad_norm": 1.3353780508041382,
+      "learning_rate": 9.098428453267164e-07,
+      "loss": 0.0734,
+      "step": 12037
+    },
+    {
+      "epoch": 4.957990115321252,
+      "grad_norm": 0.6077889204025269,
+      "learning_rate": 9.015715467328371e-07,
+      "loss": 0.0203,
+      "step": 12038
+    },
+    {
+      "epoch": 4.9584019769357495,
+      "grad_norm": 1.1659730672836304,
+      "learning_rate": 8.933002481389579e-07,
+      "loss": 0.0337,
+      "step": 12039
+    },
+    {
+      "epoch": 4.958813838550247,
+      "grad_norm": 0.5649575591087341,
+      "learning_rate": 8.850289495450786e-07,
+      "loss": 0.0195,
+      "step": 12040
+    },
+    {
+      "epoch": 4.959225700164745,
+      "grad_norm": 1.04268479347229,
+      "learning_rate": 8.767576509511995e-07,
+      "loss": 0.0538,
+      "step": 12041
+    },
+    {
+      "epoch": 4.9596375617792425,
+      "grad_norm": 0.9538731575012207,
+      "learning_rate": 8.684863523573202e-07,
+      "loss": 0.0353,
+      "step": 12042
+    },
+    {
+      "epoch": 4.960049423393739,
+      "grad_norm": 1.29489266872406,
+      "learning_rate": 8.602150537634409e-07,
+      "loss": 0.0386,
+      "step": 12043
+    },
+    {
+      "epoch": 4.960461285008237,
+      "grad_norm": 0.7381988167762756,
+      "learning_rate": 8.519437551695617e-07,
+      "loss": 0.0361,
+      "step": 12044
+    },
+    {
+      "epoch": 4.960873146622735,
+      "grad_norm": 0.8122274875640869,
+      "learning_rate": 8.436724565756826e-07,
+      "loss": 0.0326,
+      "step": 12045
+    },
+    {
+      "epoch": 4.961285008237232,
+      "grad_norm": 1.0467932224273682,
+      "learning_rate": 8.354011579818032e-07,
+      "loss": 0.0474,
+      "step": 12046
+    },
+    {
+      "epoch": 4.96169686985173,
+      "grad_norm": 1.3369314670562744,
+      "learning_rate": 8.271298593879239e-07,
+      "loss": 0.07,
+      "step": 12047
+    },
+    {
+      "epoch": 4.962108731466227,
+      "grad_norm": 0.9849840998649597,
+      "learning_rate": 8.188585607940446e-07,
+      "loss": 0.0438,
+      "step": 12048
+    },
+    {
+      "epoch": 4.962520593080725,
+      "grad_norm": 0.9137307405471802,
+      "learning_rate": 8.105872622001655e-07,
+      "loss": 0.0329,
+      "step": 12049
+    },
+    {
+      "epoch": 4.962932454695222,
+      "grad_norm": 1.0700104236602783,
+      "learning_rate": 8.023159636062863e-07,
+      "loss": 0.052,
+      "step": 12050
+    },
+    {
+      "epoch": 4.96334431630972,
+      "grad_norm": 1.1193039417266846,
+      "learning_rate": 7.94044665012407e-07,
+      "loss": 0.0434,
+      "step": 12051
+    },
+    {
+      "epoch": 4.963756177924218,
+      "grad_norm": 0.7343298196792603,
+      "learning_rate": 7.857733664185277e-07,
+      "loss": 0.031,
+      "step": 12052
+    },
+    {
+      "epoch": 4.964168039538715,
+      "grad_norm": 0.9433478713035583,
+      "learning_rate": 7.775020678246485e-07,
+      "loss": 0.04,
+      "step": 12053
+    },
+    {
+      "epoch": 4.964579901153213,
+      "grad_norm": 0.8323972225189209,
+      "learning_rate": 7.692307692307694e-07,
+      "loss": 0.0348,
+      "step": 12054
+    },
+    {
+      "epoch": 4.96499176276771,
+      "grad_norm": 0.9453573226928711,
+      "learning_rate": 7.6095947063689e-07,
+      "loss": 0.0482,
+      "step": 12055
+    },
+    {
+      "epoch": 4.9654036243822075,
+      "grad_norm": 1.136635422706604,
+      "learning_rate": 7.526881720430108e-07,
+      "loss": 0.059,
+      "step": 12056
+    },
+    {
+      "epoch": 4.965815485996705,
+      "grad_norm": 0.8542512059211731,
+      "learning_rate": 7.444168734491315e-07,
+      "loss": 0.0364,
+      "step": 12057
+    },
+    {
+      "epoch": 4.966227347611203,
+      "grad_norm": 1.3132659196853638,
+      "learning_rate": 7.361455748552523e-07,
+      "loss": 0.0506,
+      "step": 12058
+    },
+    {
+      "epoch": 4.966639209225701,
+      "grad_norm": 1.8883720636367798,
+      "learning_rate": 7.278742762613731e-07,
+      "loss": 0.0721,
+      "step": 12059
+    },
+    {
+      "epoch": 4.967051070840197,
+      "grad_norm": 1.7649009227752686,
+      "learning_rate": 7.196029776674939e-07,
+      "loss": 0.1001,
+      "step": 12060
+    },
+    {
+      "epoch": 4.967462932454695,
+      "grad_norm": 0.691087543964386,
+      "learning_rate": 7.113316790736146e-07,
+      "loss": 0.0251,
+      "step": 12061
+    },
+    {
+      "epoch": 4.967874794069193,
+      "grad_norm": 1.228608250617981,
+      "learning_rate": 7.030603804797354e-07,
+      "loss": 0.0724,
+      "step": 12062
+    },
+    {
+      "epoch": 4.96828665568369,
+      "grad_norm": 0.6133949756622314,
+      "learning_rate": 6.947890818858562e-07,
+      "loss": 0.0379,
+      "step": 12063
+    },
+    {
+      "epoch": 4.968698517298188,
+      "grad_norm": 1.615044116973877,
+      "learning_rate": 6.86517783291977e-07,
+      "loss": 0.0571,
+      "step": 12064
+    },
+    {
+      "epoch": 4.969110378912685,
+      "grad_norm": 1.5211735963821411,
+      "learning_rate": 6.782464846980976e-07,
+      "loss": 0.0579,
+      "step": 12065
+    },
+    {
+      "epoch": 4.969522240527183,
+      "grad_norm": 0.5871435403823853,
+      "learning_rate": 6.699751861042184e-07,
+      "loss": 0.0218,
+      "step": 12066
+    },
+    {
+      "epoch": 4.96993410214168,
+      "grad_norm": 0.691184401512146,
+      "learning_rate": 6.617038875103391e-07,
+      "loss": 0.029,
+      "step": 12067
+    },
+    {
+      "epoch": 4.970345963756178,
+      "grad_norm": 0.9927480220794678,
+      "learning_rate": 6.534325889164599e-07,
+      "loss": 0.0479,
+      "step": 12068
+    },
+    {
+      "epoch": 4.970757825370676,
+      "grad_norm": 0.5410662889480591,
+      "learning_rate": 6.451612903225807e-07,
+      "loss": 0.0205,
+      "step": 12069
+    },
+    {
+      "epoch": 4.971169686985173,
+      "grad_norm": 0.7557745575904846,
+      "learning_rate": 6.368899917287015e-07,
+      "loss": 0.0266,
+      "step": 12070
+    },
+    {
+      "epoch": 4.97158154859967,
+      "grad_norm": 0.7733569145202637,
+      "learning_rate": 6.286186931348222e-07,
+      "loss": 0.0378,
+      "step": 12071
+    },
+    {
+      "epoch": 4.971993410214168,
+      "grad_norm": 0.839928925037384,
+      "learning_rate": 6.20347394540943e-07,
+      "loss": 0.0324,
+      "step": 12072
+    },
+    {
+      "epoch": 4.9724052718286655,
+      "grad_norm": 0.6274780631065369,
+      "learning_rate": 6.120760959470638e-07,
+      "loss": 0.019,
+      "step": 12073
+    },
+    {
+      "epoch": 4.972817133443163,
+      "grad_norm": 1.1368050575256348,
+      "learning_rate": 6.038047973531846e-07,
+      "loss": 0.0612,
+      "step": 12074
+    },
+    {
+      "epoch": 4.973228995057661,
+      "grad_norm": 1.059112310409546,
+      "learning_rate": 5.955334987593052e-07,
+      "loss": 0.0491,
+      "step": 12075
+    },
+    {
+      "epoch": 4.973640856672159,
+      "grad_norm": 1.1168935298919678,
+      "learning_rate": 5.87262200165426e-07,
+      "loss": 0.0447,
+      "step": 12076
+    },
+    {
+      "epoch": 4.974052718286655,
+      "grad_norm": 0.9928245544433594,
+      "learning_rate": 5.789909015715467e-07,
+      "loss": 0.0395,
+      "step": 12077
+    },
+    {
+      "epoch": 4.974464579901153,
+      "grad_norm": 0.7998114228248596,
+      "learning_rate": 5.707196029776676e-07,
+      "loss": 0.0355,
+      "step": 12078
+    },
+    {
+      "epoch": 4.974876441515651,
+      "grad_norm": 0.8010594248771667,
+      "learning_rate": 5.624483043837883e-07,
+      "loss": 0.0311,
+      "step": 12079
+    },
+    {
+      "epoch": 4.9752883031301485,
+      "grad_norm": 1.6317414045333862,
+      "learning_rate": 5.541770057899091e-07,
+      "loss": 0.0561,
+      "step": 12080
+    },
+    {
+      "epoch": 4.975700164744646,
+      "grad_norm": 1.217797040939331,
+      "learning_rate": 5.459057071960298e-07,
+      "loss": 0.0429,
+      "step": 12081
+    },
+    {
+      "epoch": 4.976112026359143,
+      "grad_norm": 1.3075381517410278,
+      "learning_rate": 5.376344086021506e-07,
+      "loss": 0.0671,
+      "step": 12082
+    },
+    {
+      "epoch": 4.976523887973641,
+      "grad_norm": 0.9281332492828369,
+      "learning_rate": 5.293631100082714e-07,
+      "loss": 0.0436,
+      "step": 12083
+    },
+    {
+      "epoch": 4.976935749588138,
+      "grad_norm": 1.1827677488327026,
+      "learning_rate": 5.210918114143921e-07,
+      "loss": 0.0572,
+      "step": 12084
+    },
+    {
+      "epoch": 4.977347611202636,
+      "grad_norm": 1.4458634853363037,
+      "learning_rate": 5.128205128205128e-07,
+      "loss": 0.0516,
+      "step": 12085
+    },
+    {
+      "epoch": 4.977759472817134,
+      "grad_norm": 0.3626463711261749,
+      "learning_rate": 5.045492142266336e-07,
+      "loss": 0.0163,
+      "step": 12086
+    },
+    {
+      "epoch": 4.9781713344316305,
+      "grad_norm": 0.9052258729934692,
+      "learning_rate": 4.962779156327544e-07,
+      "loss": 0.0385,
+      "step": 12087
+    },
+    {
+      "epoch": 4.978583196046128,
+      "grad_norm": 0.9603756666183472,
+      "learning_rate": 4.880066170388752e-07,
+      "loss": 0.024,
+      "step": 12088
+    },
+    {
+      "epoch": 4.978995057660626,
+      "grad_norm": 2.4891510009765625,
+      "learning_rate": 4.797353184449959e-07,
+      "loss": 0.0646,
+      "step": 12089
+    },
+    {
+      "epoch": 4.979406919275124,
+      "grad_norm": 0.7407947778701782,
+      "learning_rate": 4.714640198511167e-07,
+      "loss": 0.0338,
+      "step": 12090
+    },
+    {
+      "epoch": 4.979818780889621,
+      "grad_norm": 0.8082538843154907,
+      "learning_rate": 4.631927212572374e-07,
+      "loss": 0.0419,
+      "step": 12091
+    },
+    {
+      "epoch": 4.980230642504119,
+      "grad_norm": 0.8320026397705078,
+      "learning_rate": 4.549214226633582e-07,
+      "loss": 0.0333,
+      "step": 12092
+    },
+    {
+      "epoch": 4.980642504118617,
+      "grad_norm": 0.6993066668510437,
+      "learning_rate": 4.4665012406947896e-07,
+      "loss": 0.0251,
+      "step": 12093
+    },
+    {
+      "epoch": 4.981054365733113,
+      "grad_norm": 0.6951777935028076,
+      "learning_rate": 4.3837882547559975e-07,
+      "loss": 0.0341,
+      "step": 12094
+    },
+    {
+      "epoch": 4.981466227347611,
+      "grad_norm": 0.7482096552848816,
+      "learning_rate": 4.3010752688172043e-07,
+      "loss": 0.023,
+      "step": 12095
+    },
+    {
+      "epoch": 4.981878088962109,
+      "grad_norm": 0.6904179453849792,
+      "learning_rate": 4.218362282878413e-07,
+      "loss": 0.0241,
+      "step": 12096
+    },
+    {
+      "epoch": 4.9822899505766065,
+      "grad_norm": 1.1358861923217773,
+      "learning_rate": 4.1356492969396196e-07,
+      "loss": 0.0552,
+      "step": 12097
+    },
+    {
+      "epoch": 4.982701812191104,
+      "grad_norm": 1.545332670211792,
+      "learning_rate": 4.0529363110008275e-07,
+      "loss": 0.0793,
+      "step": 12098
+    },
+    {
+      "epoch": 4.983113673805601,
+      "grad_norm": 1.2560546398162842,
+      "learning_rate": 3.970223325062035e-07,
+      "loss": 0.0387,
+      "step": 12099
+    },
+    {
+      "epoch": 4.983525535420099,
+      "grad_norm": 0.8771617412567139,
+      "learning_rate": 3.8875103391232423e-07,
+      "loss": 0.0394,
+      "step": 12100
+    },
+    {
+      "epoch": 4.983937397034596,
+      "grad_norm": 0.9587097764015198,
+      "learning_rate": 3.80479735318445e-07,
+      "loss": 0.0289,
+      "step": 12101
+    },
+    {
+      "epoch": 4.984349258649094,
+      "grad_norm": 0.6920797228813171,
+      "learning_rate": 3.7220843672456576e-07,
+      "loss": 0.0282,
+      "step": 12102
+    },
+    {
+      "epoch": 4.984761120263592,
+      "grad_norm": 0.9588348269462585,
+      "learning_rate": 3.6393713813068655e-07,
+      "loss": 0.0369,
+      "step": 12103
+    },
+    {
+      "epoch": 4.9851729818780885,
+      "grad_norm": 1.2430922985076904,
+      "learning_rate": 3.556658395368073e-07,
+      "loss": 0.0436,
+      "step": 12104
+    },
+    {
+      "epoch": 4.985584843492586,
+      "grad_norm": 0.6318089365959167,
+      "learning_rate": 3.473945409429281e-07,
+      "loss": 0.0364,
+      "step": 12105
+    },
+    {
+      "epoch": 4.985996705107084,
+      "grad_norm": 1.1959476470947266,
+      "learning_rate": 3.391232423490488e-07,
+      "loss": 0.0496,
+      "step": 12106
+    },
+    {
+      "epoch": 4.986408566721582,
+      "grad_norm": 1.6818280220031738,
+      "learning_rate": 3.3085194375516956e-07,
+      "loss": 0.1085,
+      "step": 12107
+    },
+    {
+      "epoch": 4.986820428336079,
+      "grad_norm": 0.6175749897956848,
+      "learning_rate": 3.2258064516129035e-07,
+      "loss": 0.0268,
+      "step": 12108
+    },
+    {
+      "epoch": 4.987232289950577,
+      "grad_norm": 0.5984141230583191,
+      "learning_rate": 3.143093465674111e-07,
+      "loss": 0.0191,
+      "step": 12109
+    },
+    {
+      "epoch": 4.987644151565074,
+      "grad_norm": 1.360551357269287,
+      "learning_rate": 3.060380479735319e-07,
+      "loss": 0.0668,
+      "step": 12110
+    },
+    {
+      "epoch": 4.9880560131795715,
+      "grad_norm": 1.3164476156234741,
+      "learning_rate": 2.977667493796526e-07,
+      "loss": 0.0622,
+      "step": 12111
+    },
+    {
+      "epoch": 4.988467874794069,
+      "grad_norm": 1.2000000476837158,
+      "learning_rate": 2.8949545078577336e-07,
+      "loss": 0.067,
+      "step": 12112
+    },
+    {
+      "epoch": 4.988879736408567,
+      "grad_norm": 0.5263541340827942,
+      "learning_rate": 2.8122415219189415e-07,
+      "loss": 0.0243,
+      "step": 12113
+    },
+    {
+      "epoch": 4.9892915980230645,
+      "grad_norm": 1.999869465827942,
+      "learning_rate": 2.729528535980149e-07,
+      "loss": 0.0791,
+      "step": 12114
+    },
+    {
+      "epoch": 4.989703459637562,
+      "grad_norm": 0.7013229727745056,
+      "learning_rate": 2.646815550041357e-07,
+      "loss": 0.022,
+      "step": 12115
+    },
+    {
+      "epoch": 4.990115321252059,
+      "grad_norm": 1.4785493612289429,
+      "learning_rate": 2.564102564102564e-07,
+      "loss": 0.0482,
+      "step": 12116
+    },
+    {
+      "epoch": 4.990527182866557,
+      "grad_norm": 0.6984476447105408,
+      "learning_rate": 2.481389578163772e-07,
+      "loss": 0.0331,
+      "step": 12117
+    },
+    {
+      "epoch": 4.990939044481054,
+      "grad_norm": 0.7461164593696594,
+      "learning_rate": 2.3986765922249795e-07,
+      "loss": 0.0247,
+      "step": 12118
+    },
+    {
+      "epoch": 4.991350906095552,
+      "grad_norm": 0.4754248261451721,
+      "learning_rate": 2.315963606286187e-07,
+      "loss": 0.0196,
+      "step": 12119
+    },
+    {
+      "epoch": 4.99176276771005,
+      "grad_norm": 0.7683712244033813,
+      "learning_rate": 2.2332506203473948e-07,
+      "loss": 0.0303,
+      "step": 12120
+    },
+    {
+      "epoch": 4.992174629324547,
+      "grad_norm": 0.690411388874054,
+      "learning_rate": 2.1505376344086022e-07,
+      "loss": 0.0309,
+      "step": 12121
+    },
+    {
+      "epoch": 4.992586490939044,
+      "grad_norm": 0.9615204930305481,
+      "learning_rate": 2.0678246484698098e-07,
+      "loss": 0.0489,
+      "step": 12122
+    },
+    {
+      "epoch": 4.992998352553542,
+      "grad_norm": 0.9663929343223572,
+      "learning_rate": 1.9851116625310175e-07,
+      "loss": 0.0471,
+      "step": 12123
+    },
+    {
+      "epoch": 4.99341021416804,
+      "grad_norm": 0.9998257160186768,
+      "learning_rate": 1.902398676592225e-07,
+      "loss": 0.0604,
+      "step": 12124
+    },
+    {
+      "epoch": 4.993822075782537,
+      "grad_norm": 1.2891749143600464,
+      "learning_rate": 1.8196856906534328e-07,
+      "loss": 0.0559,
+      "step": 12125
+    },
+    {
+      "epoch": 4.994233937397034,
+      "grad_norm": 0.8184080719947815,
+      "learning_rate": 1.7369727047146404e-07,
+      "loss": 0.0392,
+      "step": 12126
+    },
+    {
+      "epoch": 4.994645799011532,
+      "grad_norm": 0.5620107650756836,
+      "learning_rate": 1.6542597187758478e-07,
+      "loss": 0.0207,
+      "step": 12127
+    },
+    {
+      "epoch": 4.9950576606260295,
+      "grad_norm": 0.8689129948616028,
+      "learning_rate": 1.5715467328370554e-07,
+      "loss": 0.0304,
+      "step": 12128
+    },
+    {
+      "epoch": 4.995469522240527,
+      "grad_norm": 0.7156726717948914,
+      "learning_rate": 1.488833746898263e-07,
+      "loss": 0.023,
+      "step": 12129
+    },
+    {
+      "epoch": 4.995881383855025,
+      "grad_norm": 1.226078748703003,
+      "learning_rate": 1.4061207609594707e-07,
+      "loss": 0.0662,
+      "step": 12130
+    },
+    {
+      "epoch": 4.996293245469523,
+      "grad_norm": 0.5333605408668518,
+      "learning_rate": 1.3234077750206784e-07,
+      "loss": 0.0246,
+      "step": 12131
+    },
+    {
+      "epoch": 4.996705107084019,
+      "grad_norm": 2.105001926422119,
+      "learning_rate": 1.240694789081886e-07,
+      "loss": 0.0549,
+      "step": 12132
+    },
+    {
+      "epoch": 4.997116968698517,
+      "grad_norm": 0.8374558687210083,
+      "learning_rate": 1.1579818031430936e-07,
+      "loss": 0.0271,
+      "step": 12133
+    },
+    {
+      "epoch": 4.997528830313015,
+      "grad_norm": 0.9622344970703125,
+      "learning_rate": 1.0752688172043011e-07,
+      "loss": 0.0435,
+      "step": 12134
+    },
+    {
+      "epoch": 4.9979406919275124,
+      "grad_norm": 0.7495869398117065,
+      "learning_rate": 9.925558312655087e-08,
+      "loss": 0.0337,
+      "step": 12135
+    },
+    {
+      "epoch": 4.99835255354201,
+      "grad_norm": 0.7919772267341614,
+      "learning_rate": 9.098428453267164e-08,
+      "loss": 0.0395,
+      "step": 12136
+    },
+    {
+      "epoch": 4.998764415156508,
+      "grad_norm": 1.095862627029419,
+      "learning_rate": 8.271298593879239e-08,
+      "loss": 0.0641,
+      "step": 12137
+    },
+    {
+      "epoch": 4.999176276771005,
+      "grad_norm": 0.6565665006637573,
+      "learning_rate": 7.444168734491315e-08,
+      "loss": 0.0242,
+      "step": 12138
+    },
+    {
+      "epoch": 4.999588138385502,
+      "grad_norm": 0.9998601675033569,
+      "learning_rate": 6.617038875103392e-08,
+      "loss": 0.0407,
+      "step": 12139
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 0.5990859270095825,
+      "learning_rate": 5.789909015715468e-08,
+      "loss": 0.0159,
+      "step": 12140
     }
   ],
   "logging_steps": 1,
@@ -84054,12 +85034,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.
+  "total_flos": 3.310929890869248e+20,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
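Taken together, the trainer_state.json changes mark the end of the run: the epoch reaches exactly 5.0 at global_step 12140, should_training_stop flips to true, total_flos is finalized, and 140 new log_history entries (steps 12001 through 12140) record the learning rate decaying linearly toward zero. A minimal sketch, assuming the standard transformers Trainer layout shown in the diff, of confirming this from the saved state:

```python
# Sketch: inspect the updated trainer_state.json from this checkpoint.
# The path is repo-relative; the field layout follows the diff above.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["epoch"], state["global_step"])        # 5.0 12140
control = state["stateful_callbacks"]["TrainerControl"]["args"]
print(control["should_training_stop"])             # True: the run is finished

last = state["log_history"][-1]                    # final logged step
print(last["step"], last["loss"], last["learning_rate"])  # 12140 0.0159 5.79e-08
```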