Training in progress, step 1463, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 97307544
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76c50a918ed6e25310310327d96a41f7be0e1c8dba72e17f5343f2601e20241d
|
3 |
size 97307544
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 49846644
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b696ff230e34810056a87e2deb52ecbe8fdb57a70eb3920f5040424edba8b000
|
3 |
size 49846644
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a0be03340b8e06f61dec93cc182f3169930283c402a6cbbe5edbaf390086811
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ddba364fa66c16b2353d38cd96d0db084fafe0b8f317c23722a5d9cf61bf020
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.8337556719779968,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-1400",
|
4 |
-
"epoch":
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -9927,6 +9927,447 @@
|
|
9927 |
"eval_samples_per_second": 12.086,
|
9928 |
"eval_steps_per_second": 3.022,
|
9929 |
"step": 1400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9930 |
}
|
9931 |
],
|
9932 |
"logging_steps": 1,
|
@@ -9950,12 +10391,12 @@
|
|
9950 |
"should_evaluate": false,
|
9951 |
"should_log": false,
|
9952 |
"should_save": true,
|
9953 |
-
"should_training_stop":
|
9954 |
},
|
9955 |
"attributes": {}
|
9956 |
}
|
9957 |
},
|
9958 |
-
"total_flos": 1.
|
9959 |
"train_batch_size": 4,
|
9960 |
"trial_name": null,
|
9961 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.8337556719779968,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-1400",
|
4 |
+
"epoch": 2.0010258163788683,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 1463,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
9927 |
"eval_samples_per_second": 12.086,
|
9928 |
"eval_steps_per_second": 3.022,
|
9929 |
"step": 1400
|
9930 |
+
},
|
9931 |
+
{
|
9932 |
+
"epoch": 1.916224995725765,
|
9933 |
+
"grad_norm": 0.461910605430603,
|
9934 |
+
"learning_rate": 8.971631787046853e-07,
|
9935 |
+
"loss": 0.8424,
|
9936 |
+
"step": 1401
|
9937 |
+
},
|
9938 |
+
{
|
9939 |
+
"epoch": 1.9175927508975894,
|
9940 |
+
"grad_norm": 0.4228653907775879,
|
9941 |
+
"learning_rate": 8.684974514604705e-07,
|
9942 |
+
"loss": 0.6749,
|
9943 |
+
"step": 1402
|
9944 |
+
},
|
9945 |
+
{
|
9946 |
+
"epoch": 1.9189605060694137,
|
9947 |
+
"grad_norm": 0.45295336842536926,
|
9948 |
+
"learning_rate": 8.402951498328926e-07,
|
9949 |
+
"loss": 0.8973,
|
9950 |
+
"step": 1403
|
9951 |
+
},
|
9952 |
+
{
|
9953 |
+
"epoch": 1.9203282612412378,
|
9954 |
+
"grad_norm": 0.4639895558357239,
|
9955 |
+
"learning_rate": 8.125564056637003e-07,
|
9956 |
+
"loss": 0.6882,
|
9957 |
+
"step": 1404
|
9958 |
+
},
|
9959 |
+
{
|
9960 |
+
"epoch": 1.921696016413062,
|
9961 |
+
"grad_norm": 0.5838170647621155,
|
9962 |
+
"learning_rate": 7.852813486275423e-07,
|
9963 |
+
"loss": 0.8531,
|
9964 |
+
"step": 1405
|
9965 |
+
},
|
9966 |
+
{
|
9967 |
+
"epoch": 1.9230637715848862,
|
9968 |
+
"grad_norm": 0.44935956597328186,
|
9969 |
+
"learning_rate": 7.584701062314237e-07,
|
9970 |
+
"loss": 0.6856,
|
9971 |
+
"step": 1406
|
9972 |
+
},
|
9973 |
+
{
|
9974 |
+
"epoch": 1.9244315267567105,
|
9975 |
+
"grad_norm": 0.5075057148933411,
|
9976 |
+
"learning_rate": 7.321228038140726e-07,
|
9977 |
+
"loss": 0.6377,
|
9978 |
+
"step": 1407
|
9979 |
+
},
|
9980 |
+
{
|
9981 |
+
"epoch": 1.9257992819285348,
|
9982 |
+
"grad_norm": 0.4068799912929535,
|
9983 |
+
"learning_rate": 7.062395645453634e-07,
|
9984 |
+
"loss": 0.9096,
|
9985 |
+
"step": 1408
|
9986 |
+
},
|
9987 |
+
{
|
9988 |
+
"epoch": 1.927167037100359,
|
9989 |
+
"grad_norm": 0.4617645740509033,
|
9990 |
+
"learning_rate": 6.808205094257503e-07,
|
9991 |
+
"loss": 0.6808,
|
9992 |
+
"step": 1409
|
9993 |
+
},
|
9994 |
+
{
|
9995 |
+
"epoch": 1.9285347922721834,
|
9996 |
+
"grad_norm": 0.44651269912719727,
|
9997 |
+
"learning_rate": 6.558657572856786e-07,
|
9998 |
+
"loss": 0.5143,
|
9999 |
+
"step": 1410
|
10000 |
+
},
|
10001 |
+
{
|
10002 |
+
"epoch": 1.9299025474440077,
|
10003 |
+
"grad_norm": 0.3676847517490387,
|
10004 |
+
"learning_rate": 6.313754247850523e-07,
|
10005 |
+
"loss": 0.611,
|
10006 |
+
"step": 1411
|
10007 |
+
},
|
10008 |
+
{
|
10009 |
+
"epoch": 1.9312703026158318,
|
10010 |
+
"grad_norm": 0.4552142024040222,
|
10011 |
+
"learning_rate": 6.073496264126788e-07,
|
10012 |
+
"loss": 0.9821,
|
10013 |
+
"step": 1412
|
10014 |
+
},
|
10015 |
+
{
|
10016 |
+
"epoch": 1.9326380577876558,
|
10017 |
+
"grad_norm": 0.4049084484577179,
|
10018 |
+
"learning_rate": 5.837884744857358e-07,
|
10019 |
+
"loss": 0.5781,
|
10020 |
+
"step": 1413
|
10021 |
+
},
|
10022 |
+
{
|
10023 |
+
"epoch": 1.9340058129594802,
|
10024 |
+
"grad_norm": 0.49279075860977173,
|
10025 |
+
"learning_rate": 5.606920791492387e-07,
|
10026 |
+
"loss": 0.6507,
|
10027 |
+
"step": 1414
|
10028 |
+
},
|
10029 |
+
{
|
10030 |
+
"epoch": 1.9353735681313045,
|
10031 |
+
"grad_norm": 0.47832998633384705,
|
10032 |
+
"learning_rate": 5.380605483755408e-07,
|
10033 |
+
"loss": 0.69,
|
10034 |
+
"step": 1415
|
10035 |
+
},
|
10036 |
+
{
|
10037 |
+
"epoch": 1.9367413233031288,
|
10038 |
+
"grad_norm": 0.5709347128868103,
|
10039 |
+
"learning_rate": 5.158939879638225e-07,
|
10040 |
+
"loss": 0.8283,
|
10041 |
+
"step": 1416
|
10042 |
+
},
|
10043 |
+
{
|
10044 |
+
"epoch": 1.938109078474953,
|
10045 |
+
"grad_norm": 0.44375723600387573,
|
10046 |
+
"learning_rate": 4.941925015395699e-07,
|
10047 |
+
"loss": 0.7771,
|
10048 |
+
"step": 1417
|
10049 |
+
},
|
10050 |
+
{
|
10051 |
+
"epoch": 1.9394768336467774,
|
10052 |
+
"grad_norm": 0.48919472098350525,
|
10053 |
+
"learning_rate": 4.729561905541524e-07,
|
10054 |
+
"loss": 0.7034,
|
10055 |
+
"step": 1418
|
10056 |
+
},
|
10057 |
+
{
|
10058 |
+
"epoch": 1.9408445888186014,
|
10059 |
+
"grad_norm": 0.38905012607574463,
|
10060 |
+
"learning_rate": 4.521851542842681e-07,
|
10061 |
+
"loss": 0.6598,
|
10062 |
+
"step": 1419
|
10063 |
+
},
|
10064 |
+
{
|
10065 |
+
"epoch": 1.9422123439904257,
|
10066 |
+
"grad_norm": 0.4131908714771271,
|
10067 |
+
"learning_rate": 4.31879489831577e-07,
|
10068 |
+
"loss": 0.7236,
|
10069 |
+
"step": 1420
|
10070 |
+
},
|
10071 |
+
{
|
10072 |
+
"epoch": 1.9435800991622498,
|
10073 |
+
"grad_norm": 0.37935328483581543,
|
10074 |
+
"learning_rate": 4.12039292122135e-07,
|
10075 |
+
"loss": 0.6964,
|
10076 |
+
"step": 1421
|
10077 |
+
},
|
10078 |
+
{
|
10079 |
+
"epoch": 1.9449478543340741,
|
10080 |
+
"grad_norm": 0.3789116442203522,
|
10081 |
+
"learning_rate": 3.9266465390603855e-07,
|
10082 |
+
"loss": 0.6997,
|
10083 |
+
"step": 1422
|
10084 |
+
},
|
10085 |
+
{
|
10086 |
+
"epoch": 1.9463156095058984,
|
10087 |
+
"grad_norm": 0.4881550669670105,
|
10088 |
+
"learning_rate": 3.7375566575695854e-07,
|
10089 |
+
"loss": 0.6896,
|
10090 |
+
"step": 1423
|
10091 |
+
},
|
10092 |
+
{
|
10093 |
+
"epoch": 1.9476833646777227,
|
10094 |
+
"grad_norm": 0.4398065507411957,
|
10095 |
+
"learning_rate": 3.5531241607170695e-07,
|
10096 |
+
"loss": 0.6534,
|
10097 |
+
"step": 1424
|
10098 |
+
},
|
10099 |
+
{
|
10100 |
+
"epoch": 1.949051119849547,
|
10101 |
+
"grad_norm": 0.45195892453193665,
|
10102 |
+
"learning_rate": 3.373349910698487e-07,
|
10103 |
+
"loss": 0.6845,
|
10104 |
+
"step": 1425
|
10105 |
+
},
|
10106 |
+
{
|
10107 |
+
"epoch": 1.9504188750213711,
|
10108 |
+
"grad_norm": 0.38856571912765503,
|
10109 |
+
"learning_rate": 3.1982347479327935e-07,
|
10110 |
+
"loss": 0.6648,
|
10111 |
+
"step": 1426
|
10112 |
+
},
|
10113 |
+
{
|
10114 |
+
"epoch": 1.9517866301931954,
|
10115 |
+
"grad_norm": 0.5333290100097656,
|
10116 |
+
"learning_rate": 3.027779491058369e-07,
|
10117 |
+
"loss": 0.8773,
|
10118 |
+
"step": 1427
|
10119 |
+
},
|
10120 |
+
{
|
10121 |
+
"epoch": 1.9531543853650195,
|
10122 |
+
"grad_norm": 0.42469778656959534,
|
10123 |
+
"learning_rate": 2.8619849369290185e-07,
|
10124 |
+
"loss": 0.6108,
|
10125 |
+
"step": 1428
|
10126 |
+
},
|
10127 |
+
{
|
10128 |
+
"epoch": 1.9545221405368438,
|
10129 |
+
"grad_norm": 0.4393293857574463,
|
10130 |
+
"learning_rate": 2.7008518606108644e-07,
|
10131 |
+
"loss": 0.7355,
|
10132 |
+
"step": 1429
|
10133 |
+
},
|
10134 |
+
{
|
10135 |
+
"epoch": 1.9558898957086681,
|
10136 |
+
"grad_norm": 0.4575292468070984,
|
10137 |
+
"learning_rate": 2.544381015377906e-07,
|
10138 |
+
"loss": 0.7924,
|
10139 |
+
"step": 1430
|
10140 |
+
},
|
10141 |
+
{
|
10142 |
+
"epoch": 1.9572576508804924,
|
10143 |
+
"grad_norm": 0.3630157709121704,
|
10144 |
+
"learning_rate": 2.3925731327089086e-07,
|
10145 |
+
"loss": 0.4605,
|
10146 |
+
"step": 1431
|
10147 |
+
},
|
10148 |
+
{
|
10149 |
+
"epoch": 1.9586254060523167,
|
10150 |
+
"grad_norm": 0.48664095997810364,
|
10151 |
+
"learning_rate": 2.2454289222842984e-07,
|
10152 |
+
"loss": 0.7356,
|
10153 |
+
"step": 1432
|
10154 |
+
},
|
10155 |
+
{
|
10156 |
+
"epoch": 1.959993161224141,
|
10157 |
+
"grad_norm": 0.49082890152931213,
|
10158 |
+
"learning_rate": 2.1029490719819411e-07,
|
10159 |
+
"loss": 0.654,
|
10160 |
+
"step": 1433
|
10161 |
+
},
|
10162 |
+
{
|
10163 |
+
"epoch": 1.961360916395965,
|
10164 |
+
"grad_norm": 0.3563566207885742,
|
10165 |
+
"learning_rate": 1.9651342478749223e-07,
|
10166 |
+
"loss": 0.5259,
|
10167 |
+
"step": 1434
|
10168 |
+
},
|
10169 |
+
{
|
10170 |
+
"epoch": 1.9627286715677894,
|
10171 |
+
"grad_norm": 0.4090525805950165,
|
10172 |
+
"learning_rate": 1.8319850942278839e-07,
|
10173 |
+
"loss": 0.8423,
|
10174 |
+
"step": 1435
|
10175 |
+
},
|
10176 |
+
{
|
10177 |
+
"epoch": 1.9640964267396135,
|
10178 |
+
"grad_norm": 0.39165446162223816,
|
10179 |
+
"learning_rate": 1.7035022334941364e-07,
|
10180 |
+
"loss": 0.7934,
|
10181 |
+
"step": 1436
|
10182 |
+
},
|
10183 |
+
{
|
10184 |
+
"epoch": 1.9654641819114378,
|
10185 |
+
"grad_norm": 0.4389599561691284,
|
10186 |
+
"learning_rate": 1.5796862663126634e-07,
|
10187 |
+
"loss": 0.5754,
|
10188 |
+
"step": 1437
|
10189 |
+
},
|
10190 |
+
{
|
10191 |
+
"epoch": 1.966831937083262,
|
10192 |
+
"grad_norm": 0.47216951847076416,
|
10193 |
+
"learning_rate": 1.4605377715053436e-07,
|
10194 |
+
"loss": 0.789,
|
10195 |
+
"step": 1438
|
10196 |
+
},
|
10197 |
+
{
|
10198 |
+
"epoch": 1.9681996922550864,
|
10199 |
+
"grad_norm": 0.39848554134368896,
|
10200 |
+
"learning_rate": 1.3460573060745106e-07,
|
10201 |
+
"loss": 0.7184,
|
10202 |
+
"step": 1439
|
10203 |
+
},
|
10204 |
+
{
|
10205 |
+
"epoch": 1.9695674474269107,
|
10206 |
+
"grad_norm": 0.550101637840271,
|
10207 |
+
"learning_rate": 1.2362454051998428e-07,
|
10208 |
+
"loss": 0.7593,
|
10209 |
+
"step": 1440
|
10210 |
+
},
|
10211 |
+
{
|
10212 |
+
"epoch": 1.9709352025987348,
|
10213 |
+
"grad_norm": 0.5361692905426025,
|
10214 |
+
"learning_rate": 1.1311025822364762e-07,
|
10215 |
+
"loss": 0.7345,
|
10216 |
+
"step": 1441
|
10217 |
+
},
|
10218 |
+
{
|
10219 |
+
"epoch": 1.972302957770559,
|
10220 |
+
"grad_norm": 0.4945903420448303,
|
10221 |
+
"learning_rate": 1.0306293287118962e-07,
|
10222 |
+
"loss": 0.781,
|
10223 |
+
"step": 1442
|
10224 |
+
},
|
10225 |
+
{
|
10226 |
+
"epoch": 1.9736707129423832,
|
10227 |
+
"grad_norm": 0.36988723278045654,
|
10228 |
+
"learning_rate": 9.348261143243831e-08,
|
10229 |
+
"loss": 0.7448,
|
10230 |
+
"step": 1443
|
10231 |
+
},
|
10232 |
+
{
|
10233 |
+
"epoch": 1.9750384681142075,
|
10234 |
+
"grad_norm": 0.38076066970825195,
|
10235 |
+
"learning_rate": 8.436933869402363e-08,
|
10236 |
+
"loss": 0.4683,
|
10237 |
+
"step": 1444
|
10238 |
+
},
|
10239 |
+
{
|
10240 |
+
"epoch": 1.9764062232860318,
|
10241 |
+
"grad_norm": 0.4106510281562805,
|
10242 |
+
"learning_rate": 7.572315725918877e-08,
|
10243 |
+
"loss": 0.9063,
|
10244 |
+
"step": 1445
|
10245 |
+
},
|
10246 |
+
{
|
10247 |
+
"epoch": 1.977773978457856,
|
10248 |
+
"grad_norm": 0.486651211977005,
|
10249 |
+
"learning_rate": 6.754410754759022e-08,
|
10250 |
+
"loss": 0.864,
|
10251 |
+
"step": 1446
|
10252 |
+
},
|
10253 |
+
{
|
10254 |
+
"epoch": 1.9791417336296804,
|
10255 |
+
"grad_norm": 0.4399421215057373,
|
10256 |
+
"learning_rate": 5.983222779514242e-08,
|
10257 |
+
"loss": 0.7105,
|
10258 |
+
"step": 1447
|
10259 |
+
},
|
10260 |
+
{
|
10261 |
+
"epoch": 1.9805094888015047,
|
10262 |
+
"grad_norm": 0.3864593803882599,
|
10263 |
+
"learning_rate": 5.2587554053751266e-08,
|
10264 |
+
"loss": 0.7329,
|
10265 |
+
"step": 1448
|
10266 |
+
},
|
10267 |
+
{
|
10268 |
+
"epoch": 1.9818772439733288,
|
10269 |
+
"grad_norm": 0.38582947850227356,
|
10270 |
+
"learning_rate": 4.581012019125863e-08,
|
10271 |
+
"loss": 0.7425,
|
10272 |
+
"step": 1449
|
10273 |
+
},
|
10274 |
+
{
|
10275 |
+
"epoch": 1.983244999145153,
|
10276 |
+
"grad_norm": 0.37019839882850647,
|
10277 |
+
"learning_rate": 3.949995789118699e-08,
|
10278 |
+
"loss": 0.5574,
|
10279 |
+
"step": 1450
|
10280 |
+
},
|
10281 |
+
{
|
10282 |
+
"epoch": 1.9846127543169771,
|
10283 |
+
"grad_norm": 0.44995346665382385,
|
10284 |
+
"learning_rate": 3.36570966526395e-08,
|
10285 |
+
"loss": 0.6735,
|
10286 |
+
"step": 1451
|
10287 |
+
},
|
10288 |
+
{
|
10289 |
+
"epoch": 1.9859805094888014,
|
10290 |
+
"grad_norm": 0.4024134874343872,
|
10291 |
+
"learning_rate": 2.8281563790166776e-08,
|
10292 |
+
"loss": 0.8033,
|
10293 |
+
"step": 1452
|
10294 |
+
},
|
10295 |
+
{
|
10296 |
+
"epoch": 1.9873482646606258,
|
10297 |
+
"grad_norm": 0.4701332449913025,
|
10298 |
+
"learning_rate": 2.337338443361148e-08,
|
10299 |
+
"loss": 0.8692,
|
10300 |
+
"step": 1453
|
10301 |
+
},
|
10302 |
+
{
|
10303 |
+
"epoch": 1.98871601983245,
|
10304 |
+
"grad_norm": 0.4885278046131134,
|
10305 |
+
"learning_rate": 1.8932581528008364e-08,
|
10306 |
+
"loss": 0.7488,
|
10307 |
+
"step": 1454
|
10308 |
+
},
|
10309 |
+
{
|
10310 |
+
"epoch": 1.9900837750042744,
|
10311 |
+
"grad_norm": 0.4467014670372009,
|
10312 |
+
"learning_rate": 1.4959175833495486e-08,
|
10313 |
+
"loss": 0.669,
|
10314 |
+
"step": 1455
|
10315 |
+
},
|
10316 |
+
{
|
10317 |
+
"epoch": 1.9914515301760984,
|
10318 |
+
"grad_norm": 0.5788629651069641,
|
10319 |
+
"learning_rate": 1.1453185925158761e-08,
|
10320 |
+
"loss": 0.7926,
|
10321 |
+
"step": 1456
|
10322 |
+
},
|
10323 |
+
{
|
10324 |
+
"epoch": 1.9928192853479227,
|
10325 |
+
"grad_norm": 0.5242011547088623,
|
10326 |
+
"learning_rate": 8.414628192998652e-09,
|
10327 |
+
"loss": 0.9303,
|
10328 |
+
"step": 1457
|
10329 |
+
},
|
10330 |
+
{
|
10331 |
+
"epoch": 1.9941870405197468,
|
10332 |
+
"grad_norm": 0.3865733742713928,
|
10333 |
+
"learning_rate": 5.84351684185247e-09,
|
10334 |
+
"loss": 0.6104,
|
10335 |
+
"step": 1458
|
10336 |
+
},
|
10337 |
+
{
|
10338 |
+
"epoch": 1.9955547956915711,
|
10339 |
+
"grad_norm": 0.5221365690231323,
|
10340 |
+
"learning_rate": 3.739863891283335e-09,
|
10341 |
+
"loss": 0.9627,
|
10342 |
+
"step": 1459
|
10343 |
+
},
|
10344 |
+
{
|
10345 |
+
"epoch": 1.9969225508633954,
|
10346 |
+
"grad_norm": 0.5138529539108276,
|
10347 |
+
"learning_rate": 2.1036791755801866e-09,
|
10348 |
+
"loss": 0.613,
|
10349 |
+
"step": 1460
|
10350 |
+
},
|
10351 |
+
{
|
10352 |
+
"epoch": 1.9982903060352197,
|
10353 |
+
"grad_norm": 0.5094766020774841,
|
10354 |
+
"learning_rate": 9.349703436689617e-10,
|
10355 |
+
"loss": 0.8272,
|
10356 |
+
"step": 1461
|
10357 |
+
},
|
10358 |
+
{
|
10359 |
+
"epoch": 1.999658061207044,
|
10360 |
+
"grad_norm": 0.4561411142349243,
|
10361 |
+
"learning_rate": 2.337428590903912e-10,
|
10362 |
+
"loss": 0.7996,
|
10363 |
+
"step": 1462
|
10364 |
+
},
|
10365 |
+
{
|
10366 |
+
"epoch": 2.0010258163788683,
|
10367 |
+
"grad_norm": 1.2804397344589233,
|
10368 |
+
"learning_rate": 0.0,
|
10369 |
+
"loss": 1.2298,
|
10370 |
+
"step": 1463
|
10371 |
}
|
10372 |
],
|
10373 |
"logging_steps": 1,
|
|
|
10391 |
"should_evaluate": false,
|
10392 |
"should_log": false,
|
10393 |
"should_save": true,
|
10394 |
+
"should_training_stop": true
|
10395 |
},
|
10396 |
"attributes": {}
|
10397 |
}
|
10398 |
},
|
10399 |
+
"total_flos": 1.6517521711772467e+18,
|
10400 |
"train_batch_size": 4,
|
10401 |
"trial_name": null,
|
10402 |
"trial_params": null
|