Alphatao commited on
Commit
c98975f
·
verified ·
1 Parent(s): 1f149a9

Training in progress, step 1463, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62bb9bb9b77666d17bdc3df547411bff527086e2cbe55ec527b6c0bff62c3c6c
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76c50a918ed6e25310310327d96a41f7be0e1c8dba72e17f5343f2601e20241d
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2240d1cf7bee9ac6f7ad5b8ee959d9b94749f2444ea20777ea0317b7f0d24c5
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b696ff230e34810056a87e2deb52ecbe8fdb57a70eb3920f5040424edba8b000
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7fddfe0e996aab68b2e2a4ae0b2b5688241a5f64c97387ef9b362e165ee6f48
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a0be03340b8e06f61dec93cc182f3169930283c402a6cbbe5edbaf390086811
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ad3eba5ecd76309cae37fb299b7255ca533e46092d32ba5488b2545bff9b369
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ddba364fa66c16b2353d38cd96d0db084fafe0b8f317c23722a5d9cf61bf020
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.8337556719779968,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-1400",
4
- "epoch": 1.9148572405539408,
5
  "eval_steps": 100,
6
- "global_step": 1400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -9927,6 +9927,447 @@
9927
  "eval_samples_per_second": 12.086,
9928
  "eval_steps_per_second": 3.022,
9929
  "step": 1400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9930
  }
9931
  ],
9932
  "logging_steps": 1,
@@ -9950,12 +10391,12 @@
9950
  "should_evaluate": false,
9951
  "should_log": false,
9952
  "should_save": true,
9953
- "should_training_stop": false
9954
  },
9955
  "attributes": {}
9956
  }
9957
  },
9958
- "total_flos": 1.5806932897014743e+18,
9959
  "train_batch_size": 4,
9960
  "trial_name": null,
9961
  "trial_params": null
 
1
  {
2
  "best_metric": 0.8337556719779968,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-1400",
4
+ "epoch": 2.0010258163788683,
5
  "eval_steps": 100,
6
+ "global_step": 1463,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
9927
  "eval_samples_per_second": 12.086,
9928
  "eval_steps_per_second": 3.022,
9929
  "step": 1400
9930
+ },
9931
+ {
9932
+ "epoch": 1.916224995725765,
9933
+ "grad_norm": 0.461910605430603,
9934
+ "learning_rate": 8.971631787046853e-07,
9935
+ "loss": 0.8424,
9936
+ "step": 1401
9937
+ },
9938
+ {
9939
+ "epoch": 1.9175927508975894,
9940
+ "grad_norm": 0.4228653907775879,
9941
+ "learning_rate": 8.684974514604705e-07,
9942
+ "loss": 0.6749,
9943
+ "step": 1402
9944
+ },
9945
+ {
9946
+ "epoch": 1.9189605060694137,
9947
+ "grad_norm": 0.45295336842536926,
9948
+ "learning_rate": 8.402951498328926e-07,
9949
+ "loss": 0.8973,
9950
+ "step": 1403
9951
+ },
9952
+ {
9953
+ "epoch": 1.9203282612412378,
9954
+ "grad_norm": 0.4639895558357239,
9955
+ "learning_rate": 8.125564056637003e-07,
9956
+ "loss": 0.6882,
9957
+ "step": 1404
9958
+ },
9959
+ {
9960
+ "epoch": 1.921696016413062,
9961
+ "grad_norm": 0.5838170647621155,
9962
+ "learning_rate": 7.852813486275423e-07,
9963
+ "loss": 0.8531,
9964
+ "step": 1405
9965
+ },
9966
+ {
9967
+ "epoch": 1.9230637715848862,
9968
+ "grad_norm": 0.44935956597328186,
9969
+ "learning_rate": 7.584701062314237e-07,
9970
+ "loss": 0.6856,
9971
+ "step": 1406
9972
+ },
9973
+ {
9974
+ "epoch": 1.9244315267567105,
9975
+ "grad_norm": 0.5075057148933411,
9976
+ "learning_rate": 7.321228038140726e-07,
9977
+ "loss": 0.6377,
9978
+ "step": 1407
9979
+ },
9980
+ {
9981
+ "epoch": 1.9257992819285348,
9982
+ "grad_norm": 0.4068799912929535,
9983
+ "learning_rate": 7.062395645453634e-07,
9984
+ "loss": 0.9096,
9985
+ "step": 1408
9986
+ },
9987
+ {
9988
+ "epoch": 1.927167037100359,
9989
+ "grad_norm": 0.4617645740509033,
9990
+ "learning_rate": 6.808205094257503e-07,
9991
+ "loss": 0.6808,
9992
+ "step": 1409
9993
+ },
9994
+ {
9995
+ "epoch": 1.9285347922721834,
9996
+ "grad_norm": 0.44651269912719727,
9997
+ "learning_rate": 6.558657572856786e-07,
9998
+ "loss": 0.5143,
9999
+ "step": 1410
10000
+ },
10001
+ {
10002
+ "epoch": 1.9299025474440077,
10003
+ "grad_norm": 0.3676847517490387,
10004
+ "learning_rate": 6.313754247850523e-07,
10005
+ "loss": 0.611,
10006
+ "step": 1411
10007
+ },
10008
+ {
10009
+ "epoch": 1.9312703026158318,
10010
+ "grad_norm": 0.4552142024040222,
10011
+ "learning_rate": 6.073496264126788e-07,
10012
+ "loss": 0.9821,
10013
+ "step": 1412
10014
+ },
10015
+ {
10016
+ "epoch": 1.9326380577876558,
10017
+ "grad_norm": 0.4049084484577179,
10018
+ "learning_rate": 5.837884744857358e-07,
10019
+ "loss": 0.5781,
10020
+ "step": 1413
10021
+ },
10022
+ {
10023
+ "epoch": 1.9340058129594802,
10024
+ "grad_norm": 0.49279075860977173,
10025
+ "learning_rate": 5.606920791492387e-07,
10026
+ "loss": 0.6507,
10027
+ "step": 1414
10028
+ },
10029
+ {
10030
+ "epoch": 1.9353735681313045,
10031
+ "grad_norm": 0.47832998633384705,
10032
+ "learning_rate": 5.380605483755408e-07,
10033
+ "loss": 0.69,
10034
+ "step": 1415
10035
+ },
10036
+ {
10037
+ "epoch": 1.9367413233031288,
10038
+ "grad_norm": 0.5709347128868103,
10039
+ "learning_rate": 5.158939879638225e-07,
10040
+ "loss": 0.8283,
10041
+ "step": 1416
10042
+ },
10043
+ {
10044
+ "epoch": 1.938109078474953,
10045
+ "grad_norm": 0.44375723600387573,
10046
+ "learning_rate": 4.941925015395699e-07,
10047
+ "loss": 0.7771,
10048
+ "step": 1417
10049
+ },
10050
+ {
10051
+ "epoch": 1.9394768336467774,
10052
+ "grad_norm": 0.48919472098350525,
10053
+ "learning_rate": 4.729561905541524e-07,
10054
+ "loss": 0.7034,
10055
+ "step": 1418
10056
+ },
10057
+ {
10058
+ "epoch": 1.9408445888186014,
10059
+ "grad_norm": 0.38905012607574463,
10060
+ "learning_rate": 4.521851542842681e-07,
10061
+ "loss": 0.6598,
10062
+ "step": 1419
10063
+ },
10064
+ {
10065
+ "epoch": 1.9422123439904257,
10066
+ "grad_norm": 0.4131908714771271,
10067
+ "learning_rate": 4.31879489831577e-07,
10068
+ "loss": 0.7236,
10069
+ "step": 1420
10070
+ },
10071
+ {
10072
+ "epoch": 1.9435800991622498,
10073
+ "grad_norm": 0.37935328483581543,
10074
+ "learning_rate": 4.12039292122135e-07,
10075
+ "loss": 0.6964,
10076
+ "step": 1421
10077
+ },
10078
+ {
10079
+ "epoch": 1.9449478543340741,
10080
+ "grad_norm": 0.3789116442203522,
10081
+ "learning_rate": 3.9266465390603855e-07,
10082
+ "loss": 0.6997,
10083
+ "step": 1422
10084
+ },
10085
+ {
10086
+ "epoch": 1.9463156095058984,
10087
+ "grad_norm": 0.4881550669670105,
10088
+ "learning_rate": 3.7375566575695854e-07,
10089
+ "loss": 0.6896,
10090
+ "step": 1423
10091
+ },
10092
+ {
10093
+ "epoch": 1.9476833646777227,
10094
+ "grad_norm": 0.4398065507411957,
10095
+ "learning_rate": 3.5531241607170695e-07,
10096
+ "loss": 0.6534,
10097
+ "step": 1424
10098
+ },
10099
+ {
10100
+ "epoch": 1.949051119849547,
10101
+ "grad_norm": 0.45195892453193665,
10102
+ "learning_rate": 3.373349910698487e-07,
10103
+ "loss": 0.6845,
10104
+ "step": 1425
10105
+ },
10106
+ {
10107
+ "epoch": 1.9504188750213711,
10108
+ "grad_norm": 0.38856571912765503,
10109
+ "learning_rate": 3.1982347479327935e-07,
10110
+ "loss": 0.6648,
10111
+ "step": 1426
10112
+ },
10113
+ {
10114
+ "epoch": 1.9517866301931954,
10115
+ "grad_norm": 0.5333290100097656,
10116
+ "learning_rate": 3.027779491058369e-07,
10117
+ "loss": 0.8773,
10118
+ "step": 1427
10119
+ },
10120
+ {
10121
+ "epoch": 1.9531543853650195,
10122
+ "grad_norm": 0.42469778656959534,
10123
+ "learning_rate": 2.8619849369290185e-07,
10124
+ "loss": 0.6108,
10125
+ "step": 1428
10126
+ },
10127
+ {
10128
+ "epoch": 1.9545221405368438,
10129
+ "grad_norm": 0.4393293857574463,
10130
+ "learning_rate": 2.7008518606108644e-07,
10131
+ "loss": 0.7355,
10132
+ "step": 1429
10133
+ },
10134
+ {
10135
+ "epoch": 1.9558898957086681,
10136
+ "grad_norm": 0.4575292468070984,
10137
+ "learning_rate": 2.544381015377906e-07,
10138
+ "loss": 0.7924,
10139
+ "step": 1430
10140
+ },
10141
+ {
10142
+ "epoch": 1.9572576508804924,
10143
+ "grad_norm": 0.3630157709121704,
10144
+ "learning_rate": 2.3925731327089086e-07,
10145
+ "loss": 0.4605,
10146
+ "step": 1431
10147
+ },
10148
+ {
10149
+ "epoch": 1.9586254060523167,
10150
+ "grad_norm": 0.48664095997810364,
10151
+ "learning_rate": 2.2454289222842984e-07,
10152
+ "loss": 0.7356,
10153
+ "step": 1432
10154
+ },
10155
+ {
10156
+ "epoch": 1.959993161224141,
10157
+ "grad_norm": 0.49082890152931213,
10158
+ "learning_rate": 2.1029490719819411e-07,
10159
+ "loss": 0.654,
10160
+ "step": 1433
10161
+ },
10162
+ {
10163
+ "epoch": 1.961360916395965,
10164
+ "grad_norm": 0.3563566207885742,
10165
+ "learning_rate": 1.9651342478749223e-07,
10166
+ "loss": 0.5259,
10167
+ "step": 1434
10168
+ },
10169
+ {
10170
+ "epoch": 1.9627286715677894,
10171
+ "grad_norm": 0.4090525805950165,
10172
+ "learning_rate": 1.8319850942278839e-07,
10173
+ "loss": 0.8423,
10174
+ "step": 1435
10175
+ },
10176
+ {
10177
+ "epoch": 1.9640964267396135,
10178
+ "grad_norm": 0.39165446162223816,
10179
+ "learning_rate": 1.7035022334941364e-07,
10180
+ "loss": 0.7934,
10181
+ "step": 1436
10182
+ },
10183
+ {
10184
+ "epoch": 1.9654641819114378,
10185
+ "grad_norm": 0.4389599561691284,
10186
+ "learning_rate": 1.5796862663126634e-07,
10187
+ "loss": 0.5754,
10188
+ "step": 1437
10189
+ },
10190
+ {
10191
+ "epoch": 1.966831937083262,
10192
+ "grad_norm": 0.47216951847076416,
10193
+ "learning_rate": 1.4605377715053436e-07,
10194
+ "loss": 0.789,
10195
+ "step": 1438
10196
+ },
10197
+ {
10198
+ "epoch": 1.9681996922550864,
10199
+ "grad_norm": 0.39848554134368896,
10200
+ "learning_rate": 1.3460573060745106e-07,
10201
+ "loss": 0.7184,
10202
+ "step": 1439
10203
+ },
10204
+ {
10205
+ "epoch": 1.9695674474269107,
10206
+ "grad_norm": 0.550101637840271,
10207
+ "learning_rate": 1.2362454051998428e-07,
10208
+ "loss": 0.7593,
10209
+ "step": 1440
10210
+ },
10211
+ {
10212
+ "epoch": 1.9709352025987348,
10213
+ "grad_norm": 0.5361692905426025,
10214
+ "learning_rate": 1.1311025822364762e-07,
10215
+ "loss": 0.7345,
10216
+ "step": 1441
10217
+ },
10218
+ {
10219
+ "epoch": 1.972302957770559,
10220
+ "grad_norm": 0.4945903420448303,
10221
+ "learning_rate": 1.0306293287118962e-07,
10222
+ "loss": 0.781,
10223
+ "step": 1442
10224
+ },
10225
+ {
10226
+ "epoch": 1.9736707129423832,
10227
+ "grad_norm": 0.36988723278045654,
10228
+ "learning_rate": 9.348261143243831e-08,
10229
+ "loss": 0.7448,
10230
+ "step": 1443
10231
+ },
10232
+ {
10233
+ "epoch": 1.9750384681142075,
10234
+ "grad_norm": 0.38076066970825195,
10235
+ "learning_rate": 8.436933869402363e-08,
10236
+ "loss": 0.4683,
10237
+ "step": 1444
10238
+ },
10239
+ {
10240
+ "epoch": 1.9764062232860318,
10241
+ "grad_norm": 0.4106510281562805,
10242
+ "learning_rate": 7.572315725918877e-08,
10243
+ "loss": 0.9063,
10244
+ "step": 1445
10245
+ },
10246
+ {
10247
+ "epoch": 1.977773978457856,
10248
+ "grad_norm": 0.486651211977005,
10249
+ "learning_rate": 6.754410754759022e-08,
10250
+ "loss": 0.864,
10251
+ "step": 1446
10252
+ },
10253
+ {
10254
+ "epoch": 1.9791417336296804,
10255
+ "grad_norm": 0.4399421215057373,
10256
+ "learning_rate": 5.983222779514242e-08,
10257
+ "loss": 0.7105,
10258
+ "step": 1447
10259
+ },
10260
+ {
10261
+ "epoch": 1.9805094888015047,
10262
+ "grad_norm": 0.3864593803882599,
10263
+ "learning_rate": 5.2587554053751266e-08,
10264
+ "loss": 0.7329,
10265
+ "step": 1448
10266
+ },
10267
+ {
10268
+ "epoch": 1.9818772439733288,
10269
+ "grad_norm": 0.38582947850227356,
10270
+ "learning_rate": 4.581012019125863e-08,
10271
+ "loss": 0.7425,
10272
+ "step": 1449
10273
+ },
10274
+ {
10275
+ "epoch": 1.983244999145153,
10276
+ "grad_norm": 0.37019839882850647,
10277
+ "learning_rate": 3.949995789118699e-08,
10278
+ "loss": 0.5574,
10279
+ "step": 1450
10280
+ },
10281
+ {
10282
+ "epoch": 1.9846127543169771,
10283
+ "grad_norm": 0.44995346665382385,
10284
+ "learning_rate": 3.36570966526395e-08,
10285
+ "loss": 0.6735,
10286
+ "step": 1451
10287
+ },
10288
+ {
10289
+ "epoch": 1.9859805094888014,
10290
+ "grad_norm": 0.4024134874343872,
10291
+ "learning_rate": 2.8281563790166776e-08,
10292
+ "loss": 0.8033,
10293
+ "step": 1452
10294
+ },
10295
+ {
10296
+ "epoch": 1.9873482646606258,
10297
+ "grad_norm": 0.4701332449913025,
10298
+ "learning_rate": 2.337338443361148e-08,
10299
+ "loss": 0.8692,
10300
+ "step": 1453
10301
+ },
10302
+ {
10303
+ "epoch": 1.98871601983245,
10304
+ "grad_norm": 0.4885278046131134,
10305
+ "learning_rate": 1.8932581528008364e-08,
10306
+ "loss": 0.7488,
10307
+ "step": 1454
10308
+ },
10309
+ {
10310
+ "epoch": 1.9900837750042744,
10311
+ "grad_norm": 0.4467014670372009,
10312
+ "learning_rate": 1.4959175833495486e-08,
10313
+ "loss": 0.669,
10314
+ "step": 1455
10315
+ },
10316
+ {
10317
+ "epoch": 1.9914515301760984,
10318
+ "grad_norm": 0.5788629651069641,
10319
+ "learning_rate": 1.1453185925158761e-08,
10320
+ "loss": 0.7926,
10321
+ "step": 1456
10322
+ },
10323
+ {
10324
+ "epoch": 1.9928192853479227,
10325
+ "grad_norm": 0.5242011547088623,
10326
+ "learning_rate": 8.414628192998652e-09,
10327
+ "loss": 0.9303,
10328
+ "step": 1457
10329
+ },
10330
+ {
10331
+ "epoch": 1.9941870405197468,
10332
+ "grad_norm": 0.3865733742713928,
10333
+ "learning_rate": 5.84351684185247e-09,
10334
+ "loss": 0.6104,
10335
+ "step": 1458
10336
+ },
10337
+ {
10338
+ "epoch": 1.9955547956915711,
10339
+ "grad_norm": 0.5221365690231323,
10340
+ "learning_rate": 3.739863891283335e-09,
10341
+ "loss": 0.9627,
10342
+ "step": 1459
10343
+ },
10344
+ {
10345
+ "epoch": 1.9969225508633954,
10346
+ "grad_norm": 0.5138529539108276,
10347
+ "learning_rate": 2.1036791755801866e-09,
10348
+ "loss": 0.613,
10349
+ "step": 1460
10350
+ },
10351
+ {
10352
+ "epoch": 1.9982903060352197,
10353
+ "grad_norm": 0.5094766020774841,
10354
+ "learning_rate": 9.349703436689617e-10,
10355
+ "loss": 0.8272,
10356
+ "step": 1461
10357
+ },
10358
+ {
10359
+ "epoch": 1.999658061207044,
10360
+ "grad_norm": 0.4561411142349243,
10361
+ "learning_rate": 2.337428590903912e-10,
10362
+ "loss": 0.7996,
10363
+ "step": 1462
10364
+ },
10365
+ {
10366
+ "epoch": 2.0010258163788683,
10367
+ "grad_norm": 1.2804397344589233,
10368
+ "learning_rate": 0.0,
10369
+ "loss": 1.2298,
10370
+ "step": 1463
10371
  }
10372
  ],
10373
  "logging_steps": 1,
 
10391
  "should_evaluate": false,
10392
  "should_log": false,
10393
  "should_save": true,
10394
+ "should_training_stop": true
10395
  },
10396
  "attributes": {}
10397
  }
10398
  },
10399
+ "total_flos": 1.6517521711772467e+18,
10400
  "train_batch_size": 4,
10401
  "trial_name": null,
10402
  "trial_params": null