|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9999881495526456, |
|
"eval_steps": 500, |
|
"global_step": 56256, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0017775671031581442, |
|
"grad_norm": 1.7014890909194946, |
|
"learning_rate": 2.5e-06, |
|
"loss": 2.2156, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0035551342063162884, |
|
"grad_norm": 1.0592787265777588, |
|
"learning_rate": 5e-06, |
|
"loss": 2.1147, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.005332701309474433, |
|
"grad_norm": 0.7293457388877869, |
|
"learning_rate": 7.5e-06, |
|
"loss": 1.9982, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.007110268412632577, |
|
"grad_norm": 0.5222712755203247, |
|
"learning_rate": 1e-05, |
|
"loss": 1.9311, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.008887835515790721, |
|
"grad_norm": 0.37243402004241943, |
|
"learning_rate": 1.25e-05, |
|
"loss": 1.8657, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.010665402618948865, |
|
"grad_norm": 0.47352147102355957, |
|
"learning_rate": 1.5e-05, |
|
"loss": 1.8218, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.01244296972210701, |
|
"grad_norm": 0.2529783248901367, |
|
"learning_rate": 1.75e-05, |
|
"loss": 1.7801, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.014220536825265154, |
|
"grad_norm": 0.2332669496536255, |
|
"learning_rate": 2e-05, |
|
"loss": 1.7856, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.0159981039284233, |
|
"grad_norm": 0.3814944326877594, |
|
"learning_rate": 2.25e-05, |
|
"loss": 1.7645, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.017775671031581442, |
|
"grad_norm": 0.17010626196861267, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.7822, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.019553238134739588, |
|
"grad_norm": 0.1512402594089508, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 1.763, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.02133080523789773, |
|
"grad_norm": 0.18556953966617584, |
|
"learning_rate": 3e-05, |
|
"loss": 1.7465, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.023108372341055877, |
|
"grad_norm": 0.16216696798801422, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 1.752, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.02488593944421402, |
|
"grad_norm": 0.21479512751102448, |
|
"learning_rate": 3.5e-05, |
|
"loss": 1.7313, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.026663506547372165, |
|
"grad_norm": 0.1630880981683731, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 1.7159, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.028441073650530307, |
|
"grad_norm": 0.152820885181427, |
|
"learning_rate": 4e-05, |
|
"loss": 1.7233, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.030218640753688453, |
|
"grad_norm": 0.15548893809318542, |
|
"learning_rate": 4.25e-05, |
|
"loss": 1.7333, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.0319962078568466, |
|
"grad_norm": 0.437898188829422, |
|
"learning_rate": 4.5e-05, |
|
"loss": 1.7188, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.03377377496000474, |
|
"grad_norm": 0.18985818326473236, |
|
"learning_rate": 4.75e-05, |
|
"loss": 1.7269, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.035551342063162884, |
|
"grad_norm": 0.21630148589611053, |
|
"learning_rate": 5e-05, |
|
"loss": 1.7373, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.03732890916632103, |
|
"grad_norm": 0.14671586453914642, |
|
"learning_rate": 4.9999580904497634e-05, |
|
"loss": 1.7236, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.039106476269479176, |
|
"grad_norm": 0.209241583943367, |
|
"learning_rate": 4.99983236320418e-05, |
|
"loss": 1.7082, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.04088404337263732, |
|
"grad_norm": 0.20861445367336273, |
|
"learning_rate": 4.9996228224785886e-05, |
|
"loss": 1.7319, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.04266161047579546, |
|
"grad_norm": 0.1577518880367279, |
|
"learning_rate": 4.999329475298396e-05, |
|
"loss": 1.7084, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.044439177578953604, |
|
"grad_norm": 0.14321212470531464, |
|
"learning_rate": 4.998952331498839e-05, |
|
"loss": 1.715, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.04621674468211175, |
|
"grad_norm": 0.16167956590652466, |
|
"learning_rate": 4.99849140372466e-05, |
|
"loss": 1.7072, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.047994311785269896, |
|
"grad_norm": 0.14405596256256104, |
|
"learning_rate": 4.9979467074296805e-05, |
|
"loss": 1.7354, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.04977187888842804, |
|
"grad_norm": 0.18818779289722443, |
|
"learning_rate": 4.9973182608762805e-05, |
|
"loss": 1.7246, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.05154944599158618, |
|
"grad_norm": 0.14068296551704407, |
|
"learning_rate": 4.996606085134791e-05, |
|
"loss": 1.7138, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.05332701309474433, |
|
"grad_norm": 0.1438419222831726, |
|
"learning_rate": 4.995810204082784e-05, |
|
"loss": 1.7085, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.05510458019790247, |
|
"grad_norm": 0.1487807333469391, |
|
"learning_rate": 4.994930644404272e-05, |
|
"loss": 1.703, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.056882147301060615, |
|
"grad_norm": 0.1583404541015625, |
|
"learning_rate": 4.993967435588816e-05, |
|
"loss": 1.7036, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.05865971440421876, |
|
"grad_norm": 0.25886544585227966, |
|
"learning_rate": 4.992920609930535e-05, |
|
"loss": 1.6993, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.06043728150737691, |
|
"grad_norm": 0.18560856580734253, |
|
"learning_rate": 4.991790202527022e-05, |
|
"loss": 1.7111, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.06221484861053505, |
|
"grad_norm": 0.14303149282932281, |
|
"learning_rate": 4.990576251278172e-05, |
|
"loss": 1.7104, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.0639924157136932, |
|
"grad_norm": 0.1497068852186203, |
|
"learning_rate": 4.9892787968849033e-05, |
|
"loss": 1.7038, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.06576998281685134, |
|
"grad_norm": 0.17193421721458435, |
|
"learning_rate": 4.987897882847801e-05, |
|
"loss": 1.6955, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.06754754992000948, |
|
"grad_norm": 0.19913499057292938, |
|
"learning_rate": 4.9864335554656526e-05, |
|
"loss": 1.7009, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.06932511702316763, |
|
"grad_norm": 0.15236733853816986, |
|
"learning_rate": 4.984885863833901e-05, |
|
"loss": 1.6994, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.07110268412632577, |
|
"grad_norm": 0.18027468025684357, |
|
"learning_rate": 4.9832548598429955e-05, |
|
"loss": 1.6974, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.07288025122948391, |
|
"grad_norm": 0.19036361575126648, |
|
"learning_rate": 4.981540598176649e-05, |
|
"loss": 1.6957, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.07465781833264205, |
|
"grad_norm": 0.18389485776424408, |
|
"learning_rate": 4.979743136310011e-05, |
|
"loss": 1.7133, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.0764353854358002, |
|
"grad_norm": 0.16271525621414185, |
|
"learning_rate": 4.977862534507735e-05, |
|
"loss": 1.7093, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.07821295253895835, |
|
"grad_norm": 0.15076510608196259, |
|
"learning_rate": 4.975898855821964e-05, |
|
"loss": 1.7007, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.0799905196421165, |
|
"grad_norm": 0.6439979672431946, |
|
"learning_rate": 4.9738521660902074e-05, |
|
"loss": 1.7049, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.08176808674527464, |
|
"grad_norm": 0.23859179019927979, |
|
"learning_rate": 4.971722533933144e-05, |
|
"loss": 1.7128, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.08354565384843278, |
|
"grad_norm": 0.18766574561595917, |
|
"learning_rate": 4.969510030752314e-05, |
|
"loss": 1.6976, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.08532322095159092, |
|
"grad_norm": 0.1395421326160431, |
|
"learning_rate": 4.9672147307277285e-05, |
|
"loss": 1.6957, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.08710078805474906, |
|
"grad_norm": 0.15366794168949127, |
|
"learning_rate": 4.9648367108153795e-05, |
|
"loss": 1.6966, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.08887835515790721, |
|
"grad_norm": 0.14610810577869415, |
|
"learning_rate": 4.9623760507446646e-05, |
|
"loss": 1.6964, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.09065592226106535, |
|
"grad_norm": 0.19104835391044617, |
|
"learning_rate": 4.9598328330157084e-05, |
|
"loss": 1.697, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.0924334893642235, |
|
"grad_norm": 0.15467491745948792, |
|
"learning_rate": 4.957207142896599e-05, |
|
"loss": 1.7051, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.09421105646738165, |
|
"grad_norm": 0.15119831264019012, |
|
"learning_rate": 4.9544990684205324e-05, |
|
"loss": 1.6961, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.09598862357053979, |
|
"grad_norm": 0.13215667009353638, |
|
"learning_rate": 4.951708700382853e-05, |
|
"loss": 1.6961, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.09776619067369793, |
|
"grad_norm": 0.1464473158121109, |
|
"learning_rate": 4.948836132338017e-05, |
|
"loss": 1.6968, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.09954375777685608, |
|
"grad_norm": 0.17705056071281433, |
|
"learning_rate": 4.945881460596453e-05, |
|
"loss": 1.7023, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.10132132488001422, |
|
"grad_norm": 0.1780652105808258, |
|
"learning_rate": 4.942844784221331e-05, |
|
"loss": 1.7127, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.10309889198317236, |
|
"grad_norm": 0.13343891501426697, |
|
"learning_rate": 4.9397262050252444e-05, |
|
"loss": 1.6882, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.1048764590863305, |
|
"grad_norm": 0.14272858202457428, |
|
"learning_rate": 4.9365258275667935e-05, |
|
"loss": 1.7006, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.10665402618948866, |
|
"grad_norm": 0.1572321206331253, |
|
"learning_rate": 4.933243759147084e-05, |
|
"loss": 1.6909, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.1084315932926468, |
|
"grad_norm": 0.1878873109817505, |
|
"learning_rate": 4.9298801098061234e-05, |
|
"loss": 1.7001, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.11020916039580494, |
|
"grad_norm": 0.15412138402462006, |
|
"learning_rate": 4.926434992319137e-05, |
|
"loss": 1.7009, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.11198672749896309, |
|
"grad_norm": 0.16579018533229828, |
|
"learning_rate": 4.922908522192785e-05, |
|
"loss": 1.6903, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.11376429460212123, |
|
"grad_norm": 0.176075279712677, |
|
"learning_rate": 4.919300817661288e-05, |
|
"loss": 1.6814, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.11554186170527937, |
|
"grad_norm": 0.1489766240119934, |
|
"learning_rate": 4.9156119996824646e-05, |
|
"loss": 1.6834, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.11731942880843751, |
|
"grad_norm": 0.14244747161865234, |
|
"learning_rate": 4.911842191933679e-05, |
|
"loss": 1.698, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.11909699591159566, |
|
"grad_norm": 0.18538010120391846, |
|
"learning_rate": 4.9079915208076874e-05, |
|
"loss": 1.7075, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.12087456301475381, |
|
"grad_norm": 0.13722339272499084, |
|
"learning_rate": 4.9040601154084064e-05, |
|
"loss": 1.6904, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.12265213011791196, |
|
"grad_norm": 0.14853331446647644, |
|
"learning_rate": 4.900048107546581e-05, |
|
"loss": 1.7006, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.1244296972210701, |
|
"grad_norm": 0.1475294679403305, |
|
"learning_rate": 4.895955631735369e-05, |
|
"loss": 1.7084, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.12620726432422824, |
|
"grad_norm": 0.16500729322433472, |
|
"learning_rate": 4.8917828251858245e-05, |
|
"loss": 1.6824, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.1279848314273864, |
|
"grad_norm": 0.14395256340503693, |
|
"learning_rate": 4.8875727542547924e-05, |
|
"loss": 1.6846, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.12976239853054453, |
|
"grad_norm": 0.14854487776756287, |
|
"learning_rate": 4.8832405083980224e-05, |
|
"loss": 1.72, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.13153996563370268, |
|
"grad_norm": 0.13804668188095093, |
|
"learning_rate": 4.8788283581110025e-05, |
|
"loss": 1.6994, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.1333175327368608, |
|
"grad_norm": 0.19897769391536713, |
|
"learning_rate": 4.874336451322718e-05, |
|
"loss": 1.6748, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.13509509984001897, |
|
"grad_norm": 0.18809333443641663, |
|
"learning_rate": 4.869764938636205e-05, |
|
"loss": 1.7039, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.1368726669431771, |
|
"grad_norm": 0.15036119520664215, |
|
"learning_rate": 4.865113973323494e-05, |
|
"loss": 1.6873, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.13865023404633525, |
|
"grad_norm": 0.24881285429000854, |
|
"learning_rate": 4.8603837113204786e-05, |
|
"loss": 1.7069, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.14042780114949338, |
|
"grad_norm": 0.13876497745513916, |
|
"learning_rate": 4.85557431122168e-05, |
|
"loss": 1.6825, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.14220536825265154, |
|
"grad_norm": 0.1649981439113617, |
|
"learning_rate": 4.850685934274935e-05, |
|
"loss": 1.6943, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.1439829353558097, |
|
"grad_norm": 0.14828725159168243, |
|
"learning_rate": 4.845718744375987e-05, |
|
"loss": 1.6928, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.14576050245896782, |
|
"grad_norm": 0.15515898168087006, |
|
"learning_rate": 4.84067290806299e-05, |
|
"loss": 1.6938, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.14753806956212598, |
|
"grad_norm": 0.21222877502441406, |
|
"learning_rate": 4.83554859451093e-05, |
|
"loss": 1.6775, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.1493156366652841, |
|
"grad_norm": 0.14965397119522095, |
|
"learning_rate": 4.830345975525948e-05, |
|
"loss": 1.6952, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.15109320376844226, |
|
"grad_norm": 0.1583070456981659, |
|
"learning_rate": 4.8250652255395806e-05, |
|
"loss": 1.6856, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.1528707708716004, |
|
"grad_norm": 0.1827002763748169, |
|
"learning_rate": 4.819706521602914e-05, |
|
"loss": 1.696, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.15464833797475855, |
|
"grad_norm": 0.21312415599822998, |
|
"learning_rate": 4.8142700433806456e-05, |
|
"loss": 1.6839, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.1564259050779167, |
|
"grad_norm": 0.14075049757957458, |
|
"learning_rate": 4.80875597314506e-05, |
|
"loss": 1.6846, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.15820347218107483, |
|
"grad_norm": 0.15312770009040833, |
|
"learning_rate": 4.8031644957699214e-05, |
|
"loss": 1.6856, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.159981039284233, |
|
"grad_norm": 0.16638757288455963, |
|
"learning_rate": 4.797495798724271e-05, |
|
"loss": 1.6922, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.16175860638739112, |
|
"grad_norm": 0.13447363674640656, |
|
"learning_rate": 4.791750072066143e-05, |
|
"loss": 1.6845, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.16353617349054927, |
|
"grad_norm": 0.1486334651708603, |
|
"learning_rate": 4.785927508436194e-05, |
|
"loss": 1.6966, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.1653137405937074, |
|
"grad_norm": 0.1405581384897232, |
|
"learning_rate": 4.780028303051243e-05, |
|
"loss": 1.6883, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.16709130769686556, |
|
"grad_norm": 0.1692507416009903, |
|
"learning_rate": 4.774052653697725e-05, |
|
"loss": 1.6829, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.1688688748000237, |
|
"grad_norm": 0.17827360332012177, |
|
"learning_rate": 4.76800076072506e-05, |
|
"loss": 1.698, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.17064644190318184, |
|
"grad_norm": 0.1813431978225708, |
|
"learning_rate": 4.7618728270389405e-05, |
|
"loss": 1.6936, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.17242400900634, |
|
"grad_norm": 0.15732981264591217, |
|
"learning_rate": 4.755669058094521e-05, |
|
"loss": 1.6756, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.17420157610949813, |
|
"grad_norm": 0.1365622580051422, |
|
"learning_rate": 4.749389661889535e-05, |
|
"loss": 1.6869, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.17597914321265629, |
|
"grad_norm": 0.14390863478183746, |
|
"learning_rate": 4.7430348489573175e-05, |
|
"loss": 1.6986, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.17775671031581441, |
|
"grad_norm": 0.17032405734062195, |
|
"learning_rate": 4.7366048323597524e-05, |
|
"loss": 1.6997, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.17953427741897257, |
|
"grad_norm": 0.15666988492012024, |
|
"learning_rate": 4.73009982768012e-05, |
|
"loss": 1.6908, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.1813118445221307, |
|
"grad_norm": 0.12964856624603271, |
|
"learning_rate": 4.723520053015879e-05, |
|
"loss": 1.676, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.18308941162528886, |
|
"grad_norm": 0.15315160155296326, |
|
"learning_rate": 4.716865728971346e-05, |
|
"loss": 1.6899, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.184866978728447, |
|
"grad_norm": 0.17329467833042145, |
|
"learning_rate": 4.710137078650302e-05, |
|
"loss": 1.6755, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.18664454583160514, |
|
"grad_norm": 0.16102010011672974, |
|
"learning_rate": 4.703334327648516e-05, |
|
"loss": 1.6779, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.1884221129347633, |
|
"grad_norm": 0.170249804854393, |
|
"learning_rate": 4.6964577040461745e-05, |
|
"loss": 1.7001, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.19019968003792143, |
|
"grad_norm": 0.14801470935344696, |
|
"learning_rate": 4.689507438400239e-05, |
|
"loss": 1.6881, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.19197724714107958, |
|
"grad_norm": 0.2009027749300003, |
|
"learning_rate": 4.682483763736718e-05, |
|
"loss": 1.6944, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.1937548142442377, |
|
"grad_norm": 0.15776540338993073, |
|
"learning_rate": 4.6753869155428454e-05, |
|
"loss": 1.6849, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.19553238134739587, |
|
"grad_norm": 0.1666073054075241, |
|
"learning_rate": 4.6682171317591947e-05, |
|
"loss": 1.6986, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.197309948450554, |
|
"grad_norm": 0.204326793551445, |
|
"learning_rate": 4.660974652771698e-05, |
|
"loss": 1.6927, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.19908751555371215, |
|
"grad_norm": 0.17319276928901672, |
|
"learning_rate": 4.653659721403583e-05, |
|
"loss": 1.6804, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.2008650826568703, |
|
"grad_norm": 0.19199158251285553, |
|
"learning_rate": 4.6462725829072386e-05, |
|
"loss": 1.6692, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.20264264976002844, |
|
"grad_norm": 0.15492092072963715, |
|
"learning_rate": 4.638813484955985e-05, |
|
"loss": 1.695, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.2044202168631866, |
|
"grad_norm": 0.2306402027606964, |
|
"learning_rate": 4.631282677635775e-05, |
|
"loss": 1.7068, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.20619778396634472, |
|
"grad_norm": 0.20894396305084229, |
|
"learning_rate": 4.62375678895541e-05, |
|
"loss": 1.7145, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.20797535106950288, |
|
"grad_norm": 0.31019458174705505, |
|
"learning_rate": 4.616084033514059e-05, |
|
"loss": 1.688, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.209752918172661, |
|
"grad_norm": 0.22205297648906708, |
|
"learning_rate": 4.6083403307686204e-05, |
|
"loss": 1.6989, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.21153048527581916, |
|
"grad_norm": 0.15302753448486328, |
|
"learning_rate": 4.600525940347174e-05, |
|
"loss": 1.6929, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.21330805237897732, |
|
"grad_norm": 0.1468563824892044, |
|
"learning_rate": 4.5926411242477904e-05, |
|
"loss": 1.6924, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.21508561948213545, |
|
"grad_norm": 0.1425103396177292, |
|
"learning_rate": 4.584686146829748e-05, |
|
"loss": 1.6904, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.2168631865852936, |
|
"grad_norm": 0.1582684963941574, |
|
"learning_rate": 4.5766612748046654e-05, |
|
"loss": 1.6804, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.21864075368845173, |
|
"grad_norm": 0.16768227517604828, |
|
"learning_rate": 4.5685667772275654e-05, |
|
"loss": 1.6796, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.2204183207916099, |
|
"grad_norm": 0.1611669808626175, |
|
"learning_rate": 4.56040292548785e-05, |
|
"loss": 1.6749, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.22219588789476802, |
|
"grad_norm": 0.13350994884967804, |
|
"learning_rate": 4.5521699933002026e-05, |
|
"loss": 1.7013, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.22397345499792617, |
|
"grad_norm": 0.14940309524536133, |
|
"learning_rate": 4.5438682566954124e-05, |
|
"loss": 1.6814, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.2257510221010843, |
|
"grad_norm": 0.13618171215057373, |
|
"learning_rate": 4.5354979940111166e-05, |
|
"loss": 1.6852, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.22752858920424246, |
|
"grad_norm": 0.13858729600906372, |
|
"learning_rate": 4.52705948588247e-05, |
|
"loss": 1.7117, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.22930615630740062, |
|
"grad_norm": 0.1507061868906021, |
|
"learning_rate": 4.518553015232737e-05, |
|
"loss": 1.6789, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.23108372341055874, |
|
"grad_norm": 0.17016680538654327, |
|
"learning_rate": 4.5099788672638064e-05, |
|
"loss": 1.6925, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.2328612905137169, |
|
"grad_norm": 0.1454281359910965, |
|
"learning_rate": 4.501337329446625e-05, |
|
"loss": 1.6942, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.23463885761687503, |
|
"grad_norm": 0.13199830055236816, |
|
"learning_rate": 4.492628691511563e-05, |
|
"loss": 1.6844, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.23641642472003319, |
|
"grad_norm": 0.1504441648721695, |
|
"learning_rate": 4.483853245438702e-05, |
|
"loss": 1.6803, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.23819399182319131, |
|
"grad_norm": 0.14603202044963837, |
|
"learning_rate": 4.4750112854480376e-05, |
|
"loss": 1.6776, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.23997155892634947, |
|
"grad_norm": 0.20005132257938385, |
|
"learning_rate": 4.466103107989624e-05, |
|
"loss": 1.6995, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.24174912602950763, |
|
"grad_norm": 0.20756611227989197, |
|
"learning_rate": 4.457129011733629e-05, |
|
"loss": 1.691, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.24352669313266576, |
|
"grad_norm": 0.1558232605457306, |
|
"learning_rate": 4.448089297560325e-05, |
|
"loss": 1.6815, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.2453042602358239, |
|
"grad_norm": 0.18202444911003113, |
|
"learning_rate": 4.4389842685499944e-05, |
|
"loss": 1.6758, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.24708182733898204, |
|
"grad_norm": 0.1685715764760971, |
|
"learning_rate": 4.429814229972775e-05, |
|
"loss": 1.684, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.2488593944421402, |
|
"grad_norm": 0.1511525958776474, |
|
"learning_rate": 4.420579489278419e-05, |
|
"loss": 1.672, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.25063696154529835, |
|
"grad_norm": 0.13901682198047638, |
|
"learning_rate": 4.411280356085991e-05, |
|
"loss": 1.6787, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.2524145286484565, |
|
"grad_norm": 0.15039555728435516, |
|
"learning_rate": 4.4019171421734826e-05, |
|
"loss": 1.6854, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.2541920957516146, |
|
"grad_norm": 0.14443428814411163, |
|
"learning_rate": 4.392490161467361e-05, |
|
"loss": 1.692, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.2559696628547728, |
|
"grad_norm": 0.1846003532409668, |
|
"learning_rate": 4.382999730032042e-05, |
|
"loss": 1.6828, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.2577472299579309, |
|
"grad_norm": 0.1854531168937683, |
|
"learning_rate": 4.3734461660592985e-05, |
|
"loss": 1.687, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.25952479706108905, |
|
"grad_norm": 0.21927309036254883, |
|
"learning_rate": 4.363829789857584e-05, |
|
"loss": 1.6873, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.2613023641642472, |
|
"grad_norm": 0.22467108070850372, |
|
"learning_rate": 4.3541509238413e-05, |
|
"loss": 1.6893, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.26307993126740536, |
|
"grad_norm": 0.20354901254177094, |
|
"learning_rate": 4.344409892519985e-05, |
|
"loss": 1.6937, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.2648574983705635, |
|
"grad_norm": 0.15710541605949402, |
|
"learning_rate": 4.3346070224874304e-05, |
|
"loss": 1.6897, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.2666350654737216, |
|
"grad_norm": 0.16541948914527893, |
|
"learning_rate": 4.3247426424107364e-05, |
|
"loss": 1.6786, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.26841263257687975, |
|
"grad_norm": 0.2642144560813904, |
|
"learning_rate": 4.314817083019289e-05, |
|
"loss": 1.6734, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.27019019968003793, |
|
"grad_norm": 0.15868282318115234, |
|
"learning_rate": 4.3048306770936716e-05, |
|
"loss": 1.6839, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.27196776678319606, |
|
"grad_norm": 0.20308874547481537, |
|
"learning_rate": 4.2947837594545094e-05, |
|
"loss": 1.6897, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.2737453338863542, |
|
"grad_norm": 0.1677379161119461, |
|
"learning_rate": 4.2847780346308484e-05, |
|
"loss": 1.6795, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.2755229009895124, |
|
"grad_norm": 0.14271363615989685, |
|
"learning_rate": 4.27461170280642e-05, |
|
"loss": 1.6788, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.2773004680926705, |
|
"grad_norm": 0.16974543035030365, |
|
"learning_rate": 4.2643858724393424e-05, |
|
"loss": 1.6868, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.27907803519582863, |
|
"grad_norm": 0.15350034832954407, |
|
"learning_rate": 4.254100886377579e-05, |
|
"loss": 1.6737, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.28085560229898676, |
|
"grad_norm": 0.18880531191825867, |
|
"learning_rate": 4.2437570894524404e-05, |
|
"loss": 1.6816, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.28263316940214495, |
|
"grad_norm": 0.14773619174957275, |
|
"learning_rate": 4.233354828467028e-05, |
|
"loss": 1.6799, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.2844107365053031, |
|
"grad_norm": 0.1591775268316269, |
|
"learning_rate": 4.2228944521846054e-05, |
|
"loss": 1.6704, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.2861883036084612, |
|
"grad_norm": 0.1422175019979477, |
|
"learning_rate": 4.2123763113169053e-05, |
|
"loss": 1.6882, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.2879658707116194, |
|
"grad_norm": 0.1634337157011032, |
|
"learning_rate": 4.2018007585123695e-05, |
|
"loss": 1.6716, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.2897434378147775, |
|
"grad_norm": 0.1616571843624115, |
|
"learning_rate": 4.1911681483443284e-05, |
|
"loss": 1.6814, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.29152100491793564, |
|
"grad_norm": 0.1432926207780838, |
|
"learning_rate": 4.180478837299109e-05, |
|
"loss": 1.6781, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.2932985720210938, |
|
"grad_norm": 0.14793144166469574, |
|
"learning_rate": 4.1697331837640866e-05, |
|
"loss": 1.675, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.29507613912425196, |
|
"grad_norm": 0.14463911950588226, |
|
"learning_rate": 4.158931548015665e-05, |
|
"loss": 1.6866, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.2968537062274101, |
|
"grad_norm": 0.14069664478302002, |
|
"learning_rate": 4.148074292207203e-05, |
|
"loss": 1.6848, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.2986312733305682, |
|
"grad_norm": 0.16380813717842102, |
|
"learning_rate": 4.137161780356866e-05, |
|
"loss": 1.6676, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.3004088404337264, |
|
"grad_norm": 0.16407877206802368, |
|
"learning_rate": 4.126304322856126e-05, |
|
"loss": 1.6757, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.3021864075368845, |
|
"grad_norm": 0.1595907211303711, |
|
"learning_rate": 4.1152829417731065e-05, |
|
"loss": 1.6894, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.30396397464004266, |
|
"grad_norm": 0.1606622189283371, |
|
"learning_rate": 4.104207404064811e-05, |
|
"loss": 1.675, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.3057415417432008, |
|
"grad_norm": 0.1525093913078308, |
|
"learning_rate": 4.093078081067882e-05, |
|
"loss": 1.6864, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.30751910884635897, |
|
"grad_norm": 0.18236620724201202, |
|
"learning_rate": 4.081895345922257e-05, |
|
"loss": 1.6756, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.3092966759495171, |
|
"grad_norm": 0.1441909819841385, |
|
"learning_rate": 4.070659573558656e-05, |
|
"loss": 1.6889, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.3110742430526752, |
|
"grad_norm": 0.182451993227005, |
|
"learning_rate": 4.059371140686013e-05, |
|
"loss": 1.6873, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.3128518101558334, |
|
"grad_norm": 0.17770905792713165, |
|
"learning_rate": 4.048030425778841e-05, |
|
"loss": 1.6881, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.31462937725899154, |
|
"grad_norm": 0.14115692675113678, |
|
"learning_rate": 4.0366378090645516e-05, |
|
"loss": 1.6789, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.31640694436214967, |
|
"grad_norm": 0.1899385303258896, |
|
"learning_rate": 4.0251936725106985e-05, |
|
"loss": 1.6796, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.3181845114653078, |
|
"grad_norm": 0.15735557675361633, |
|
"learning_rate": 4.013698399812173e-05, |
|
"loss": 1.6774, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.319962078568466, |
|
"grad_norm": 0.19576773047447205, |
|
"learning_rate": 4.002152376378343e-05, |
|
"loss": 1.6815, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.3217396456716241, |
|
"grad_norm": 0.17470435798168182, |
|
"learning_rate": 3.9905559893201285e-05, |
|
"loss": 1.6879, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.32351721277478224, |
|
"grad_norm": 0.2007114738225937, |
|
"learning_rate": 3.9789096274370205e-05, |
|
"loss": 1.6728, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.32529477987794037, |
|
"grad_norm": 0.13873660564422607, |
|
"learning_rate": 3.967213681204051e-05, |
|
"loss": 1.6911, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.32707234698109855, |
|
"grad_norm": 0.15716473758220673, |
|
"learning_rate": 3.955468542758697e-05, |
|
"loss": 1.6881, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.3288499140842567, |
|
"grad_norm": 0.15948426723480225, |
|
"learning_rate": 3.9436746058877335e-05, |
|
"loss": 1.7005, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.3306274811874148, |
|
"grad_norm": 0.15321232378482819, |
|
"learning_rate": 3.9318322660140324e-05, |
|
"loss": 1.6858, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.332405048290573, |
|
"grad_norm": 0.16375650465488434, |
|
"learning_rate": 3.919941920183305e-05, |
|
"loss": 1.6702, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.3341826153937311, |
|
"grad_norm": 0.14579662680625916, |
|
"learning_rate": 3.908003967050787e-05, |
|
"loss": 1.6779, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.33596018249688925, |
|
"grad_norm": 0.19252930581569672, |
|
"learning_rate": 3.896018806867876e-05, |
|
"loss": 1.6847, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.3377377496000474, |
|
"grad_norm": 0.1748981475830078, |
|
"learning_rate": 3.88398684146871e-05, |
|
"loss": 1.6524, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.33951531670320556, |
|
"grad_norm": 0.14768213033676147, |
|
"learning_rate": 3.871908474256696e-05, |
|
"loss": 1.6621, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.3412928838063637, |
|
"grad_norm": 0.18400093913078308, |
|
"learning_rate": 3.859784110190985e-05, |
|
"loss": 1.6792, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.3430704509095218, |
|
"grad_norm": 0.1892794668674469, |
|
"learning_rate": 3.8476141557728906e-05, |
|
"loss": 1.6883, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.34484801801268, |
|
"grad_norm": 0.13941031694412231, |
|
"learning_rate": 3.835399019032268e-05, |
|
"loss": 1.6685, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.34662558511583813, |
|
"grad_norm": 0.13327963650226593, |
|
"learning_rate": 3.8231391095138236e-05, |
|
"loss": 1.6791, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.34840315221899626, |
|
"grad_norm": 0.14174780249595642, |
|
"learning_rate": 3.810834838263396e-05, |
|
"loss": 1.6789, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.3501807193221544, |
|
"grad_norm": 0.2639550268650055, |
|
"learning_rate": 3.798486617814162e-05, |
|
"loss": 1.6694, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.35195828642531257, |
|
"grad_norm": 0.14735499024391174, |
|
"learning_rate": 3.786094862172816e-05, |
|
"loss": 1.6751, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.3537358535284707, |
|
"grad_norm": 0.1680241823196411, |
|
"learning_rate": 3.7736599868056804e-05, |
|
"loss": 1.6791, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.35551342063162883, |
|
"grad_norm": 0.15196190774440765, |
|
"learning_rate": 3.761182408624783e-05, |
|
"loss": 1.6741, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.357290987734787, |
|
"grad_norm": 0.14523537456989288, |
|
"learning_rate": 3.748662545973876e-05, |
|
"loss": 1.6732, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.35906855483794514, |
|
"grad_norm": 0.1658225953578949, |
|
"learning_rate": 3.7361008186144095e-05, |
|
"loss": 1.6842, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.36084612194110327, |
|
"grad_norm": 0.2060202807188034, |
|
"learning_rate": 3.723497647711458e-05, |
|
"loss": 1.6757, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.3626236890442614, |
|
"grad_norm": 0.15790830552577972, |
|
"learning_rate": 3.7108534558196005e-05, |
|
"loss": 1.6613, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.3644012561474196, |
|
"grad_norm": 0.15922047197818756, |
|
"learning_rate": 3.6981686668687545e-05, |
|
"loss": 1.6623, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.3661788232505777, |
|
"grad_norm": 0.17766642570495605, |
|
"learning_rate": 3.685443706149958e-05, |
|
"loss": 1.6847, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.36795639035373584, |
|
"grad_norm": 0.1501617580652237, |
|
"learning_rate": 3.672679000301118e-05, |
|
"loss": 1.6717, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.369733957456894, |
|
"grad_norm": 0.1573089063167572, |
|
"learning_rate": 3.659874977292696e-05, |
|
"loss": 1.6723, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.37151152456005215, |
|
"grad_norm": 0.15815529227256775, |
|
"learning_rate": 3.647032066413372e-05, |
|
"loss": 1.6782, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.3732890916632103, |
|
"grad_norm": 0.16356757283210754, |
|
"learning_rate": 3.634150698255639e-05, |
|
"loss": 1.6694, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.3750666587663684, |
|
"grad_norm": 0.14859165251255035, |
|
"learning_rate": 3.6213606854414085e-05, |
|
"loss": 1.6686, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.3768442258695266, |
|
"grad_norm": 0.15533782541751862, |
|
"learning_rate": 3.608404073421511e-05, |
|
"loss": 1.6675, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.3786217929726847, |
|
"grad_norm": 0.1758899837732315, |
|
"learning_rate": 3.595410299228654e-05, |
|
"loss": 1.6786, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.38039936007584285, |
|
"grad_norm": 0.15762227773666382, |
|
"learning_rate": 3.582379798513425e-05, |
|
"loss": 1.6662, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.382176927179001, |
|
"grad_norm": 0.1720816045999527, |
|
"learning_rate": 3.569313008157762e-05, |
|
"loss": 1.6942, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.38395449428215916, |
|
"grad_norm": 0.17334651947021484, |
|
"learning_rate": 3.556210366260312e-05, |
|
"loss": 1.6791, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.3857320613853173, |
|
"grad_norm": 0.15751953423023224, |
|
"learning_rate": 3.5430723121217376e-05, |
|
"loss": 1.6784, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.3875096284884754, |
|
"grad_norm": 0.16097095608711243, |
|
"learning_rate": 3.529899286229991e-05, |
|
"loss": 1.6689, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.3892871955916336, |
|
"grad_norm": 0.15864881873130798, |
|
"learning_rate": 3.5166917302455425e-05, |
|
"loss": 1.6738, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.39106476269479173, |
|
"grad_norm": 0.1525215208530426, |
|
"learning_rate": 3.5034500869865796e-05, |
|
"loss": 1.6887, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.39284232979794986, |
|
"grad_norm": 0.14002804458141327, |
|
"learning_rate": 3.490174800414151e-05, |
|
"loss": 1.6745, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.394619896901108, |
|
"grad_norm": 0.17650793492794037, |
|
"learning_rate": 3.47686631561729e-05, |
|
"loss": 1.6713, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.3963974640042662, |
|
"grad_norm": 0.16852478682994843, |
|
"learning_rate": 3.463525078798085e-05, |
|
"loss": 1.6872, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.3981750311074243, |
|
"grad_norm": 0.16134943068027496, |
|
"learning_rate": 3.450151537256725e-05, |
|
"loss": 1.677, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.39995259821058243, |
|
"grad_norm": 0.15445928275585175, |
|
"learning_rate": 3.4367461393764976e-05, |
|
"loss": 1.673, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.4017301653137406, |
|
"grad_norm": 0.15707698464393616, |
|
"learning_rate": 3.42330933460876e-05, |
|
"loss": 1.6687, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.40350773241689875, |
|
"grad_norm": 0.13525037467479706, |
|
"learning_rate": 3.4098415734578684e-05, |
|
"loss": 1.6729, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.4052852995200569, |
|
"grad_norm": 0.15618863701820374, |
|
"learning_rate": 3.3963433074660714e-05, |
|
"loss": 1.684, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.407062866623215, |
|
"grad_norm": 0.18125438690185547, |
|
"learning_rate": 3.382814989198375e-05, |
|
"loss": 1.6793, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.4088404337263732, |
|
"grad_norm": 0.1549660563468933, |
|
"learning_rate": 3.3692570722273676e-05, |
|
"loss": 1.6848, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.4106180008295313, |
|
"grad_norm": 0.18558810651302338, |
|
"learning_rate": 3.35567001111801e-05, |
|
"loss": 1.6687, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.41239556793268944, |
|
"grad_norm": 0.18007346987724304, |
|
"learning_rate": 3.3420542614123984e-05, |
|
"loss": 1.6714, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.41417313503584763, |
|
"grad_norm": 0.15658414363861084, |
|
"learning_rate": 3.328683432967708e-05, |
|
"loss": 1.6799, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.41595070213900576, |
|
"grad_norm": 0.18134590983390808, |
|
"learning_rate": 3.3150122275317875e-05, |
|
"loss": 1.6743, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.4177282692421639, |
|
"grad_norm": 0.15867780148983002, |
|
"learning_rate": 3.3013136966591515e-05, |
|
"loss": 1.6683, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.419505836345322, |
|
"grad_norm": 0.17692945897579193, |
|
"learning_rate": 3.287588299629216e-05, |
|
"loss": 1.6685, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.4212834034484802, |
|
"grad_norm": 0.13905645906925201, |
|
"learning_rate": 3.273836496622152e-05, |
|
"loss": 1.6715, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.4230609705516383, |
|
"grad_norm": 0.1454002857208252, |
|
"learning_rate": 3.260058748703464e-05, |
|
"loss": 1.6773, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.42483853765479646, |
|
"grad_norm": 0.13487789034843445, |
|
"learning_rate": 3.2462555178085255e-05, |
|
"loss": 1.655, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.42661610475795464, |
|
"grad_norm": 0.1867651492357254, |
|
"learning_rate": 3.2324272667270975e-05, |
|
"loss": 1.6725, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.42839367186111277, |
|
"grad_norm": 0.14305393397808075, |
|
"learning_rate": 3.218574459087805e-05, |
|
"loss": 1.6717, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.4301712389642709, |
|
"grad_norm": 0.14234061539173126, |
|
"learning_rate": 3.2046975593425975e-05, |
|
"loss": 1.6917, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.431948806067429, |
|
"grad_norm": 0.15563951432704926, |
|
"learning_rate": 3.1907970327511786e-05, |
|
"loss": 1.6725, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.4337263731705872, |
|
"grad_norm": 0.14877410233020782, |
|
"learning_rate": 3.176873345365402e-05, |
|
"loss": 1.6802, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.43550394027374534, |
|
"grad_norm": 0.16491292417049408, |
|
"learning_rate": 3.162926964013648e-05, |
|
"loss": 1.6671, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.43728150737690347, |
|
"grad_norm": 0.1698901653289795, |
|
"learning_rate": 3.1489583562851724e-05, |
|
"loss": 1.6782, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.4390590744800616, |
|
"grad_norm": 0.18841049075126648, |
|
"learning_rate": 3.1349679905144285e-05, |
|
"loss": 1.6671, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.4408366415832198, |
|
"grad_norm": 0.14066390693187714, |
|
"learning_rate": 3.120956335765367e-05, |
|
"loss": 1.6597, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.4426142086863779, |
|
"grad_norm": 0.14379048347473145, |
|
"learning_rate": 3.1069238618157064e-05, |
|
"loss": 1.6696, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.44439177578953604, |
|
"grad_norm": 0.17776834964752197, |
|
"learning_rate": 3.092871039141184e-05, |
|
"loss": 1.6769, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.4461693428926942, |
|
"grad_norm": 0.1451658457517624, |
|
"learning_rate": 3.078798338899784e-05, |
|
"loss": 1.6727, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.44794690999585235, |
|
"grad_norm": 0.14523907005786896, |
|
"learning_rate": 3.064706232915933e-05, |
|
"loss": 1.6858, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.4497244770990105, |
|
"grad_norm": 0.17594589293003082, |
|
"learning_rate": 3.050595193664693e-05, |
|
"loss": 1.6599, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.4515020442021686, |
|
"grad_norm": 0.14906199276447296, |
|
"learning_rate": 3.0364656942559087e-05, |
|
"loss": 1.666, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.4532796113053268, |
|
"grad_norm": 0.20227928459644318, |
|
"learning_rate": 3.0223182084183545e-05, |
|
"loss": 1.6799, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.4550571784084849, |
|
"grad_norm": 0.15447662770748138, |
|
"learning_rate": 3.0081532104838424e-05, |
|
"loss": 1.6709, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.45683474551164305, |
|
"grad_norm": 0.1919887661933899, |
|
"learning_rate": 2.9939711753713285e-05, |
|
"loss": 1.6863, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.45861231261480123, |
|
"grad_norm": 0.23030731081962585, |
|
"learning_rate": 2.9797725785709828e-05, |
|
"loss": 1.68, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.46038987971795936, |
|
"grad_norm": 0.14024241268634796, |
|
"learning_rate": 2.9655578961282497e-05, |
|
"loss": 1.6705, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.4621674468211175, |
|
"grad_norm": 0.14363612234592438, |
|
"learning_rate": 2.951327604627888e-05, |
|
"loss": 1.6695, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.4639450139242756, |
|
"grad_norm": 0.15318314731121063, |
|
"learning_rate": 2.9370821811779908e-05, |
|
"loss": 1.6665, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.4657225810274338, |
|
"grad_norm": 0.16767314076423645, |
|
"learning_rate": 2.9228221033939895e-05, |
|
"loss": 1.6627, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.46750014813059193, |
|
"grad_norm": 0.18546494841575623, |
|
"learning_rate": 2.9085478493826413e-05, |
|
"loss": 1.6892, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.46927771523375006, |
|
"grad_norm": 0.14965227246284485, |
|
"learning_rate": 2.8942598977259995e-05, |
|
"loss": 1.6681, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.47105528233690824, |
|
"grad_norm": 0.1423717737197876, |
|
"learning_rate": 2.879958727465365e-05, |
|
"loss": 1.662, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.47283284944006637, |
|
"grad_norm": 0.154624342918396, |
|
"learning_rate": 2.8656448180852285e-05, |
|
"loss": 1.683, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.4746104165432245, |
|
"grad_norm": 0.14358487725257874, |
|
"learning_rate": 2.8513186494971944e-05, |
|
"loss": 1.6704, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.47638798364638263, |
|
"grad_norm": 0.14034679532051086, |
|
"learning_rate": 2.836980702023888e-05, |
|
"loss": 1.672, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.4781655507495408, |
|
"grad_norm": 0.14413665235042572, |
|
"learning_rate": 2.822631456382853e-05, |
|
"loss": 1.6645, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.47994311785269894, |
|
"grad_norm": 0.14380885660648346, |
|
"learning_rate": 2.8082713936704348e-05, |
|
"loss": 1.6671, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.48172068495585707, |
|
"grad_norm": 0.15115170180797577, |
|
"learning_rate": 2.7939009953456487e-05, |
|
"loss": 1.6714, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.48349825205901525, |
|
"grad_norm": 0.16167448461055756, |
|
"learning_rate": 2.779520743214039e-05, |
|
"loss": 1.6691, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.4852758191621734, |
|
"grad_norm": 0.14348022639751434, |
|
"learning_rate": 2.765131119411526e-05, |
|
"loss": 1.6723, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.4870533862653315, |
|
"grad_norm": 0.14456488192081451, |
|
"learning_rate": 2.7507326063882376e-05, |
|
"loss": 1.6724, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.48883095336848964, |
|
"grad_norm": 0.15705521404743195, |
|
"learning_rate": 2.7363256868923388e-05, |
|
"loss": 1.6699, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.4906085204716478, |
|
"grad_norm": 0.1625920534133911, |
|
"learning_rate": 2.721910843953842e-05, |
|
"loss": 1.6644, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.49238608757480595, |
|
"grad_norm": 0.13969144225120544, |
|
"learning_rate": 2.7074885608684154e-05, |
|
"loss": 1.679, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.4941636546779641, |
|
"grad_norm": 0.1736816018819809, |
|
"learning_rate": 2.6930593211811763e-05, |
|
"loss": 1.672, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.4959412217811222, |
|
"grad_norm": 0.19090887904167175, |
|
"learning_rate": 2.678767996247037e-05, |
|
"loss": 1.6787, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.4977187888842804, |
|
"grad_norm": 0.17863860726356506, |
|
"learning_rate": 2.6643263524000922e-05, |
|
"loss": 1.6841, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.4994963559874385, |
|
"grad_norm": 0.1446855366230011, |
|
"learning_rate": 2.64987919907833e-05, |
|
"loss": 1.6655, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.5012739230905967, |
|
"grad_norm": 0.1714792400598526, |
|
"learning_rate": 2.6354270206607095e-05, |
|
"loss": 1.6716, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.5030514901937548, |
|
"grad_norm": 0.1719600409269333, |
|
"learning_rate": 2.6209703016946675e-05, |
|
"loss": 1.6551, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.504829057296913, |
|
"grad_norm": 0.18065394461154938, |
|
"learning_rate": 2.6065095268798772e-05, |
|
"loss": 1.6647, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.5066066244000711, |
|
"grad_norm": 0.14981447160243988, |
|
"learning_rate": 2.5920451810519935e-05, |
|
"loss": 1.6666, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.5083841915032292, |
|
"grad_norm": 0.14168864488601685, |
|
"learning_rate": 2.5775777491663976e-05, |
|
"loss": 1.6619, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.5101617586063873, |
|
"grad_norm": 0.1458740234375, |
|
"learning_rate": 2.563107716281941e-05, |
|
"loss": 1.6616, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.5119393257095456, |
|
"grad_norm": 0.15625467896461487, |
|
"learning_rate": 2.5486355675446804e-05, |
|
"loss": 1.6606, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.5137168928127037, |
|
"grad_norm": 0.1387881189584732, |
|
"learning_rate": 2.5341617881716105e-05, |
|
"loss": 1.6714, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.5154944599158618, |
|
"grad_norm": 0.16523011028766632, |
|
"learning_rate": 2.5196868634343986e-05, |
|
"loss": 1.6772, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.51727202701902, |
|
"grad_norm": 0.16577035188674927, |
|
"learning_rate": 2.505211278643112e-05, |
|
"loss": 1.6662, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.5190495941221781, |
|
"grad_norm": 0.20573437213897705, |
|
"learning_rate": 2.490735519129951e-05, |
|
"loss": 1.6763, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.5208271612253362, |
|
"grad_norm": 0.1646687239408493, |
|
"learning_rate": 2.4762600702329707e-05, |
|
"loss": 1.6713, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.5226047283284944, |
|
"grad_norm": 0.15127506852149963, |
|
"learning_rate": 2.461785417279814e-05, |
|
"loss": 1.6754, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.5243822954316525, |
|
"grad_norm": 0.13983801007270813, |
|
"learning_rate": 2.4473120455714367e-05, |
|
"loss": 1.6683, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.5261598625348107, |
|
"grad_norm": 0.14342284202575684, |
|
"learning_rate": 2.4328404403658382e-05, |
|
"loss": 1.6817, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.5279374296379689, |
|
"grad_norm": 0.16623562574386597, |
|
"learning_rate": 2.41837108686179e-05, |
|
"loss": 1.6819, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.529714996741127, |
|
"grad_norm": 0.14822550117969513, |
|
"learning_rate": 2.4039044701825705e-05, |
|
"loss": 1.6691, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.5314925638442851, |
|
"grad_norm": 0.28164225816726685, |
|
"learning_rate": 2.3894410753596987e-05, |
|
"loss": 1.6736, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.5332701309474432, |
|
"grad_norm": 0.20413027703762054, |
|
"learning_rate": 2.3751259642565925e-05, |
|
"loss": 1.6758, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.5350476980506014, |
|
"grad_norm": 0.15408293902873993, |
|
"learning_rate": 2.36067042347753e-05, |
|
"loss": 1.6683, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.5368252651537595, |
|
"grad_norm": 0.14037184417247772, |
|
"learning_rate": 2.346219554090377e-05, |
|
"loss": 1.6692, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.5386028322569177, |
|
"grad_norm": 0.15781673789024353, |
|
"learning_rate": 2.3317738405986828e-05, |
|
"loss": 1.6627, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.5403803993600759, |
|
"grad_norm": 0.1486879140138626, |
|
"learning_rate": 2.3173337673331313e-05, |
|
"loss": 1.6728, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.542157966463234, |
|
"grad_norm": 0.20719771087169647, |
|
"learning_rate": 2.302899818435304e-05, |
|
"loss": 1.665, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.5439355335663921, |
|
"grad_norm": 0.16389068961143494, |
|
"learning_rate": 2.288472477841445e-05, |
|
"loss": 1.671, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.5457131006695503, |
|
"grad_norm": 0.17496538162231445, |
|
"learning_rate": 2.274052229266239e-05, |
|
"loss": 1.6686, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.5474906677727084, |
|
"grad_norm": 0.32813844084739685, |
|
"learning_rate": 2.259639556186592e-05, |
|
"loss": 1.6752, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.5492682348758665, |
|
"grad_norm": 0.5157455205917358, |
|
"learning_rate": 2.2452349418254213e-05, |
|
"loss": 1.6838, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.5510458019790248, |
|
"grad_norm": 0.14271293580532074, |
|
"learning_rate": 2.2308388691354538e-05, |
|
"loss": 1.6753, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.5528233690821829, |
|
"grad_norm": 0.28928157687187195, |
|
"learning_rate": 2.216451820783035e-05, |
|
"loss": 1.6693, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.554600936185341, |
|
"grad_norm": 0.144424170255661, |
|
"learning_rate": 2.2020742791319452e-05, |
|
"loss": 1.6622, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.5563785032884991, |
|
"grad_norm": 0.16234175860881805, |
|
"learning_rate": 2.1877067262272284e-05, |
|
"loss": 1.6818, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.5581560703916573, |
|
"grad_norm": 0.19830650091171265, |
|
"learning_rate": 2.173349643779028e-05, |
|
"loss": 1.6768, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.5599336374948154, |
|
"grad_norm": null, |
|
"learning_rate": 2.159146918661628e-05, |
|
"loss": 1.6824, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.5617112045979735, |
|
"grad_norm": 0.16732582449913025, |
|
"learning_rate": 2.144812104128816e-05, |
|
"loss": 1.6745, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.5634887717011318, |
|
"grad_norm": 0.1458720713853836, |
|
"learning_rate": 2.130489198207977e-05, |
|
"loss": 1.6559, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.5652663388042899, |
|
"grad_norm": 0.1879144012928009, |
|
"learning_rate": 2.1161786811123463e-05, |
|
"loss": 1.6582, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.567043905907448, |
|
"grad_norm": 0.15326225757598877, |
|
"learning_rate": 2.1018810326397926e-05, |
|
"loss": 1.6632, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.5688214730106061, |
|
"grad_norm": 0.14871954917907715, |
|
"learning_rate": 2.087596732156729e-05, |
|
"loss": 1.6683, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.5705990401137643, |
|
"grad_norm": 0.138087660074234, |
|
"learning_rate": 2.073326258582043e-05, |
|
"loss": 1.6687, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.5723766072169224, |
|
"grad_norm": 0.1453862488269806, |
|
"learning_rate": 2.0592125796717588e-05, |
|
"loss": 1.6658, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.5741541743200805, |
|
"grad_norm": 0.14493419229984283, |
|
"learning_rate": 2.044971044602353e-05, |
|
"loss": 1.6667, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.5759317414232388, |
|
"grad_norm": 0.22620221972465515, |
|
"learning_rate": 2.0307447655800402e-05, |
|
"loss": 1.6818, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.5777093085263969, |
|
"grad_norm": 0.14831425249576569, |
|
"learning_rate": 2.016534219578384e-05, |
|
"loss": 1.6518, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.579486875629555, |
|
"grad_norm": 0.15642555058002472, |
|
"learning_rate": 2.0023398830434578e-05, |
|
"loss": 1.6578, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.5812644427327132, |
|
"grad_norm": 0.18855425715446472, |
|
"learning_rate": 1.9881622318778698e-05, |
|
"loss": 1.6719, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.5830420098358713, |
|
"grad_norm": 0.14109855890274048, |
|
"learning_rate": 1.974001741424807e-05, |
|
"loss": 1.6672, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.5848195769390294, |
|
"grad_norm": 0.16638268530368805, |
|
"learning_rate": 1.959858886452098e-05, |
|
"loss": 1.6732, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.5865971440421875, |
|
"grad_norm": 0.15555280447006226, |
|
"learning_rate": 1.9457341411362953e-05, |
|
"loss": 1.6738, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.5883747111453458, |
|
"grad_norm": 0.19915728271007538, |
|
"learning_rate": 1.9316279790467785e-05, |
|
"loss": 1.6828, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.5901522782485039, |
|
"grad_norm": 0.14633417129516602, |
|
"learning_rate": 1.9175408731298737e-05, |
|
"loss": 1.6582, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.591929845351662, |
|
"grad_norm": 0.15195755660533905, |
|
"learning_rate": 1.9034732956930004e-05, |
|
"loss": 1.6722, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.5937074124548202, |
|
"grad_norm": 0.1364789605140686, |
|
"learning_rate": 1.8894257183888324e-05, |
|
"loss": 1.6797, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.5954849795579783, |
|
"grad_norm": 0.1638212352991104, |
|
"learning_rate": 1.8753986121994874e-05, |
|
"loss": 1.6607, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.5972625466611364, |
|
"grad_norm": 0.16077277064323425, |
|
"learning_rate": 1.8613924474207344e-05, |
|
"loss": 1.6731, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.5990401137642946, |
|
"grad_norm": 0.16311664879322052, |
|
"learning_rate": 1.8474076936462277e-05, |
|
"loss": 1.6604, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.6008176808674528, |
|
"grad_norm": 0.16206273436546326, |
|
"learning_rate": 1.833444819751758e-05, |
|
"loss": 1.6769, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.6025952479706109, |
|
"grad_norm": 0.1508202999830246, |
|
"learning_rate": 1.8195042938795387e-05, |
|
"loss": 1.6739, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.604372815073769, |
|
"grad_norm": 0.16781938076019287, |
|
"learning_rate": 1.8055865834225045e-05, |
|
"loss": 1.6665, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.6061503821769272, |
|
"grad_norm": 0.1427120417356491, |
|
"learning_rate": 1.7916921550086444e-05, |
|
"loss": 1.68, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.6079279492800853, |
|
"grad_norm": 0.15128500759601593, |
|
"learning_rate": 1.7778214744853537e-05, |
|
"loss": 1.6666, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.6097055163832434, |
|
"grad_norm": 0.19677314162254333, |
|
"learning_rate": 1.76397500690382e-05, |
|
"loss": 1.673, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.6114830834864016, |
|
"grad_norm": 0.1646704375743866, |
|
"learning_rate": 1.7501532165034255e-05, |
|
"loss": 1.6748, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.6132606505895598, |
|
"grad_norm": 0.15692903101444244, |
|
"learning_rate": 1.736356566696186e-05, |
|
"loss": 1.6713, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.6150382176927179, |
|
"grad_norm": 0.15300609171390533, |
|
"learning_rate": 1.7225855200512113e-05, |
|
"loss": 1.6673, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.6168157847958761, |
|
"grad_norm": 0.18005123734474182, |
|
"learning_rate": 1.7088405382791988e-05, |
|
"loss": 1.6638, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.6185933518990342, |
|
"grad_norm": 0.13681212067604065, |
|
"learning_rate": 1.6951220822169514e-05, |
|
"loss": 1.6638, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.6203709190021923, |
|
"grad_norm": 0.15197409689426422, |
|
"learning_rate": 1.681430611811928e-05, |
|
"loss": 1.6831, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.6221484861053505, |
|
"grad_norm": 0.14276647567749023, |
|
"learning_rate": 1.667766586106822e-05, |
|
"loss": 1.6715, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.6239260532085086, |
|
"grad_norm": 0.1536717563867569, |
|
"learning_rate": 1.654130463224171e-05, |
|
"loss": 1.6608, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.6257036203116668, |
|
"grad_norm": 0.14087150990962982, |
|
"learning_rate": 1.6405227003509966e-05, |
|
"loss": 1.671, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.627481187414825, |
|
"grad_norm": 0.1598573476076126, |
|
"learning_rate": 1.6269437537234758e-05, |
|
"loss": 1.6824, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.6292587545179831, |
|
"grad_norm": 0.14146994054317474, |
|
"learning_rate": 1.613394078611646e-05, |
|
"loss": 1.6736, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.6310363216211412, |
|
"grad_norm": 0.1572994738817215, |
|
"learning_rate": 1.599874129304138e-05, |
|
"loss": 1.6656, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.6328138887242993, |
|
"grad_norm": 0.1504960060119629, |
|
"learning_rate": 1.5863843590929483e-05, |
|
"loss": 1.6639, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.6345914558274575, |
|
"grad_norm": 0.17305798828601837, |
|
"learning_rate": 1.572925220258239e-05, |
|
"loss": 1.6563, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.6363690229306156, |
|
"grad_norm": 0.19001583755016327, |
|
"learning_rate": 1.5594971640531735e-05, |
|
"loss": 1.6694, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.6381465900337737, |
|
"grad_norm": 0.16306428611278534, |
|
"learning_rate": 1.5461006406887892e-05, |
|
"loss": 1.6597, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.639924157136932, |
|
"grad_norm": 0.1538590043783188, |
|
"learning_rate": 1.532736099318901e-05, |
|
"loss": 1.6573, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.6417017242400901, |
|
"grad_norm": 0.14714497327804565, |
|
"learning_rate": 1.5194039880250432e-05, |
|
"loss": 1.6647, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.6434792913432482, |
|
"grad_norm": 0.1425635814666748, |
|
"learning_rate": 1.5061047538014466e-05, |
|
"loss": 1.67, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.6452568584464063, |
|
"grad_norm": 0.14618100225925446, |
|
"learning_rate": 1.4928388425400514e-05, |
|
"loss": 1.6456, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.6470344255495645, |
|
"grad_norm": 0.13993218541145325, |
|
"learning_rate": 1.479606699015556e-05, |
|
"loss": 1.6603, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.6488119926527226, |
|
"grad_norm": 0.1687382161617279, |
|
"learning_rate": 1.4664087668705082e-05, |
|
"loss": 1.6684, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.6505895597558807, |
|
"grad_norm": 0.1678340882062912, |
|
"learning_rate": 1.453245488600427e-05, |
|
"loss": 1.6762, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.652367126859039, |
|
"grad_norm": 0.15924805402755737, |
|
"learning_rate": 1.4401173055389722e-05, |
|
"loss": 1.6775, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.6541446939621971, |
|
"grad_norm": 0.149272158741951, |
|
"learning_rate": 1.42702465784314e-05, |
|
"loss": 1.6607, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.6559222610653552, |
|
"grad_norm": 0.16075880825519562, |
|
"learning_rate": 1.4139679844785125e-05, |
|
"loss": 1.6526, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.6576998281685134, |
|
"grad_norm": 0.15068253874778748, |
|
"learning_rate": 1.4009477232045356e-05, |
|
"loss": 1.6502, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.6594773952716715, |
|
"grad_norm": 0.15616253018379211, |
|
"learning_rate": 1.387964310559845e-05, |
|
"loss": 1.6681, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.6612549623748296, |
|
"grad_norm": 0.16503843665122986, |
|
"learning_rate": 1.3750181818476282e-05, |
|
"loss": 1.6583, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.6630325294779877, |
|
"grad_norm": 0.1866072565317154, |
|
"learning_rate": 1.36210977112103e-05, |
|
"loss": 1.662, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.664810096581146, |
|
"grad_norm": 0.14251679182052612, |
|
"learning_rate": 1.3492395111686013e-05, |
|
"loss": 1.6654, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.6665876636843041, |
|
"grad_norm": 0.3741336464881897, |
|
"learning_rate": 1.3364078334997871e-05, |
|
"loss": 1.6506, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.6683652307874622, |
|
"grad_norm": 0.15886838734149933, |
|
"learning_rate": 1.3236151683304582e-05, |
|
"loss": 1.6593, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.6701427978906204, |
|
"grad_norm": 0.1486154943704605, |
|
"learning_rate": 1.3108619445684905e-05, |
|
"loss": 1.6752, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.6719203649937785, |
|
"grad_norm": 0.14979256689548492, |
|
"learning_rate": 1.2981485897993812e-05, |
|
"loss": 1.6614, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.6736979320969366, |
|
"grad_norm": 0.14133110642433167, |
|
"learning_rate": 1.2854755302719146e-05, |
|
"loss": 1.6715, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.6754754992000948, |
|
"grad_norm": 0.16530480980873108, |
|
"learning_rate": 1.2728431908838707e-05, |
|
"loss": 1.6732, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.677253066303253, |
|
"grad_norm": 0.13394702970981598, |
|
"learning_rate": 1.2602519951677793e-05, |
|
"loss": 1.6682, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.6790306334064111, |
|
"grad_norm": 0.1656082421541214, |
|
"learning_rate": 1.2477023652767197e-05, |
|
"loss": 1.6655, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.6808082005095693, |
|
"grad_norm": 0.18027155101299286, |
|
"learning_rate": 1.2351947219701676e-05, |
|
"loss": 1.6624, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.6825857676127274, |
|
"grad_norm": 0.1868833750486374, |
|
"learning_rate": 1.2227294845998873e-05, |
|
"loss": 1.6671, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.6843633347158855, |
|
"grad_norm": 0.14410994946956635, |
|
"learning_rate": 1.2103070710958724e-05, |
|
"loss": 1.6698, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.6861409018190436, |
|
"grad_norm": 0.16073070466518402, |
|
"learning_rate": 1.1980514742799768e-05, |
|
"loss": 1.6697, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.6879184689222018, |
|
"grad_norm": 0.13548509776592255, |
|
"learning_rate": 1.1857155179376509e-05, |
|
"loss": 1.6619, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.68969603602536, |
|
"grad_norm": 0.20386448502540588, |
|
"learning_rate": 1.1734236264525464e-05, |
|
"loss": 1.6802, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.6914736031285181, |
|
"grad_norm": 0.13660947978496552, |
|
"learning_rate": 1.1611762119427785e-05, |
|
"loss": 1.6718, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.6932511702316763, |
|
"grad_norm": 0.16920311748981476, |
|
"learning_rate": 1.1489736850352542e-05, |
|
"loss": 1.6529, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.6950287373348344, |
|
"grad_norm": 0.1494123786687851, |
|
"learning_rate": 1.1368164548519047e-05, |
|
"loss": 1.6876, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.6968063044379925, |
|
"grad_norm": 0.16994184255599976, |
|
"learning_rate": 1.1247049289959693e-05, |
|
"loss": 1.6635, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.6985838715411506, |
|
"grad_norm": 0.16828663647174835, |
|
"learning_rate": 1.1126395135383297e-05, |
|
"loss": 1.6505, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.7003614386443088, |
|
"grad_norm": 0.1685681790113449, |
|
"learning_rate": 1.1006206130038932e-05, |
|
"loss": 1.6674, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.702139005747467, |
|
"grad_norm": 0.14324048161506653, |
|
"learning_rate": 1.0886486303580332e-05, |
|
"loss": 1.6712, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.7039165728506251, |
|
"grad_norm": 0.14247146248817444, |
|
"learning_rate": 1.0767239669930756e-05, |
|
"loss": 1.6638, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.7056941399537833, |
|
"grad_norm": 0.18177978694438934, |
|
"learning_rate": 1.0648470227148434e-05, |
|
"loss": 1.6489, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.7074717070569414, |
|
"grad_norm": 0.13986016809940338, |
|
"learning_rate": 1.0530181957292506e-05, |
|
"loss": 1.6603, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.7092492741600995, |
|
"grad_norm": 0.22386716306209564, |
|
"learning_rate": 1.0412378826289529e-05, |
|
"loss": 1.6701, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.7110268412632577, |
|
"grad_norm": 0.1482144594192505, |
|
"learning_rate": 1.0295064783800485e-05, |
|
"loss": 1.6711, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.7128044083664158, |
|
"grad_norm": 0.1389176994562149, |
|
"learning_rate": 1.0178243763088382e-05, |
|
"loss": 1.6643, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.714581975469574, |
|
"grad_norm": 0.16256819665431976, |
|
"learning_rate": 1.0061919680886375e-05, |
|
"loss": 1.6737, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.7163595425727322, |
|
"grad_norm": 0.1738821119070053, |
|
"learning_rate": 9.946096437266427e-06, |
|
"loss": 1.6854, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.7181371096758903, |
|
"grad_norm": 0.1526288390159607, |
|
"learning_rate": 9.830777915508584e-06, |
|
"loss": 1.6535, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.7199146767790484, |
|
"grad_norm": 0.13686831295490265, |
|
"learning_rate": 9.71596798197075e-06, |
|
"loss": 1.6807, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.7216922438822065, |
|
"grad_norm": 0.1469413936138153, |
|
"learning_rate": 9.602810911756332e-06, |
|
"loss": 1.675, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.7234698109853647, |
|
"grad_norm": 0.14766348898410797, |
|
"learning_rate": 9.489024503787308e-06, |
|
"loss": 1.6554, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 0.7252473780885228, |
|
"grad_norm": 0.15909671783447266, |
|
"learning_rate": 9.375758142223712e-06, |
|
"loss": 1.6665, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.727024945191681, |
|
"grad_norm": 0.17711246013641357, |
|
"learning_rate": 9.263015624619362e-06, |
|
"loss": 1.66, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 0.7288025122948392, |
|
"grad_norm": 0.15886163711547852, |
|
"learning_rate": 9.150800730964821e-06, |
|
"loss": 1.6639, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.7305800793979973, |
|
"grad_norm": 0.14557142555713654, |
|
"learning_rate": 9.039117223560666e-06, |
|
"loss": 1.677, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 0.7323576465011554, |
|
"grad_norm": 0.15475749969482422, |
|
"learning_rate": 8.927968846891351e-06, |
|
"loss": 1.6677, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.7341352136043136, |
|
"grad_norm": 0.1995362639427185, |
|
"learning_rate": 8.817359327499659e-06, |
|
"loss": 1.6635, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 0.7359127807074717, |
|
"grad_norm": 0.16061349213123322, |
|
"learning_rate": 8.70729237386175e-06, |
|
"loss": 1.6696, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.7376903478106298, |
|
"grad_norm": 0.1676475554704666, |
|
"learning_rate": 8.597771676262848e-06, |
|
"loss": 1.6609, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.739467914913788, |
|
"grad_norm": 0.28265243768692017, |
|
"learning_rate": 8.488800906673493e-06, |
|
"loss": 1.657, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.7412454820169462, |
|
"grad_norm": 0.15924739837646484, |
|
"learning_rate": 8.380383718626441e-06, |
|
"loss": 1.661, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 0.7430230491201043, |
|
"grad_norm": 0.15197895467281342, |
|
"learning_rate": 8.27252374709416e-06, |
|
"loss": 1.6663, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.7448006162232624, |
|
"grad_norm": 0.15987786650657654, |
|
"learning_rate": 8.165224608366981e-06, |
|
"loss": 1.6657, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 0.7465781833264206, |
|
"grad_norm": 0.14073099195957184, |
|
"learning_rate": 8.058489899931795e-06, |
|
"loss": 1.6596, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.7483557504295787, |
|
"grad_norm": 0.148057222366333, |
|
"learning_rate": 7.95232320035152e-06, |
|
"loss": 1.6642, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 0.7501333175327368, |
|
"grad_norm": 0.17104440927505493, |
|
"learning_rate": 7.846728069145052e-06, |
|
"loss": 1.6587, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 0.7519108846358951, |
|
"grad_norm": 0.1572682410478592, |
|
"learning_rate": 7.741708046667947e-06, |
|
"loss": 1.6748, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 0.7536884517390532, |
|
"grad_norm": 0.17384777963161469, |
|
"learning_rate": 7.637266653993755e-06, |
|
"loss": 1.6731, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 0.7554660188422113, |
|
"grad_norm": 0.15057361125946045, |
|
"learning_rate": 7.533407392795896e-06, |
|
"loss": 1.6753, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.7572435859453694, |
|
"grad_norm": 0.17582474648952484, |
|
"learning_rate": 7.431163571532962e-06, |
|
"loss": 1.6614, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.7590211530485276, |
|
"grad_norm": 0.16274411976337433, |
|
"learning_rate": 7.328473092285082e-06, |
|
"loss": 1.6622, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 0.7607987201516857, |
|
"grad_norm": 0.14647479355335236, |
|
"learning_rate": 7.226375097632967e-06, |
|
"loss": 1.6672, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 0.7625762872548438, |
|
"grad_norm": 0.14855672419071198, |
|
"learning_rate": 7.124873010681446e-06, |
|
"loss": 1.6734, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 0.764353854358002, |
|
"grad_norm": 0.14453125, |
|
"learning_rate": 7.0239702345559766e-06, |
|
"loss": 1.6629, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.7661314214611602, |
|
"grad_norm": 0.15817640721797943, |
|
"learning_rate": 6.923670152288514e-06, |
|
"loss": 1.6726, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 0.7679089885643183, |
|
"grad_norm": 0.14252114295959473, |
|
"learning_rate": 6.823976126704137e-06, |
|
"loss": 1.6561, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.7696865556674765, |
|
"grad_norm": 0.16966678202152252, |
|
"learning_rate": 6.724891500308264e-06, |
|
"loss": 1.6703, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 0.7714641227706346, |
|
"grad_norm": 0.1773873120546341, |
|
"learning_rate": 6.626419595174596e-06, |
|
"loss": 1.6519, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 0.7732416898737927, |
|
"grad_norm": 0.15773586928844452, |
|
"learning_rate": 6.528563712833738e-06, |
|
"loss": 1.6652, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.7750192569769508, |
|
"grad_norm": 0.13970991969108582, |
|
"learning_rate": 6.431327134162498e-06, |
|
"loss": 1.6402, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 0.776796824080109, |
|
"grad_norm": 0.20692099630832672, |
|
"learning_rate": 6.3347131192739105e-06, |
|
"loss": 1.6667, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 0.7785743911832672, |
|
"grad_norm": 0.14494654536247253, |
|
"learning_rate": 6.238724907407897e-06, |
|
"loss": 1.6598, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.7803519582864253, |
|
"grad_norm": 0.15285950899124146, |
|
"learning_rate": 6.143365716822691e-06, |
|
"loss": 1.6675, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 0.7821295253895835, |
|
"grad_norm": 0.1541031152009964, |
|
"learning_rate": 6.048638744686922e-06, |
|
"loss": 1.6609, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.7839070924927416, |
|
"grad_norm": 0.16380751132965088, |
|
"learning_rate": 5.954547166972424e-06, |
|
"loss": 1.6751, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 0.7856846595958997, |
|
"grad_norm": 0.1482234001159668, |
|
"learning_rate": 5.8610941383477615e-06, |
|
"loss": 1.6584, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 0.7874622266990579, |
|
"grad_norm": 0.15207888185977936, |
|
"learning_rate": 5.768282792072455e-06, |
|
"loss": 1.6495, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 0.789239793802216, |
|
"grad_norm": 0.14984245598316193, |
|
"learning_rate": 5.6761162398919264e-06, |
|
"loss": 1.6837, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 0.7910173609053742, |
|
"grad_norm": 0.18983450531959534, |
|
"learning_rate": 5.584597571933176e-06, |
|
"loss": 1.6604, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.7927949280085324, |
|
"grad_norm": 0.14986811578273773, |
|
"learning_rate": 5.493729856601171e-06, |
|
"loss": 1.6734, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 0.7945724951116905, |
|
"grad_norm": 0.16871729493141174, |
|
"learning_rate": 5.4035161404759755e-06, |
|
"loss": 1.6535, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 0.7963500622148486, |
|
"grad_norm": 0.1682252287864685, |
|
"learning_rate": 5.313959448210609e-06, |
|
"loss": 1.654, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 0.7981276293180067, |
|
"grad_norm": 0.16398753225803375, |
|
"learning_rate": 5.225062782429624e-06, |
|
"loss": 1.6665, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 0.7999051964211649, |
|
"grad_norm": 0.16041302680969238, |
|
"learning_rate": 5.136829123628442e-06, |
|
"loss": 1.6668, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.801682763524323, |
|
"grad_norm": 0.14892232418060303, |
|
"learning_rate": 5.049261430073432e-06, |
|
"loss": 1.6698, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 0.8034603306274812, |
|
"grad_norm": 0.15795736014842987, |
|
"learning_rate": 4.962362637702711e-06, |
|
"loss": 1.6449, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 0.8052378977306394, |
|
"grad_norm": 0.14031356573104858, |
|
"learning_rate": 4.8761356600277284e-06, |
|
"loss": 1.6736, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 0.8070154648337975, |
|
"grad_norm": 0.14954744279384613, |
|
"learning_rate": 4.790583388035561e-06, |
|
"loss": 1.6595, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 0.8087930319369556, |
|
"grad_norm": 0.1448160856962204, |
|
"learning_rate": 4.705708690092006e-06, |
|
"loss": 1.6697, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.8105705990401137, |
|
"grad_norm": 0.1565515398979187, |
|
"learning_rate": 4.621514411845399e-06, |
|
"loss": 1.6642, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 0.8123481661432719, |
|
"grad_norm": 0.17093238234519958, |
|
"learning_rate": 4.5380033761312e-06, |
|
"loss": 1.6697, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 0.81412573324643, |
|
"grad_norm": 0.14756350219249725, |
|
"learning_rate": 4.4560032277625644e-06, |
|
"loss": 1.6726, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 0.8159033003495882, |
|
"grad_norm": 0.15235283970832825, |
|
"learning_rate": 4.373860152031772e-06, |
|
"loss": 1.6617, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 0.8176808674527464, |
|
"grad_norm": 0.16607356071472168, |
|
"learning_rate": 4.292408622096306e-06, |
|
"loss": 1.6648, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.8194584345559045, |
|
"grad_norm": 0.1486140936613083, |
|
"learning_rate": 4.211651368833752e-06, |
|
"loss": 1.6664, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 0.8212360016590626, |
|
"grad_norm": 0.1572180539369583, |
|
"learning_rate": 4.131591099844242e-06, |
|
"loss": 1.667, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 0.8230135687622208, |
|
"grad_norm": 0.1429453045129776, |
|
"learning_rate": 4.052230499359672e-06, |
|
"loss": 1.6601, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 0.8247911358653789, |
|
"grad_norm": 0.2002544403076172, |
|
"learning_rate": 3.973572228153693e-06, |
|
"loss": 1.6767, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 0.826568702968537, |
|
"grad_norm": 0.2108883261680603, |
|
"learning_rate": 3.895618923452526e-06, |
|
"loss": 1.6682, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.8283462700716953, |
|
"grad_norm": 0.16697534918785095, |
|
"learning_rate": 3.818373198846526e-06, |
|
"loss": 1.6739, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 0.8301238371748534, |
|
"grad_norm": 0.21570728719234467, |
|
"learning_rate": 3.741837644202542e-06, |
|
"loss": 1.6602, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 0.8319014042780115, |
|
"grad_norm": 0.14422467350959778, |
|
"learning_rate": 3.6660148255771187e-06, |
|
"loss": 1.6702, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 0.8336789713811696, |
|
"grad_norm": 0.19726932048797607, |
|
"learning_rate": 3.590907285130435e-06, |
|
"loss": 1.6776, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 0.8354565384843278, |
|
"grad_norm": 0.15185341238975525, |
|
"learning_rate": 3.5165175410410838e-06, |
|
"loss": 1.6568, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.8372341055874859, |
|
"grad_norm": 0.13279466331005096, |
|
"learning_rate": 3.4428480874216407e-06, |
|
"loss": 1.6847, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 0.839011672690644, |
|
"grad_norm": 0.15089605748653412, |
|
"learning_rate": 3.3699013942350367e-06, |
|
"loss": 1.6742, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 0.8407892397938023, |
|
"grad_norm": 0.17505663633346558, |
|
"learning_rate": 3.2976799072117564e-06, |
|
"loss": 1.6749, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 0.8425668068969604, |
|
"grad_norm": 0.1557130366563797, |
|
"learning_rate": 3.226186047767829e-06, |
|
"loss": 1.6768, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 0.8443443740001185, |
|
"grad_norm": 0.17593321204185486, |
|
"learning_rate": 3.1554222129236505e-06, |
|
"loss": 1.6651, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.8461219411032767, |
|
"grad_norm": 0.14146077632904053, |
|
"learning_rate": 3.0853907752236123e-06, |
|
"loss": 1.6463, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 0.8478995082064348, |
|
"grad_norm": 0.16316835582256317, |
|
"learning_rate": 3.0160940826565566e-06, |
|
"loss": 1.665, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 0.8496770753095929, |
|
"grad_norm": 0.15772958099842072, |
|
"learning_rate": 2.947534458577067e-06, |
|
"loss": 1.6691, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 0.851454642412751, |
|
"grad_norm": 0.15084761381149292, |
|
"learning_rate": 2.879714201627548e-06, |
|
"loss": 1.6562, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 0.8532322095159093, |
|
"grad_norm": 0.1439027339220047, |
|
"learning_rate": 2.812635585661169e-06, |
|
"loss": 1.6736, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.8550097766190674, |
|
"grad_norm": 0.14739972352981567, |
|
"learning_rate": 2.746300859665632e-06, |
|
"loss": 1.656, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 0.8567873437222255, |
|
"grad_norm": 0.14110083878040314, |
|
"learning_rate": 2.6807122476877637e-06, |
|
"loss": 1.6568, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 0.8585649108253837, |
|
"grad_norm": 0.17874117195606232, |
|
"learning_rate": 2.6158719487589467e-06, |
|
"loss": 1.6855, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 0.8603424779285418, |
|
"grad_norm": 0.20830568671226501, |
|
"learning_rate": 2.5517821368213927e-06, |
|
"loss": 1.6613, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 0.8621200450316999, |
|
"grad_norm": 0.24184918403625488, |
|
"learning_rate": 2.4884449606552564e-06, |
|
"loss": 1.6665, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.863897612134858, |
|
"grad_norm": 0.18940667808055878, |
|
"learning_rate": 2.4258625438065898e-06, |
|
"loss": 1.6668, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 0.8656751792380163, |
|
"grad_norm": 0.1848708689212799, |
|
"learning_rate": 2.3640369845161464e-06, |
|
"loss": 1.6668, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 0.8674527463411744, |
|
"grad_norm": 0.14313864707946777, |
|
"learning_rate": 2.302970355649034e-06, |
|
"loss": 1.6648, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 0.8692303134443325, |
|
"grad_norm": 0.144060418009758, |
|
"learning_rate": 2.242664704625216e-06, |
|
"loss": 1.6684, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 0.8710078805474907, |
|
"grad_norm": 0.1597507745027542, |
|
"learning_rate": 2.1831220533508556e-06, |
|
"loss": 1.6655, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.8727854476506488, |
|
"grad_norm": 0.1441722959280014, |
|
"learning_rate": 2.124344398150546e-06, |
|
"loss": 1.6778, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 0.8745630147538069, |
|
"grad_norm": 0.16209940612316132, |
|
"learning_rate": 2.0663337097003576e-06, |
|
"loss": 1.6608, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 0.8763405818569651, |
|
"grad_norm": 0.33218371868133545, |
|
"learning_rate": 2.0090919329617876e-06, |
|
"loss": 1.6411, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 0.8781181489601232, |
|
"grad_norm": 0.1453067809343338, |
|
"learning_rate": 1.9526209871165184e-06, |
|
"loss": 1.6652, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 0.8798957160632814, |
|
"grad_norm": 0.18067798018455505, |
|
"learning_rate": 1.8969227655021098e-06, |
|
"loss": 1.6777, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.8816732831664396, |
|
"grad_norm": 0.14852333068847656, |
|
"learning_rate": 1.8419991355484945e-06, |
|
"loss": 1.6616, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 0.8834508502695977, |
|
"grad_norm": 0.17207376658916473, |
|
"learning_rate": 1.7878519387153763e-06, |
|
"loss": 1.6693, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 0.8852284173727558, |
|
"grad_norm": null,
|
"learning_rate": 1.7350128216860744e-06, |
|
"loss": 1.6699, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 0.8870059844759139, |
|
"grad_norm": 0.1677992343902588, |
|
"learning_rate": 1.6824161021340963e-06, |
|
"loss": 1.662, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 0.8887835515790721, |
|
"grad_norm": 0.1469300240278244, |
|
"learning_rate": 1.6306011661451375e-06, |
|
"loss": 1.6804, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.8905611186822302, |
|
"grad_norm": 0.17192143201828003, |
|
"learning_rate": 1.5795697509517316e-06, |
|
"loss": 1.6682, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 0.8923386857853884, |
|
"grad_norm": 0.16549953818321228, |
|
"learning_rate": 1.529323567516805e-06, |
|
"loss": 1.6442, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 0.8941162528885466, |
|
"grad_norm": 0.21391679346561432, |
|
"learning_rate": 1.4803549924437943e-06, |
|
"loss": 1.6649, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 0.8958938199917047, |
|
"grad_norm": 0.14870049059391022, |
|
"learning_rate": 1.4316764061822001e-06, |
|
"loss": 1.6526, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 0.8976713870948628, |
|
"grad_norm": 0.14293646812438965, |
|
"learning_rate": 1.3837880101939342e-06, |
|
"loss": 1.6585, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.899448954198021, |
|
"grad_norm": 0.19570215046405792, |
|
"learning_rate": 1.3366914100639061e-06, |
|
"loss": 1.6568, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 0.9012265213011791, |
|
"grad_norm": 0.13882361352443695, |
|
"learning_rate": 1.2903881848299797e-06, |
|
"loss": 1.6541, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 0.9030040884043372, |
|
"grad_norm": 0.1370488852262497, |
|
"learning_rate": 1.244879886930031e-06, |
|
"loss": 1.6625, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 0.9047816555074955, |
|
"grad_norm": 0.18005253374576569, |
|
"learning_rate": 1.200168042149899e-06, |
|
"loss": 1.6708, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 0.9065592226106536, |
|
"grad_norm": 0.18435104191303253, |
|
"learning_rate": 1.156254149572225e-06, |
|
"loss": 1.6642, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.9083367897138117, |
|
"grad_norm": 0.1873762458562851, |
|
"learning_rate": 1.1131396815261985e-06, |
|
"loss": 1.6561, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 0.9101143568169698, |
|
"grad_norm": 0.1366182565689087, |
|
"learning_rate": 1.0708260835381927e-06, |
|
"loss": 1.6456, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 0.911891923920128, |
|
"grad_norm": 0.16981613636016846, |
|
"learning_rate": 1.0293147742832966e-06, |
|
"loss": 1.6732, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 0.9136694910232861, |
|
"grad_norm": 0.18389706313610077, |
|
"learning_rate": 9.88607145537751e-07, |
|
"loss": 1.6679, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 0.9154470581264442, |
|
"grad_norm": 0.15548266470432281, |
|
"learning_rate": 9.487045621322799e-07, |
|
"loss": 1.6619, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.9172246252296025, |
|
"grad_norm": 0.18912291526794434, |
|
"learning_rate": 9.096083619063473e-07, |
|
"loss": 1.6736, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 0.9190021923327606, |
|
"grad_norm": 0.16964443027973175, |
|
"learning_rate": 8.713198556632885e-07, |
|
"loss": 1.6748, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 0.9207797594359187, |
|
"grad_norm": 0.14487318694591522, |
|
"learning_rate": 8.338403271263589e-07, |
|
"loss": 1.6692, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 0.9225573265390768, |
|
"grad_norm": 0.15898752212524414, |
|
"learning_rate": 7.971710328957132e-07, |
|
"loss": 1.6646, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 0.924334893642235, |
|
"grad_norm": 0.15839669108390808, |
|
"learning_rate": 7.613132024062469e-07, |
|
"loss": 1.662, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.9261124607453931, |
|
"grad_norm": 0.15475721657276154, |
|
"learning_rate": 7.262680378864017e-07, |
|
"loss": 1.6671, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 0.9278900278485512, |
|
"grad_norm": 0.1529570072889328, |
|
"learning_rate": 6.920367143178452e-07, |
|
"loss": 1.6726, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 0.9296675949517095, |
|
"grad_norm": 0.1656341701745987, |
|
"learning_rate": 6.586203793960771e-07, |
|
"loss": 1.6623, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 0.9314451620548676, |
|
"grad_norm": 0.1463242918252945, |
|
"learning_rate": 6.260201534919491e-07, |
|
"loss": 1.6662, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 0.9332227291580257, |
|
"grad_norm": 0.13823845982551575, |
|
"learning_rate": 5.942371296141058e-07, |
|
"loss": 1.6453, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.9350002962611839, |
|
"grad_norm": 0.14740724861621857, |
|
"learning_rate": 5.632723733723366e-07, |
|
"loss": 1.6574, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 0.936777863364342, |
|
"grad_norm": 0.13956210017204285, |
|
"learning_rate": 5.331269229418484e-07, |
|
"loss": 1.6577, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 0.9385554304675001, |
|
"grad_norm": 0.14831486344337463, |
|
"learning_rate": 5.038017890284547e-07, |
|
"loss": 1.6491, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 0.9403329975706582, |
|
"grad_norm": 0.17318527400493622, |
|
"learning_rate": 4.75297954834697e-07, |
|
"loss": 1.6704, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 0.9421105646738165, |
|
"grad_norm": 0.15295392274856567, |
|
"learning_rate": 4.476163760268659e-07, |
|
"loss": 1.6386, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.9438881317769746, |
|
"grad_norm": 0.1569519191980362, |
|
"learning_rate": 4.207579807029821e-07, |
|
"loss": 1.6618, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 0.9456656988801327, |
|
"grad_norm": 0.14801190793514252, |
|
"learning_rate": 3.947236693616574e-07, |
|
"loss": 1.6625, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 0.9474432659832909, |
|
"grad_norm": 0.16047972440719604, |
|
"learning_rate": 3.697623220822066e-07, |
|
"loss": 1.6702, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 0.949220833086449, |
|
"grad_norm": 0.17775191366672516, |
|
"learning_rate": 3.453705075406932e-07, |
|
"loss": 1.6671, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 0.9509984001896071, |
|
"grad_norm": 0.15378819406032562, |
|
"learning_rate": 3.218053045458136e-07, |
|
"loss": 1.6644, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.9527759672927653, |
|
"grad_norm": 0.14244551956653595, |
|
"learning_rate": 2.990675031832174e-07, |
|
"loss": 1.6791, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 0.9545535343959235, |
|
"grad_norm": 0.1715448796749115, |
|
"learning_rate": 2.7715786579772527e-07, |
|
"loss": 1.6614, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 0.9563311014990816, |
|
"grad_norm": 0.15638667345046997, |
|
"learning_rate": 2.560771269677742e-07, |
|
"loss": 1.6782, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 0.9581086686022398, |
|
"grad_norm": 0.18975552916526794, |
|
"learning_rate": 2.358259934807927e-07, |
|
"loss": 1.6656, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 0.9598862357053979, |
|
"grad_norm": 0.14271163940429688, |
|
"learning_rate": 2.1640514430950055e-07, |
|
"loss": 1.6574, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.961663802808556, |
|
"grad_norm": 0.20691907405853271, |
|
"learning_rate": 1.978152305891351e-07, |
|
"loss": 1.6523, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 0.9634413699117141, |
|
"grad_norm": 0.15907599031925201, |
|
"learning_rate": 1.8005687559563834e-07, |
|
"loss": 1.6763, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 0.9652189370148723, |
|
"grad_norm": 0.13600093126296997, |
|
"learning_rate": 1.6313067472474576e-07, |
|
"loss": 1.6771, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 0.9669965041180305, |
|
"grad_norm": 0.157552108168602, |
|
"learning_rate": 1.470371954720301e-07, |
|
"loss": 1.6601, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 0.9687740712211886, |
|
"grad_norm": 0.14566664397716522, |
|
"learning_rate": 1.3177697741387218e-07, |
|
"loss": 1.6758, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.9705516383243468, |
|
"grad_norm": 0.16759639978408813, |
|
"learning_rate": 1.1735053218937808e-07, |
|
"loss": 1.6591, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 0.9723292054275049, |
|
"grad_norm": 0.16849561035633087, |
|
"learning_rate": 1.0375834348320401e-07, |
|
"loss": 1.6756, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 0.974106772530663, |
|
"grad_norm": 0.17653703689575195, |
|
"learning_rate": 9.100086700936649e-08, |
|
"loss": 1.6621, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 0.9758843396338212, |
|
"grad_norm": 0.17266112565994263, |
|
"learning_rate": 7.907853049594905e-08, |
|
"loss": 1.673, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 0.9776619067369793, |
|
"grad_norm": 0.1719112992286682, |
|
"learning_rate": 6.799173367075528e-08, |
|
"loss": 1.6574, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.9794394738401375, |
|
"grad_norm": 0.1552852988243103, |
|
"learning_rate": 5.774084824792247e-08, |
|
"loss": 1.6673, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 0.9812170409432956, |
|
"grad_norm": 0.1883048564195633, |
|
"learning_rate": 4.8326217915448114e-08, |
|
"loss": 1.6688, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 0.9829946080464538, |
|
"grad_norm": 0.16423700749874115, |
|
"learning_rate": 3.97481583236714e-08, |
|
"loss": 1.6706, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 0.9847721751496119, |
|
"grad_norm": 0.14842714369297028, |
|
"learning_rate": 3.2006957074690035e-08, |
|
"loss": 1.6586, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 0.98654974225277, |
|
"grad_norm": 0.13827410340309143, |
|
"learning_rate": 2.510287371270681e-08, |
|
"loss": 1.6537, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.9883273093559282, |
|
"grad_norm": 0.14455120265483856, |
|
"learning_rate": 1.903613971535323e-08, |
|
"loss": 1.6689, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 0.9901048764590863, |
|
"grad_norm": 0.14416253566741943, |
|
"learning_rate": 1.385510381303745e-08, |
|
"loss": 1.6618, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 0.9918824435622444, |
|
"grad_norm": 0.14247511327266693, |
|
"learning_rate": 9.455272617062139e-09, |
|
"loss": 1.6753, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 0.9936600106654027, |
|
"grad_norm": 0.15325996279716492, |
|
"learning_rate": 5.893315412855427e-09, |
|
"loss": 1.6689, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 0.9954375777685608, |
|
"grad_norm": 0.17999306321144104, |
|
"learning_rate": 3.169351624432437e-09, |
|
"loss": 1.6623, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.9972151448717189, |
|
"grad_norm": 0.27596476674079895, |
|
"learning_rate": 1.283472579871603e-09, |
|
"loss": 1.67, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 0.998992711974877, |
|
"grad_norm": 0.14970412850379944, |
|
"learning_rate": 2.3574150824490215e-10, |
|
"loss": 1.6558, |
|
"step": 56200 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 56256, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.6446037739742167e+18, |
|
"train_batch_size": 6, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|