{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 240,
  "global_step": 958,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0010438413361169101,
      "grad_norm": 1.4100897312164307,
      "learning_rate": 2e-05,
      "loss": 1.0427,
      "step": 1
    },
    {
      "epoch": 0.0010438413361169101,
      "eval_loss": 1.9999854564666748,
      "eval_runtime": 81.2923,
      "eval_samples_per_second": 19.854,
      "eval_steps_per_second": 9.927,
      "step": 1
    },
    {
      "epoch": 0.0020876826722338203,
      "grad_norm": 2.131267547607422,
      "learning_rate": 4e-05,
      "loss": 1.2661,
      "step": 2
    },
    {
      "epoch": 0.003131524008350731,
      "grad_norm": 2.1016671657562256,
      "learning_rate": 6e-05,
      "loss": 1.0965,
      "step": 3
    },
    {
      "epoch": 0.0041753653444676405,
      "grad_norm": 2.6869776248931885,
      "learning_rate": 8e-05,
      "loss": 1.1197,
      "step": 4
    },
    {
      "epoch": 0.005219206680584551,
      "grad_norm": 2.6327548027038574,
      "learning_rate": 0.0001,
      "loss": 0.9191,
      "step": 5
    },
    {
      "epoch": 0.006263048016701462,
      "grad_norm": 1.9485589265823364,
      "learning_rate": 0.00012,
      "loss": 0.7709,
      "step": 6
    },
    {
      "epoch": 0.007306889352818371,
      "grad_norm": 1.7759828567504883,
      "learning_rate": 0.00014,
      "loss": 0.6302,
      "step": 7
    },
    {
      "epoch": 0.008350730688935281,
      "grad_norm": 1.4292073249816895,
      "learning_rate": 0.00016,
      "loss": 0.604,
      "step": 8
    },
    {
      "epoch": 0.009394572025052192,
      "grad_norm": 1.8719030618667603,
      "learning_rate": 0.00018,
      "loss": 0.5034,
      "step": 9
    },
    {
      "epoch": 0.010438413361169102,
      "grad_norm": 2.183128833770752,
      "learning_rate": 0.0002,
      "loss": 0.5065,
      "step": 10
    },
    {
      "epoch": 0.011482254697286013,
      "grad_norm": 2.187234401702881,
      "learning_rate": 0.00019999945089843994,
      "loss": 0.4966,
      "step": 11
    },
    {
      "epoch": 0.012526096033402923,
      "grad_norm": 2.7468483448028564,
      "learning_rate": 0.00019999780359979,
      "loss": 0.3204,
      "step": 12
    },
    {
      "epoch": 0.013569937369519834,
      "grad_norm": 2.2195403575897217,
      "learning_rate": 0.00019999505812214085,
      "loss": 0.2784,
      "step": 13
    },
    {
      "epoch": 0.014613778705636743,
      "grad_norm": 2.0797836780548096,
      "learning_rate": 0.00019999121449564347,
      "loss": 0.3631,
      "step": 14
    },
    {
      "epoch": 0.015657620041753653,
      "grad_norm": 2.080904245376587,
      "learning_rate": 0.00019998627276250858,
      "loss": 0.2425,
      "step": 15
    },
    {
      "epoch": 0.016701461377870562,
      "grad_norm": 1.358616590499878,
      "learning_rate": 0.00019998023297700658,
      "loss": 0.2799,
      "step": 16
    },
    {
      "epoch": 0.017745302713987474,
      "grad_norm": 1.4436254501342773,
      "learning_rate": 0.00019997309520546647,
      "loss": 0.3314,
      "step": 17
    },
    {
      "epoch": 0.018789144050104383,
      "grad_norm": 1.1498231887817383,
      "learning_rate": 0.00019996485952627552,
      "loss": 0.2713,
      "step": 18
    },
    {
      "epoch": 0.019832985386221295,
      "grad_norm": 1.3239034414291382,
      "learning_rate": 0.00019995552602987827,
      "loss": 0.2347,
      "step": 19
    },
    {
      "epoch": 0.020876826722338204,
      "grad_norm": 1.1880042552947998,
      "learning_rate": 0.00019994509481877537,
      "loss": 0.3311,
      "step": 20
    },
    {
      "epoch": 0.021920668058455117,
      "grad_norm": 1.1707823276519775,
      "learning_rate": 0.00019993356600752276,
      "loss": 0.4601,
      "step": 21
    },
    {
      "epoch": 0.022964509394572025,
      "grad_norm": 1.6230024099349976,
      "learning_rate": 0.00019992093972273018,
      "loss": 0.4187,
      "step": 22
    },
    {
      "epoch": 0.024008350730688934,
      "grad_norm": 1.6155649423599243,
      "learning_rate": 0.00019990721610305996,
      "loss": 0.4661,
      "step": 23
    },
    {
      "epoch": 0.025052192066805846,
      "grad_norm": 1.4404484033584595,
      "learning_rate": 0.0001998923952992252,
      "loss": 0.2662,
      "step": 24
    },
    {
      "epoch": 0.026096033402922755,
      "grad_norm": 1.0615559816360474,
      "learning_rate": 0.00019987647747398852,
      "loss": 0.3144,
      "step": 25
    },
    {
      "epoch": 0.027139874739039668,
      "grad_norm": 2.5796847343444824,
      "learning_rate": 0.00019985946280215994,
      "loss": 0.9688,
      "step": 26
    },
    {
      "epoch": 0.028183716075156576,
      "grad_norm": 3.5270519256591797,
      "learning_rate": 0.00019984135147059514,
      "loss": 1.1566,
      "step": 27
    },
    {
      "epoch": 0.029227557411273485,
      "grad_norm": 3.404811143875122,
      "learning_rate": 0.00019982214367819328,
      "loss": 0.9788,
      "step": 28
    },
    {
      "epoch": 0.030271398747390398,
      "grad_norm": 2.5648179054260254,
      "learning_rate": 0.00019980183963589504,
      "loss": 0.9095,
      "step": 29
    },
    {
      "epoch": 0.031315240083507306,
      "grad_norm": 2.203162670135498,
      "learning_rate": 0.0001997804395666799,
      "loss": 0.6835,
      "step": 30
    },
    {
      "epoch": 0.032359081419624215,
      "grad_norm": 2.2330455780029297,
      "learning_rate": 0.00019975794370556417,
      "loss": 0.7176,
      "step": 31
    },
    {
      "epoch": 0.033402922755741124,
      "grad_norm": 1.7634390592575073,
      "learning_rate": 0.00019973435229959813,
      "loss": 0.6717,
      "step": 32
    },
    {
      "epoch": 0.03444676409185804,
      "grad_norm": 1.7378387451171875,
      "learning_rate": 0.00019970966560786324,
      "loss": 0.6573,
      "step": 33
    },
    {
      "epoch": 0.03549060542797495,
      "grad_norm": 1.515835165977478,
      "learning_rate": 0.0001996838839014696,
      "loss": 0.5787,
      "step": 34
    },
    {
      "epoch": 0.03653444676409186,
      "grad_norm": 1.6372636556625366,
      "learning_rate": 0.0001996570074635527,
      "loss": 0.5711,
      "step": 35
    },
    {
      "epoch": 0.037578288100208766,
      "grad_norm": 1.449986457824707,
      "learning_rate": 0.00019962903658927037,
      "loss": 0.4957,
      "step": 36
    },
    {
      "epoch": 0.038622129436325675,
      "grad_norm": 1.544659972190857,
      "learning_rate": 0.00019959997158579967,
      "loss": 0.5339,
      "step": 37
    },
    {
      "epoch": 0.03966597077244259,
      "grad_norm": 1.497573733329773,
      "learning_rate": 0.0001995698127723334,
      "loss": 0.3736,
      "step": 38
    },
    {
      "epoch": 0.0407098121085595,
      "grad_norm": 1.7716635465621948,
      "learning_rate": 0.00019953856048007652,
      "loss": 0.5047,
      "step": 39
    },
    {
      "epoch": 0.04175365344467641,
      "grad_norm": 1.439935326576233,
      "learning_rate": 0.00019950621505224273,
      "loss": 0.3242,
      "step": 40
    },
    {
      "epoch": 0.04279749478079332,
      "grad_norm": 1.5024681091308594,
      "learning_rate": 0.00019947277684405056,
      "loss": 0.3943,
      "step": 41
    },
    {
      "epoch": 0.04384133611691023,
      "grad_norm": 1.411978006362915,
      "learning_rate": 0.00019943824622271935,
      "loss": 0.2535,
      "step": 42
    },
    {
      "epoch": 0.04488517745302714,
      "grad_norm": 1.6485614776611328,
      "learning_rate": 0.00019940262356746554,
      "loss": 0.3503,
      "step": 43
    },
    {
      "epoch": 0.04592901878914405,
      "grad_norm": 2.0574934482574463,
      "learning_rate": 0.0001993659092694982,
      "loss": 0.3788,
      "step": 44
    },
    {
      "epoch": 0.04697286012526096,
      "grad_norm": 1.5055749416351318,
      "learning_rate": 0.00019932810373201495,
      "loss": 0.285,
      "step": 45
    },
    {
      "epoch": 0.04801670146137787,
      "grad_norm": 1.3977243900299072,
      "learning_rate": 0.00019928920737019733,
      "loss": 0.2997,
      "step": 46
    },
    {
      "epoch": 0.049060542797494784,
      "grad_norm": 1.5702697038650513,
      "learning_rate": 0.00019924922061120644,
      "loss": 0.2235,
      "step": 47
    },
    {
      "epoch": 0.05010438413361169,
      "grad_norm": 1.1728659868240356,
      "learning_rate": 0.0001992081438941781,
      "loss": 0.277,
      "step": 48
    },
    {
      "epoch": 0.0511482254697286,
      "grad_norm": 1.6709052324295044,
      "learning_rate": 0.00019916597767021807,
      "loss": 0.2712,
      "step": 49
    },
    {
      "epoch": 0.05219206680584551,
      "grad_norm": 2.5871024131774902,
      "learning_rate": 0.00019912272240239716,
      "loss": 0.3964,
      "step": 50
    },
    {
      "epoch": 0.05323590814196242,
      "grad_norm": 2.782623291015625,
      "learning_rate": 0.00019907837856574607,
      "loss": 0.7383,
      "step": 51
    },
    {
      "epoch": 0.054279749478079335,
      "grad_norm": 2.9450387954711914,
      "learning_rate": 0.0001990329466472502,
      "loss": 0.7801,
      "step": 52
    },
    {
      "epoch": 0.055323590814196244,
      "grad_norm": 2.3782992362976074,
      "learning_rate": 0.00019898642714584428,
      "loss": 0.6978,
      "step": 53
    },
    {
      "epoch": 0.05636743215031315,
      "grad_norm": 2.7798867225646973,
      "learning_rate": 0.000198938820572407,
      "loss": 0.6635,
      "step": 54
    },
    {
      "epoch": 0.05741127348643006,
      "grad_norm": 1.2441174983978271,
      "learning_rate": 0.00019889012744975508,
      "loss": 0.3866,
      "step": 55
    },
    {
      "epoch": 0.05845511482254697,
      "grad_norm": 1.9317642450332642,
      "learning_rate": 0.00019884034831263808,
      "loss": 0.5378,
      "step": 56
    },
    {
      "epoch": 0.059498956158663886,
      "grad_norm": 1.48700749874115,
      "learning_rate": 0.00019878948370773193,
      "loss": 0.416,
      "step": 57
    },
    {
      "epoch": 0.060542797494780795,
      "grad_norm": 1.6028392314910889,
      "learning_rate": 0.00019873753419363336,
      "loss": 0.4354,
      "step": 58
    },
    {
      "epoch": 0.061586638830897704,
      "grad_norm": 1.2003154754638672,
      "learning_rate": 0.00019868450034085352,
      "loss": 0.289,
      "step": 59
    },
    {
      "epoch": 0.06263048016701461,
      "grad_norm": 0.8975642919540405,
      "learning_rate": 0.00019863038273181186,
      "loss": 0.3655,
      "step": 60
    },
    {
      "epoch": 0.06367432150313153,
      "grad_norm": 1.0003724098205566,
      "learning_rate": 0.00019857518196082964,
      "loss": 0.3517,
      "step": 61
    },
    {
      "epoch": 0.06471816283924843,
      "grad_norm": 0.7653176784515381,
      "learning_rate": 0.00019851889863412345,
      "loss": 0.2623,
      "step": 62
    },
    {
      "epoch": 0.06576200417536535,
      "grad_norm": 0.8752651810646057,
      "learning_rate": 0.00019846153336979856,
      "loss": 0.295,
      "step": 63
    },
    {
      "epoch": 0.06680584551148225,
      "grad_norm": 0.8633437752723694,
      "learning_rate": 0.00019840308679784207,
      "loss": 0.198,
      "step": 64
    },
    {
      "epoch": 0.06784968684759916,
      "grad_norm": 0.9436039924621582,
      "learning_rate": 0.00019834355956011606,
      "loss": 0.2345,
      "step": 65
    },
    {
      "epoch": 0.06889352818371608,
      "grad_norm": 0.6003718376159668,
      "learning_rate": 0.00019828295231035051,
      "loss": 0.1637,
      "step": 66
    },
    {
      "epoch": 0.06993736951983298,
      "grad_norm": 1.460316777229309,
      "learning_rate": 0.00019822126571413616,
      "loss": 0.2315,
      "step": 67
    },
    {
      "epoch": 0.0709812108559499,
      "grad_norm": 0.840602457523346,
      "learning_rate": 0.00019815850044891707,
      "loss": 0.2587,
      "step": 68
    },
    {
      "epoch": 0.0720250521920668,
      "grad_norm": 0.9615221619606018,
      "learning_rate": 0.0001980946572039834,
      "loss": 0.3806,
      "step": 69
    },
    {
      "epoch": 0.07306889352818371,
      "grad_norm": 1.013310432434082,
      "learning_rate": 0.00019802973668046363,
      "loss": 0.2961,
      "step": 70
    },
    {
      "epoch": 0.07411273486430063,
      "grad_norm": 0.9203840494155884,
      "learning_rate": 0.00019796373959131698,
      "loss": 0.2715,
      "step": 71
    },
    {
      "epoch": 0.07515657620041753,
      "grad_norm": 0.8724762201309204,
      "learning_rate": 0.00019789666666132554,
      "loss": 0.2774,
      "step": 72
    },
    {
      "epoch": 0.07620041753653445,
      "grad_norm": 0.854420006275177,
      "learning_rate": 0.00019782851862708634,
      "loss": 0.2466,
      "step": 73
    },
    {
      "epoch": 0.07724425887265135,
      "grad_norm": 0.880692720413208,
      "learning_rate": 0.00019775929623700318,
      "loss": 0.3145,
      "step": 74
    },
    {
      "epoch": 0.07828810020876827,
      "grad_norm": 1.4334697723388672,
      "learning_rate": 0.00019768900025127851,
      "loss": 0.6141,
      "step": 75
    },
    {
      "epoch": 0.07933194154488518,
      "grad_norm": 1.6129218339920044,
      "learning_rate": 0.0001976176314419051,
      "loss": 0.6577,
      "step": 76
    },
    {
      "epoch": 0.08037578288100208,
      "grad_norm": 1.4365684986114502,
      "learning_rate": 0.00019754519059265736,
      "loss": 0.6501,
      "step": 77
    },
    {
      "epoch": 0.081419624217119,
      "grad_norm": 1.1411486864089966,
      "learning_rate": 0.00019747167849908304,
      "loss": 0.5629,
      "step": 78
    },
    {
      "epoch": 0.0824634655532359,
      "grad_norm": 0.9845925569534302,
      "learning_rate": 0.00019739709596849417,
      "loss": 0.4142,
      "step": 79
    },
    {
      "epoch": 0.08350730688935282,
      "grad_norm": 1.0630574226379395,
      "learning_rate": 0.00019732144381995846,
      "loss": 0.4291,
      "step": 80
    },
    {
      "epoch": 0.08455114822546973,
      "grad_norm": 1.059112548828125,
      "learning_rate": 0.0001972447228842902,
      "loss": 0.4116,
      "step": 81
    },
    {
      "epoch": 0.08559498956158663,
      "grad_norm": 0.99774569272995,
      "learning_rate": 0.000197166934004041,
      "loss": 0.423,
      "step": 82
    },
    {
      "epoch": 0.08663883089770355,
      "grad_norm": 1.2038483619689941,
      "learning_rate": 0.00019708807803349088,
      "loss": 0.4696,
      "step": 83
    },
    {
      "epoch": 0.08768267223382047,
      "grad_norm": 1.5743309259414673,
      "learning_rate": 0.00019700815583863852,
      "loss": 0.4688,
      "step": 84
    },
    {
      "epoch": 0.08872651356993737,
      "grad_norm": 1.0845067501068115,
      "learning_rate": 0.00019692716829719194,
      "loss": 0.3629,
      "step": 85
    },
    {
      "epoch": 0.08977035490605428,
      "grad_norm": 1.1626365184783936,
      "learning_rate": 0.00019684511629855888,
      "loss": 0.4327,
      "step": 86
    },
    {
      "epoch": 0.09081419624217119,
      "grad_norm": 1.1431448459625244,
      "learning_rate": 0.00019676200074383692,
      "loss": 0.2941,
      "step": 87
    },
    {
      "epoch": 0.0918580375782881,
      "grad_norm": 1.0742226839065552,
      "learning_rate": 0.00019667782254580374,
      "loss": 0.365,
      "step": 88
    },
    {
      "epoch": 0.09290187891440502,
      "grad_norm": 1.0580729246139526,
      "learning_rate": 0.00019659258262890683,
      "loss": 0.351,
      "step": 89
    },
    {
      "epoch": 0.09394572025052192,
      "grad_norm": 1.1420572996139526,
      "learning_rate": 0.0001965062819292537,
      "loss": 0.3829,
      "step": 90
    },
    {
      "epoch": 0.09498956158663883,
      "grad_norm": 0.9046992063522339,
      "learning_rate": 0.0001964189213946013,
      "loss": 0.3192,
      "step": 91
    },
    {
      "epoch": 0.09603340292275574,
      "grad_norm": 0.94282066822052,
      "learning_rate": 0.00019633050198434576,
      "loss": 0.251,
      "step": 92
    },
    {
      "epoch": 0.09707724425887265,
      "grad_norm": 0.9391703009605408,
      "learning_rate": 0.0001962410246695118,
      "loss": 0.2545,
      "step": 93
    },
    {
      "epoch": 0.09812108559498957,
      "grad_norm": 1.068588137626648,
      "learning_rate": 0.00019615049043274205,
      "loss": 0.2889,
      "step": 94
    },
    {
      "epoch": 0.09916492693110647,
      "grad_norm": 0.8007175922393799,
      "learning_rate": 0.00019605890026828634,
      "loss": 0.1906,
      "step": 95
    },
    {
      "epoch": 0.10020876826722339,
      "grad_norm": 1.195952296257019,
      "learning_rate": 0.00019596625518199077,
      "loss": 0.3045,
      "step": 96
    },
    {
      "epoch": 0.10125260960334029,
      "grad_norm": 1.1844594478607178,
      "learning_rate": 0.00019587255619128648,
      "loss": 0.2509,
      "step": 97
    },
    {
      "epoch": 0.1022964509394572,
      "grad_norm": 1.0775648355484009,
      "learning_rate": 0.00019577780432517879,
      "loss": 0.1594,
      "step": 98
    },
    {
      "epoch": 0.10334029227557412,
      "grad_norm": 1.0487576723098755,
      "learning_rate": 0.00019568200062423555,
      "loss": 0.1619,
      "step": 99
    },
    {
      "epoch": 0.10438413361169102,
      "grad_norm": 3.752366304397583,
      "learning_rate": 0.00019558514614057609,
      "loss": 0.4774,
      "step": 100
    },
    {
      "epoch": 0.10542797494780794,
      "grad_norm": 1.1260536909103394,
      "learning_rate": 0.00019548724193785933,
      "loss": 0.4538,
      "step": 101
    },
    {
      "epoch": 0.10647181628392484,
      "grad_norm": 1.2162113189697266,
      "learning_rate": 0.0001953882890912723,
      "loss": 0.4851,
      "step": 102
    },
    {
      "epoch": 0.10751565762004175,
      "grad_norm": 1.2823134660720825,
      "learning_rate": 0.00019528828868751818,
      "loss": 0.5302,
      "step": 103
    },
    {
      "epoch": 0.10855949895615867,
      "grad_norm": 1.2430353164672852,
      "learning_rate": 0.0001951872418248046,
      "loss": 0.4463,
      "step": 104
    },
    {
      "epoch": 0.10960334029227557,
      "grad_norm": 0.8766555786132812,
      "learning_rate": 0.00019508514961283138,
      "loss": 0.3673,
      "step": 105
    },
    {
      "epoch": 0.11064718162839249,
      "grad_norm": 1.0745108127593994,
      "learning_rate": 0.00019498201317277828,
      "loss": 0.4313,
      "step": 106
    },
    {
      "epoch": 0.11169102296450939,
      "grad_norm": 0.904514729976654,
      "learning_rate": 0.00019487783363729294,
      "loss": 0.3183,
      "step": 107
    },
    {
      "epoch": 0.1127348643006263,
      "grad_norm": 1.1046079397201538,
      "learning_rate": 0.00019477261215047835,
      "loss": 0.389,
      "step": 108
    },
    {
      "epoch": 0.11377870563674322,
      "grad_norm": 0.9722571969032288,
      "learning_rate": 0.00019466634986788005,
      "loss": 0.3647,
      "step": 109
    },
    {
      "epoch": 0.11482254697286012,
      "grad_norm": 0.8949522376060486,
      "learning_rate": 0.0001945590479564738,
      "loss": 0.3807,
      "step": 110
    },
    {
      "epoch": 0.11586638830897704,
      "grad_norm": 0.8489611744880676,
      "learning_rate": 0.00019445070759465253,
      "loss": 0.3148,
      "step": 111
    },
    {
      "epoch": 0.11691022964509394,
      "grad_norm": 0.8171373009681702,
      "learning_rate": 0.00019434132997221345,
      "loss": 0.2941,
      "step": 112
    },
    {
      "epoch": 0.11795407098121086,
      "grad_norm": 0.8785704970359802,
      "learning_rate": 0.00019423091629034507,
      "loss": 0.3087,
      "step": 113
    },
    {
      "epoch": 0.11899791231732777,
      "grad_norm": 0.8450028896331787,
      "learning_rate": 0.00019411946776161387,
      "loss": 0.2428,
      "step": 114
    },
    {
      "epoch": 0.12004175365344467,
      "grad_norm": 0.7037304639816284,
      "learning_rate": 0.00019400698560995103,
      "loss": 0.1534,
      "step": 115
    },
    {
      "epoch": 0.12108559498956159,
      "grad_norm": 0.8595078587532043,
      "learning_rate": 0.00019389347107063912,
      "loss": 0.1996,
      "step": 116
    },
    {
      "epoch": 0.12212943632567849,
      "grad_norm": 0.7243440747261047,
      "learning_rate": 0.00019377892539029827,
      "loss": 0.1407,
      "step": 117
    },
    {
      "epoch": 0.12317327766179541,
      "grad_norm": 0.7662886381149292,
      "learning_rate": 0.0001936633498268728,
      "loss": 0.187,
      "step": 118
    },
    {
      "epoch": 0.12421711899791232,
      "grad_norm": 0.9693275094032288,
      "learning_rate": 0.0001935467456496171,
      "loss": 0.2622,
      "step": 119
    },
    {
      "epoch": 0.12526096033402923,
      "grad_norm": 0.827397882938385,
      "learning_rate": 0.0001934291141390819,
      "loss": 0.2819,
      "step": 120
    },
    {
      "epoch": 0.12630480167014613,
      "grad_norm": 0.7069506645202637,
      "learning_rate": 0.0001933104565871001,
      "loss": 0.2907,
      "step": 121
    },
    {
      "epoch": 0.12734864300626306,
      "grad_norm": 0.7466704845428467,
      "learning_rate": 0.00019319077429677268,
      "loss": 0.209,
      "step": 122
    },
    {
      "epoch": 0.12839248434237996,
      "grad_norm": 0.8960225582122803,
      "learning_rate": 0.00019307006858245424,
      "loss": 0.219,
      "step": 123
    },
    {
      "epoch": 0.12943632567849686,
      "grad_norm": 0.7968412041664124,
      "learning_rate": 0.0001929483407697387,
      "loss": 0.292,
      "step": 124
    },
    {
      "epoch": 0.1304801670146138,
      "grad_norm": 0.7965363264083862,
      "learning_rate": 0.00019282559219544477,
      "loss": 0.3051,
      "step": 125
    },
    {
      "epoch": 0.1315240083507307,
      "grad_norm": 1.490718126296997,
      "learning_rate": 0.00019270182420760102,
      "loss": 0.485,
      "step": 126
    },
    {
      "epoch": 0.1325678496868476,
      "grad_norm": 1.431773066520691,
      "learning_rate": 0.00019257703816543144,
      "loss": 0.4818,
      "step": 127
    },
    {
      "epoch": 0.1336116910229645,
      "grad_norm": 1.4318815469741821,
      "learning_rate": 0.00019245123543934017,
      "loss": 0.5472,
      "step": 128
    },
    {
      "epoch": 0.13465553235908143,
      "grad_norm": 1.2134075164794922,
      "learning_rate": 0.00019232441741089676,
      "loss": 0.4773,
      "step": 129
    },
    {
      "epoch": 0.13569937369519833,
      "grad_norm": 1.110347032546997,
      "learning_rate": 0.00019219658547282067,
      "loss": 0.4516,
      "step": 130
    },
    {
      "epoch": 0.13674321503131523,
      "grad_norm": 1.122799277305603,
      "learning_rate": 0.00019206774102896627,
      "loss": 0.5222,
      "step": 131
    },
    {
      "epoch": 0.13778705636743216,
      "grad_norm": 0.9539543390274048,
      "learning_rate": 0.00019193788549430724,
      "loss": 0.3994,
      "step": 132
    },
    {
      "epoch": 0.13883089770354906,
      "grad_norm": 0.9659099578857422,
      "learning_rate": 0.00019180702029492118,
      "loss": 0.3946,
      "step": 133
    },
    {
      "epoch": 0.13987473903966596,
      "grad_norm": 1.0640462636947632,
      "learning_rate": 0.00019167514686797369,
      "loss": 0.4409,
      "step": 134
    },
    {
      "epoch": 0.1409185803757829,
      "grad_norm": 1.1047799587249756,
      "learning_rate": 0.00019154226666170295,
      "loss": 0.4123,
      "step": 135
    },
    {
      "epoch": 0.1419624217118998,
      "grad_norm": 0.9255096912384033,
      "learning_rate": 0.00019140838113540346,
      "loss": 0.2524,
      "step": 136
    },
    {
      "epoch": 0.1430062630480167,
      "grad_norm": 0.9960671663284302,
      "learning_rate": 0.00019127349175941032,
      "loss": 0.3268,
      "step": 137
    },
    {
      "epoch": 0.1440501043841336,
      "grad_norm": 1.0808758735656738,
      "learning_rate": 0.0001911376000150828,
      "loss": 0.4802,
      "step": 138
    },
    {
      "epoch": 0.14509394572025053,
      "grad_norm": 0.9004728198051453,
      "learning_rate": 0.00019100070739478832,
      "loss": 0.3092,
      "step": 139
    },
    {
      "epoch": 0.14613778705636743,
      "grad_norm": 0.9561731219291687,
      "learning_rate": 0.00019086281540188588,
      "loss": 0.2572,
      "step": 140
    },
    {
      "epoch": 0.14718162839248433,
      "grad_norm": 1.1557111740112305,
      "learning_rate": 0.00019072392555070965,
      "loss": 0.3391,
      "step": 141
    },
    {
      "epoch": 0.14822546972860126,
      "grad_norm": 1.0732665061950684,
      "learning_rate": 0.00019058403936655233,
      "loss": 0.2717,
      "step": 142
    },
    {
      "epoch": 0.14926931106471816,
      "grad_norm": 1.2010210752487183,
      "learning_rate": 0.00019044315838564834,
      "loss": 0.3122,
      "step": 143
    },
    {
      "epoch": 0.15031315240083507,
      "grad_norm": 1.0502979755401611,
      "learning_rate": 0.000190301284155157,
      "loss": 0.2789,
      "step": 144
    },
    {
      "epoch": 0.151356993736952,
      "grad_norm": 0.835246205329895,
      "learning_rate": 0.0001901584182331456,
      "loss": 0.1788,
      "step": 145
    },
    {
      "epoch": 0.1524008350730689,
      "grad_norm": 0.9552949666976929,
      "learning_rate": 0.00019001456218857208,
      "loss": 0.2785,
      "step": 146
    },
    {
      "epoch": 0.1534446764091858,
      "grad_norm": 0.6930809617042542,
      "learning_rate": 0.00018986971760126805,
      "loss": 0.1823,
      "step": 147
    },
    {
      "epoch": 0.1544885177453027,
      "grad_norm": 0.6930037140846252,
      "learning_rate": 0.00018972388606192125,
      "loss": 0.1895,
      "step": 148
    },
    {
      "epoch": 0.15553235908141963,
      "grad_norm": 0.8468357920646667,
      "learning_rate": 0.0001895770691720582,
      "loss": 0.1717,
      "step": 149
    },
    {
      "epoch": 0.15657620041753653,
      "grad_norm": 0.9056650996208191,
      "learning_rate": 0.0001894292685440266,
      "loss": 0.1854,
      "step": 150
    },
    {
      "epoch": 0.15762004175365343,
      "grad_norm": 1.131759524345398,
      "learning_rate": 0.00018928048580097757,
      "loss": 0.441,
      "step": 151
    },
    {
      "epoch": 0.15866388308977036,
      "grad_norm": 1.426763892173767,
      "learning_rate": 0.00018913072257684778,
      "loss": 0.5142,
      "step": 152
    },
    {
      "epoch": 0.15970772442588727,
      "grad_norm": 1.4198706150054932,
      "learning_rate": 0.00018897998051634166,
      "loss": 0.5212,
      "step": 153
    },
    {
      "epoch": 0.16075156576200417,
      "grad_norm": 1.228197693824768,
      "learning_rate": 0.0001888282612749132,
      "loss": 0.4639,
      "step": 154
    },
    {
      "epoch": 0.1617954070981211,
      "grad_norm": 0.7881720662117004,
      "learning_rate": 0.0001886755665187479,
      "loss": 0.3905,
      "step": 155
    },
    {
      "epoch": 0.162839248434238,
      "grad_norm": 0.7964795231819153,
      "learning_rate": 0.00018852189792474425,
      "loss": 0.3802,
      "step": 156
    },
    {
      "epoch": 0.1638830897703549,
      "grad_norm": 1.1133593320846558,
      "learning_rate": 0.00018836725718049562,
      "loss": 0.3962,
      "step": 157
    },
    {
      "epoch": 0.1649269311064718,
      "grad_norm": 1.182551383972168,
      "learning_rate": 0.00018821164598427145,
      "loss": 0.3305,
      "step": 158
    },
    {
      "epoch": 0.16597077244258873,
      "grad_norm": 0.8984887003898621,
      "learning_rate": 0.0001880550660449988,
      "loss": 0.2847,
      "step": 159
    },
    {
      "epoch": 0.16701461377870563,
      "grad_norm": 0.8994187116622925,
      "learning_rate": 0.00018789751908224338,
      "loss": 0.346,
      "step": 160
    },
    {
      "epoch": 0.16805845511482254,
      "grad_norm": 0.7379996180534363,
      "learning_rate": 0.0001877390068261909,
      "loss": 0.3183,
      "step": 161
    },
    {
      "epoch": 0.16910229645093947,
      "grad_norm": 0.719083845615387,
      "learning_rate": 0.00018757953101762787,
      "loss": 0.2136,
      "step": 162
    },
    {
      "epoch": 0.17014613778705637,
      "grad_norm": 1.2782928943634033,
      "learning_rate": 0.00018741909340792262,
      "loss": 0.1879,
      "step": 163
    },
    {
      "epoch": 0.17118997912317327,
      "grad_norm": 0.6968748569488525,
      "learning_rate": 0.000187257695759006,
      "loss": 0.1594,
      "step": 164
    },
    {
      "epoch": 0.1722338204592902,
      "grad_norm": 0.8432148098945618,
      "learning_rate": 0.00018709533984335192,
      "loss": 0.1767,
      "step": 165
    },
    {
      "epoch": 0.1732776617954071,
      "grad_norm": 0.6877685785293579,
      "learning_rate": 0.00018693202744395827,
      "loss": 0.1787,
      "step": 166
    },
    {
      "epoch": 0.174321503131524,
      "grad_norm": 0.9011303782463074,
      "learning_rate": 0.0001867677603543268,
      "loss": 0.1692,
      "step": 167
    },
    {
      "epoch": 0.17536534446764093,
      "grad_norm": 0.8391153216362,
      "learning_rate": 0.00018660254037844388,
      "loss": 0.2701,
      "step": 168
    },
    {
      "epoch": 0.17640918580375783,
      "grad_norm": 0.7215454578399658,
      "learning_rate": 0.0001864363693307604,
      "loss": 0.2727,
      "step": 169
    },
    {
      "epoch": 0.17745302713987474,
      "grad_norm": 0.9198095202445984,
      "learning_rate": 0.000186269249036172,
      "loss": 0.2709,
      "step": 170
    },
    {
      "epoch": 0.17849686847599164,
      "grad_norm": 0.6714828014373779,
      "learning_rate": 0.00018610118132999896,
      "loss": 0.2343,
      "step": 171
    },
    {
      "epoch": 0.17954070981210857,
      "grad_norm": 0.7863060235977173,
      "learning_rate": 0.00018593216805796612,
      "loss": 0.3066,
      "step": 172
    },
    {
      "epoch": 0.18058455114822547,
      "grad_norm": 0.8642680644989014,
      "learning_rate": 0.00018576221107618243,
      "loss": 0.3599,
      "step": 173
    },
    {
      "epoch": 0.18162839248434237,
      "grad_norm": 1.3609846830368042,
      "learning_rate": 0.00018559131225112085,
      "loss": 0.435,
      "step": 174
    },
    {
      "epoch": 0.1826722338204593,
      "grad_norm": 1.3618521690368652,
      "learning_rate": 0.00018541947345959754,
      "loss": 0.5144,
      "step": 175
    },
    {
      "epoch": 0.1837160751565762,
      "grad_norm": 1.10430908203125,
      "learning_rate": 0.00018524669658875152,
      "loss": 0.465,
      "step": 176
    },
    {
      "epoch": 0.1847599164926931,
      "grad_norm": 0.8763125538825989,
      "learning_rate": 0.00018507298353602375,
      "loss": 0.4001,
      "step": 177
    },
    {
      "epoch": 0.18580375782881003,
      "grad_norm": 0.979162335395813,
      "learning_rate": 0.00018489833620913642,
      "loss": 0.4281,
      "step": 178
    },
    {
      "epoch": 0.18684759916492694,
      "grad_norm": 0.9191274642944336,
      "learning_rate": 0.00018472275652607186,
      "loss": 0.4362,
      "step": 179
    },
    {
      "epoch": 0.18789144050104384,
      "grad_norm": 1.0854690074920654,
      "learning_rate": 0.0001845462464150517,
      "loss": 0.4294,
      "step": 180
    },
    {
      "epoch": 0.18893528183716074,
      "grad_norm": 1.0451358556747437,
      "learning_rate": 0.00018436880781451544,
      "loss": 0.4065,
      "step": 181
    },
    {
      "epoch": 0.18997912317327767,
      "grad_norm": 1.0197910070419312,
      "learning_rate": 0.00018419044267309939,
      "loss": 0.3585,
      "step": 182
    },
    {
      "epoch": 0.19102296450939457,
      "grad_norm": 0.9235432744026184,
      "learning_rate": 0.000184011152949615,
      "loss": 0.4618,
      "step": 183
    },
    {
      "epoch": 0.19206680584551147,
      "grad_norm": 0.9856287837028503,
      "learning_rate": 0.00018383094061302766,
      "loss": 0.3951,
      "step": 184
    },
    {
      "epoch": 0.1931106471816284,
      "grad_norm": 0.851546585559845,
      "learning_rate": 0.0001836498076424349,
      "loss": 0.3907,
      "step": 185
    },
    {
      "epoch": 0.1941544885177453,
      "grad_norm": 0.936446487903595,
      "learning_rate": 0.00018346775602704464,
      "loss": 0.3234,
      "step": 186
    },
    {
      "epoch": 0.1951983298538622,
      "grad_norm": 0.8742521405220032,
      "learning_rate": 0.00018328478776615334,
      "loss": 0.3065,
      "step": 187
    },
    {
      "epoch": 0.19624217118997914,
      "grad_norm": 0.6365717053413391,
      "learning_rate": 0.00018310090486912424,
      "loss": 0.1979,
      "step": 188
    },
    {
      "epoch": 0.19728601252609604,
      "grad_norm": 0.7701941132545471,
      "learning_rate": 0.000182916109355365,
      "loss": 0.2322,
      "step": 189
    },
    {
      "epoch": 0.19832985386221294,
      "grad_norm": 0.8491252064704895,
      "learning_rate": 0.00018273040325430574,
      "loss": 0.2477,
      "step": 190
    },
    {
      "epoch": 0.19937369519832984,
      "grad_norm": 0.8160005807876587,
      "learning_rate": 0.00018254378860537666,
      "loss": 0.2554,
      "step": 191
    },
    {
      "epoch": 0.20041753653444677,
      "grad_norm": 0.7356383800506592,
      "learning_rate": 0.0001823562674579856,
      "loss": 0.2209,
      "step": 192
    },
    {
      "epoch": 0.20146137787056367,
      "grad_norm": 0.734779953956604,
      "learning_rate": 0.0001821678418714957,
      "loss": 0.2162,
      "step": 193
    },
    {
      "epoch": 0.20250521920668058,
      "grad_norm": 0.8594037294387817,
      "learning_rate": 0.00018197851391520264,
      "loss": 0.2831,
      "step": 194
    },
    {
      "epoch": 0.2035490605427975,
      "grad_norm": 0.6627790331840515,
      "learning_rate": 0.00018178828566831192,
      "loss": 0.1818,
      "step": 195
    },
    {
      "epoch": 0.2045929018789144,
      "grad_norm": 0.9464810490608215,
      "learning_rate": 0.00018159715921991612,
      "loss": 0.2376,
      "step": 196
    },
    {
      "epoch": 0.2056367432150313,
      "grad_norm": 0.8601820468902588,
      "learning_rate": 0.00018140513666897178,
      "loss": 0.1824,
      "step": 197
    },
    {
      "epoch": 0.20668058455114824,
      "grad_norm": 0.7504302263259888,
      "learning_rate": 0.00018121222012427665,
      "loss": 0.2081,
      "step": 198
    },
    {
      "epoch": 0.20772442588726514,
      "grad_norm": 0.6832506060600281,
      "learning_rate": 0.00018101841170444616,
      "loss": 0.1742,
      "step": 199
    },
    {
      "epoch": 0.20876826722338204,
      "grad_norm": 1.3405510187149048,
      "learning_rate": 0.00018082371353789046,
      "loss": 0.2519,
      "step": 200
    },
    {
      "epoch": 0.20981210855949894,
      "grad_norm": 0.8375347852706909,
      "learning_rate": 0.00018062812776279086,
      "loss": 0.3868,
      "step": 201
    },
    {
      "epoch": 0.21085594989561587,
      "grad_norm": 0.9397875070571899,
      "learning_rate": 0.00018043165652707649,
      "loss": 0.4824,
      "step": 202
    },
    {
      "epoch": 0.21189979123173278,
      "grad_norm": 0.9745060205459595,
      "learning_rate": 0.00018023430198840058,
      "loss": 0.4726,
      "step": 203
    },
    {
      "epoch": 0.21294363256784968,
      "grad_norm": 0.9175118207931519,
      "learning_rate": 0.00018003606631411678,
      "loss": 0.3722,
      "step": 204
    },
    {
      "epoch": 0.2139874739039666,
      "grad_norm": 0.831420361995697,
      "learning_rate": 0.0001798369516812555,
      "loss": 0.387,
      "step": 205
    },
    {
      "epoch": 0.2150313152400835,
      "grad_norm": 0.7558404207229614,
      "learning_rate": 0.00017963696027649986,
      "loss": 0.3522,
      "step": 206
    },
    {
      "epoch": 0.2160751565762004,
      "grad_norm": 0.8819952607154846,
      "learning_rate": 0.0001794360942961617,
      "loss": 0.3665,
      "step": 207
    },
    {
      "epoch": 0.21711899791231734,
      "grad_norm": 0.9681791663169861,
      "learning_rate": 0.00017923435594615744,
      "loss": 0.3571,
      "step": 208
    },
    {
      "epoch": 0.21816283924843424,
      "grad_norm": 1.2287527322769165,
      "learning_rate": 0.000179031747441984,
      "loss": 0.269,
      "step": 209
    },
    {
      "epoch": 0.21920668058455114,
      "grad_norm": 0.9387004375457764,
      "learning_rate": 0.0001788282710086942,
      "loss": 0.2254,
      "step": 210
    },
    {
      "epoch": 0.22025052192066805,
      "grad_norm": 0.6556700468063354,
      "learning_rate": 0.0001786239288808727,
      "loss": 0.2438,
      "step": 211
    },
    {
      "epoch": 0.22129436325678498,
      "grad_norm": 0.8871288895606995,
      "learning_rate": 0.00017841872330261101,
      "loss": 0.2408,
      "step": 212
    },
    {
      "epoch": 0.22233820459290188,
      "grad_norm": 0.6930763125419617,
      "learning_rate": 0.00017821265652748328,
      "loss": 0.1755,
      "step": 213
    },
    {
      "epoch": 0.22338204592901878,
      "grad_norm": 0.6196439862251282,
      "learning_rate": 0.00017800573081852122,
      "loss": 0.1714,
      "step": 214
    },
    {
      "epoch": 0.2244258872651357,
      "grad_norm": 0.4868805408477783,
      "learning_rate": 0.00017779794844818944,
      "loss": 0.1688,
      "step": 215
    },
    {
      "epoch": 0.2254697286012526,
      "grad_norm": 0.5908803343772888,
      "learning_rate": 0.0001775893116983604,
      "loss": 0.1653,
      "step": 216
    },
    {
      "epoch": 0.2265135699373695,
      "grad_norm": 0.7646514177322388,
      "learning_rate": 0.00017737982286028937,
      "loss": 0.3009,
      "step": 217
    },
    {
      "epoch": 0.22755741127348644,
      "grad_norm": 0.7528402209281921,
      "learning_rate": 0.00017716948423458938,
      "loss": 0.3261,
      "step": 218
    },
    {
      "epoch": 0.22860125260960334,
      "grad_norm": 0.6268967390060425,
      "learning_rate": 0.0001769582981312057,
      "loss": 0.2322,
      "step": 219
    },
    {
      "epoch": 0.22964509394572025,
      "grad_norm": 0.7152937650680542,
      "learning_rate": 0.0001767462668693908,
      "loss": 0.2756,
      "step": 220
    },
    {
      "epoch": 0.23068893528183715,
      "grad_norm": 0.7480033040046692,
      "learning_rate": 0.00017653339277767853,
      "loss": 0.2613,
      "step": 221
    },
    {
      "epoch": 0.23173277661795408,
      "grad_norm": 0.6832448840141296,
      "learning_rate": 0.00017631967819385885,
      "loss": 0.1719,
      "step": 222
    },
    {
      "epoch": 0.23277661795407098,
      "grad_norm": 0.8552375435829163,
      "learning_rate": 0.00017610512546495195,
      "loss": 0.3867,
      "step": 223
    },
    {
      "epoch": 0.23382045929018788,
      "grad_norm": 1.193741798400879,
      "learning_rate": 0.00017588973694718262,
      "loss": 0.4765,
      "step": 224
    },
    {
      "epoch": 0.2348643006263048,
      "grad_norm": 1.2147541046142578,
      "learning_rate": 0.00017567351500595425,
      "loss": 0.4603,
      "step": 225
    },
    {
      "epoch": 0.2359081419624217,
      "grad_norm": 1.028960108757019,
      "learning_rate": 0.00017545646201582303,
      "loss": 0.4966,
      "step": 226
    },
    {
      "epoch": 0.23695198329853862,
      "grad_norm": 1.021507978439331,
      "learning_rate": 0.00017523858036047157,
      "loss": 0.3946,
      "step": 227
    },
    {
      "epoch": 0.23799582463465555,
      "grad_norm": 0.9800359606742859,
      "learning_rate": 0.00017501987243268306,
      "loss": 0.4061,
      "step": 228
    },
    {
      "epoch": 0.23903966597077245,
      "grad_norm": 1.0288289785385132,
      "learning_rate": 0.00017480034063431477,
      "loss": 0.4,
      "step": 229
    },
    {
      "epoch": 0.24008350730688935,
      "grad_norm": 0.9607250690460205,
      "learning_rate": 0.00017457998737627182,
      "loss": 0.3849,
      "step": 230
    },
    {
      "epoch": 0.24112734864300625,
      "grad_norm": 0.9097132086753845,
      "learning_rate": 0.00017435881507848046,
      "loss": 0.4232,
      "step": 231
    },
    {
      "epoch": 0.24217118997912318,
      "grad_norm": 0.9774672985076904,
      "learning_rate": 0.00017413682616986185,
      "loss": 0.4199,
      "step": 232
    },
    {
      "epoch": 0.24321503131524008,
      "grad_norm": 0.9056701064109802,
      "learning_rate": 0.00017391402308830503,
      "loss": 0.3844,
      "step": 233
    },
    {
      "epoch": 0.24425887265135698,
      "grad_norm": 0.9123631119728088,
      "learning_rate": 0.00017369040828064047,
      "loss": 0.3614,
      "step": 234
    },
    {
      "epoch": 0.2453027139874739,
      "grad_norm": 0.8399245142936707,
      "learning_rate": 0.00017346598420261294,
      "loss": 0.3572,
      "step": 235
    },
    {
      "epoch": 0.24634655532359082,
      "grad_norm": 1.1125223636627197,
      "learning_rate": 0.00017324075331885466,
      "loss": 0.3155,
      "step": 236
    },
    {
      "epoch": 0.24739039665970772,
      "grad_norm": 0.7240562438964844,
      "learning_rate": 0.00017301471810285822,
      "loss": 0.281,
      "step": 237
    },
    {
      "epoch": 0.24843423799582465,
      "grad_norm": 0.6814681887626648,
      "learning_rate": 0.00017278788103694943,
      "loss": 0.2605,
      "step": 238
    },
    {
      "epoch": 0.24947807933194155,
      "grad_norm": 0.7790616154670715,
      "learning_rate": 0.00017256024461226001,
      "loss": 0.3214,
      "step": 239
    },
    {
      "epoch": 0.25052192066805845,
      "grad_norm": 0.7958056926727295,
      "learning_rate": 0.00017233181132870028,
      "loss": 0.2668,
      "step": 240
    },
    {
      "epoch": 0.25052192066805845,
      "eval_loss": 0.28679800033569336,
      "eval_runtime": 81.356,
      "eval_samples_per_second": 19.839,
      "eval_steps_per_second": 9.919,
      "step": 240
    },
    {
      "epoch": 0.25156576200417535,
      "grad_norm": 0.7587823271751404,
      "learning_rate": 0.0001721025836949317,
      "loss": 0.2911,
      "step": 241
    },
    {
      "epoch": 0.25260960334029225,
      "grad_norm": 0.8206672668457031,
      "learning_rate": 0.00017187256422833929,
      "loss": 0.2602,
      "step": 242
    },
    {
      "epoch": 0.2536534446764092,
      "grad_norm": 0.7913001179695129,
      "learning_rate": 0.000171641755455004,
      "loss": 0.242,
      "step": 243
    },
    {
      "epoch": 0.2546972860125261,
      "grad_norm": 0.6242060661315918,
      "learning_rate": 0.000171410159909675,
      "loss": 0.2098,
      "step": 244
    },
    {
      "epoch": 0.255741127348643,
      "grad_norm": 0.9254711270332336,
      "learning_rate": 0.00017117778013574174,
      "loss": 0.2028,
      "step": 245
    },
    {
      "epoch": 0.2567849686847599,
      "grad_norm": 0.804876446723938,
      "learning_rate": 0.00017094461868520622,
      "loss": 0.215,
      "step": 246
    },
    {
      "epoch": 0.2578288100208768,
      "grad_norm": 0.786629319190979,
      "learning_rate": 0.00017071067811865476,
      "loss": 0.2075,
      "step": 247
    },
    {
      "epoch": 0.2588726513569937,
      "grad_norm": 0.9948318004608154,
      "learning_rate": 0.0001704759610052299,
      "loss": 0.243,
      "step": 248
    },
    {
      "epoch": 0.2599164926931106,
      "grad_norm": 1.256179928779602,
      "learning_rate": 0.00017024046992260237,
      "loss": 0.2296,
      "step": 249
    },
    {
      "epoch": 0.2609603340292276,
      "grad_norm": 0.9201668500900269,
      "learning_rate": 0.00017000420745694254,
      "loss": 0.1739,
      "step": 250
    },
    {
      "epoch": 0.2620041753653445,
      "grad_norm": 0.8208956122398376,
      "learning_rate": 0.0001697671762028922,
      "loss": 0.4128,
      "step": 251
    },
    {
      "epoch": 0.2630480167014614,
      "grad_norm": 0.8502789735794067,
      "learning_rate": 0.00016952937876353597,
      "loss": 0.4016,
      "step": 252
    },
    {
      "epoch": 0.2640918580375783,
      "grad_norm": 1.1290831565856934,
      "learning_rate": 0.00016929081775037276,
      "loss": 0.442,
      "step": 253
    },
    {
      "epoch": 0.2651356993736952,
      "grad_norm": 0.9651896953582764,
      "learning_rate": 0.00016905149578328702,
      "loss": 0.4346,
      "step": 254
    },
    {
      "epoch": 0.2661795407098121,
      "grad_norm": 0.9524455070495605,
      "learning_rate": 0.00016881141549052013,
      "loss": 0.3441,
      "step": 255
    },
    {
      "epoch": 0.267223382045929,
      "grad_norm": 0.8862738013267517,
      "learning_rate": 0.00016857057950864132,
      "loss": 0.3336,
      "step": 256
    },
    {
      "epoch": 0.26826722338204595,
      "grad_norm": 0.8053174614906311,
      "learning_rate": 0.00016832899048251884,
      "loss": 0.3265,
      "step": 257
    },
    {
      "epoch": 0.26931106471816285,
      "grad_norm": 0.7121575474739075,
      "learning_rate": 0.00016808665106529094,
      "loss": 0.3,
      "step": 258
    },
    {
      "epoch": 0.27035490605427975,
      "grad_norm": 0.8516458868980408,
      "learning_rate": 0.00016784356391833665,
      "loss": 0.2941,
      "step": 259
    },
    {
      "epoch": 0.27139874739039666,
      "grad_norm": 0.9382047653198242,
      "learning_rate": 0.0001675997317112466,
      "loss": 0.3625,
      "step": 260
    },
    {
      "epoch": 0.27244258872651356,
      "grad_norm": 1.0630011558532715,
      "learning_rate": 0.00016735515712179368,
      "loss": 0.2875,
      "step": 261
    },
    {
      "epoch": 0.27348643006263046,
      "grad_norm": 0.8051387071609497,
      "learning_rate": 0.0001671098428359037,
      "loss": 0.2498,
      "step": 262
    },
    {
      "epoch": 0.2745302713987474,
      "grad_norm": 0.898259162902832,
      "learning_rate": 0.00016686379154762574,
      "loss": 0.2293,
      "step": 263
    },
    {
      "epoch": 0.2755741127348643,
      "grad_norm": 0.6109540462493896,
      "learning_rate": 0.00016661700595910285,
      "loss": 0.2029,
      "step": 264
    },
    {
      "epoch": 0.2766179540709812,
      "grad_norm": 0.5780929327011108,
      "learning_rate": 0.0001663694887805419,
      "loss": 0.1858,
      "step": 265
    },
    {
      "epoch": 0.2776617954070981,
      "grad_norm": 0.5590531826019287,
      "learning_rate": 0.0001661212427301844,
      "loss": 0.1911,
      "step": 266
    },
    {
      "epoch": 0.278705636743215,
      "grad_norm": 0.382966548204422,
      "learning_rate": 0.00016587227053427612,
      "loss": 0.114,
      "step": 267
    },
    {
      "epoch": 0.2797494780793319,
      "grad_norm": 0.7528713941574097,
      "learning_rate": 0.00016562257492703757,
      "loss": 0.2135,
      "step": 268
    },
    {
      "epoch": 0.2807933194154488,
      "grad_norm": 0.5933663845062256,
      "learning_rate": 0.00016537215865063365,
      "loss": 0.1993,
      "step": 269
    },
    {
      "epoch": 0.2818371607515658,
      "grad_norm": 0.7138686776161194,
      "learning_rate": 0.00016512102445514375,
      "loss": 0.2874,
      "step": 270
    },
    {
      "epoch": 0.2828810020876827,
      "grad_norm": 0.6350995898246765,
      "learning_rate": 0.0001648691750985314,
      "loss": 0.2225,
      "step": 271
    },
    {
      "epoch": 0.2839248434237996,
      "grad_norm": 0.7503290772438049,
      "learning_rate": 0.00016461661334661414,
      "loss": 0.2687,
      "step": 272
    },
    {
      "epoch": 0.2849686847599165,
      "grad_norm": 0.633063018321991,
      "learning_rate": 0.00016436334197303295,
      "loss": 0.1867,
      "step": 273
    },
    {
      "epoch": 0.2860125260960334,
      "grad_norm": 0.750601053237915,
      "learning_rate": 0.000164109363759222,
      "loss": 0.1644,
      "step": 274
    },
    {
      "epoch": 0.2870563674321503,
      "grad_norm": 0.9629106521606445,
      "learning_rate": 0.000163854681494378,
      "loss": 0.463,
      "step": 275
    },
    {
      "epoch": 0.2881002087682672,
      "grad_norm": 1.2181180715560913,
      "learning_rate": 0.00016359929797542944,
      "loss": 0.497,
      "step": 276
    },
    {
      "epoch": 0.28914405010438415,
      "grad_norm": 1.0051687955856323,
      "learning_rate": 0.00016334321600700613,
      "loss": 0.3734,
      "step": 277
    },
    {
      "epoch": 0.29018789144050106,
      "grad_norm": 0.8457571268081665,
      "learning_rate": 0.00016308643840140828,
      "loss": 0.3634,
      "step": 278
    },
    {
      "epoch": 0.29123173277661796,
      "grad_norm": 0.8827991485595703,
      "learning_rate": 0.00016282896797857548,
      "loss": 0.4218,
      "step": 279
    },
    {
      "epoch": 0.29227557411273486,
      "grad_norm": 0.767482578754425,
      "learning_rate": 0.000162570807566056,
      "loss": 0.3099,
      "step": 280
    },
    {
      "epoch": 0.29331941544885176,
      "grad_norm": 0.9973590970039368,
      "learning_rate": 0.00016231195999897558,
      "loss": 0.3751,
      "step": 281
    },
    {
      "epoch": 0.29436325678496866,
      "grad_norm": 1.094586730003357,
      "learning_rate": 0.00016205242812000617,
      "loss": 0.3329,
      "step": 282
    },
    {
      "epoch": 0.2954070981210856,
      "grad_norm": 1.1051408052444458,
      "learning_rate": 0.0001617922147793351,
      "loss": 0.3888,
      "step": 283
    },
    {
      "epoch": 0.2964509394572025,
      "grad_norm": 1.1265891790390015,
      "learning_rate": 0.00016153132283463326,
      "loss": 0.3881,
      "step": 284
    },
    {
      "epoch": 0.2974947807933194,
      "grad_norm": 0.9808762669563293,
      "learning_rate": 0.00016126975515102422,
      "loss": 0.3608,
      "step": 285
    },
    {
      "epoch": 0.2985386221294363,
      "grad_norm": 0.9096065759658813,
      "learning_rate": 0.00016100751460105243,
      "loss": 0.3036,
      "step": 286
    },
    {
      "epoch": 0.29958246346555323,
      "grad_norm": 0.7918301224708557,
      "learning_rate": 0.0001607446040646518,
      "loss": 0.265,
      "step": 287
    },
    {
      "epoch": 0.30062630480167013,
      "grad_norm": 0.8781275153160095,
      "learning_rate": 0.00016048102642911397,
      "loss": 0.3192,
      "step": 288
    },
    {
      "epoch": 0.30167014613778703,
      "grad_norm": 0.7994233965873718,
      "learning_rate": 0.00016021678458905684,
      "loss": 0.2619,
      "step": 289
    },
    {
      "epoch": 0.302713987473904,
      "grad_norm": 0.7719682455062866,
      "learning_rate": 0.0001599518814463925,
      "loss": 0.2373,
      "step": 290
    },
    {
      "epoch": 0.3037578288100209,
      "grad_norm": 0.7163065075874329,
      "learning_rate": 0.00015968631991029555,
      "loss": 0.2461,
      "step": 291
    },
    {
      "epoch": 0.3048016701461378,
      "grad_norm": 0.6648116707801819,
      "learning_rate": 0.00015942010289717105,
      "loss": 0.2033,
      "step": 292
    },
    {
      "epoch": 0.3058455114822547,
      "grad_norm": 0.7089317440986633,
      "learning_rate": 0.00015915323333062255,
      "loss": 0.2308,
      "step": 293
    },
    {
      "epoch": 0.3068893528183716,
      "grad_norm": 0.6273900270462036,
      "learning_rate": 0.00015888571414141996,
      "loss": 0.1762,
      "step": 294
    },
    {
      "epoch": 0.3079331941544885,
      "grad_norm": 0.570709228515625,
      "learning_rate": 0.00015861754826746734,
      "loss": 0.1612,
      "step": 295
    },
    {
      "epoch": 0.3089770354906054,
      "grad_norm": 0.7131444811820984,
      "learning_rate": 0.00015834873865377077,
      "loss": 0.1748,
      "step": 296
    },
    {
      "epoch": 0.31002087682672236,
      "grad_norm": 0.857348620891571,
      "learning_rate": 0.00015807928825240566,
      "loss": 0.2076,
      "step": 297
    },
    {
      "epoch": 0.31106471816283926,
      "grad_norm": 0.6636834740638733,
      "learning_rate": 0.00015780920002248484,
      "loss": 0.1577,
      "step": 298
    },
    {
      "epoch": 0.31210855949895616,
      "grad_norm": 0.8303975462913513,
      "learning_rate": 0.00015753847693012566,
      "loss": 0.1722,
      "step": 299
    },
    {
      "epoch": 0.31315240083507306,
      "grad_norm": 1.009426474571228,
      "learning_rate": 0.00015726712194841756,
      "loss": 0.181,
      "step": 300
    },
    {
      "epoch": 0.31419624217118997,
      "grad_norm": 0.873030960559845,
      "learning_rate": 0.0001569951380573894,
      "loss": 0.4061,
      "step": 301
    },
    {
      "epoch": 0.31524008350730687,
      "grad_norm": 0.8714029788970947,
      "learning_rate": 0.0001567225282439768,
      "loss": 0.4541,
      "step": 302
    },
    {
      "epoch": 0.3162839248434238,
      "grad_norm": 1.2070977687835693,
      "learning_rate": 0.0001564492955019892,
      "loss": 0.444,
      "step": 303
    },
    {
      "epoch": 0.3173277661795407,
      "grad_norm": 1.1216182708740234,
      "learning_rate": 0.0001561754428320771,
      "loss": 0.378,
      "step": 304
    },
    {
      "epoch": 0.31837160751565763,
      "grad_norm": 1.0232008695602417,
      "learning_rate": 0.00015590097324169909,
      "loss": 0.3044,
      "step": 305
    },
    {
      "epoch": 0.31941544885177453,
      "grad_norm": 0.8639291524887085,
      "learning_rate": 0.00015562588974508872,
      "loss": 0.3222,
      "step": 306
    },
    {
      "epoch": 0.32045929018789143,
      "grad_norm": 0.7577306032180786,
      "learning_rate": 0.00015535019536322157,
      "loss": 0.3346,
      "step": 307
    },
    {
      "epoch": 0.32150313152400833,
      "grad_norm": 0.7879360914230347,
      "learning_rate": 0.00015507389312378197,
      "loss": 0.2889,
      "step": 308
    },
    {
      "epoch": 0.32254697286012524,
      "grad_norm": 0.8477923274040222,
      "learning_rate": 0.0001547969860611297,
      "loss": 0.2948,
      "step": 309
    },
    {
      "epoch": 0.3235908141962422,
      "grad_norm": 0.8615967631340027,
      "learning_rate": 0.00015451947721626676,
      "loss": 0.3135,
      "step": 310
    },
    {
      "epoch": 0.3246346555323591,
      "grad_norm": 1.0251150131225586,
      "learning_rate": 0.00015424136963680397,
      "loss": 0.2807,
      "step": 311
    },
    {
      "epoch": 0.325678496868476,
      "grad_norm": 0.8408343195915222,
      "learning_rate": 0.00015396266637692743,
      "loss": 0.2879,
      "step": 312
    },
    {
      "epoch": 0.3267223382045929,
      "grad_norm": 0.7773711681365967,
      "learning_rate": 0.00015368337049736502,
      "loss": 0.2457,
      "step": 313
    },
    {
      "epoch": 0.3277661795407098,
      "grad_norm": 0.797435462474823,
      "learning_rate": 0.00015340348506535283,
      "loss": 0.259,
      "step": 314
    },
    {
      "epoch": 0.3288100208768267,
      "grad_norm": 0.8681854605674744,
      "learning_rate": 0.00015312301315460137,
      "loss": 0.2163,
      "step": 315
    },
    {
      "epoch": 0.3298538622129436,
      "grad_norm": 0.8305045962333679,
      "learning_rate": 0.00015284195784526195,
      "loss": 0.1331,
      "step": 316
    },
    {
      "epoch": 0.33089770354906056,
      "grad_norm": 0.4855158030986786,
      "learning_rate": 0.00015256032222389277,
      "loss": 0.153,
      "step": 317
    },
    {
      "epoch": 0.33194154488517746,
      "grad_norm": 0.4702201783657074,
      "learning_rate": 0.00015227810938342492,
      "loss": 0.1527,
      "step": 318
    },
    {
      "epoch": 0.33298538622129437,
      "grad_norm": 0.6600713133811951,
      "learning_rate": 0.0001519953224231287,
      "loss": 0.2761,
      "step": 319
    },
    {
      "epoch": 0.33402922755741127,
      "grad_norm": 0.5896007418632507,
      "learning_rate": 0.00015171196444857933,
      "loss": 0.1978,
      "step": 320
    },
    {
      "epoch": 0.33507306889352817,
      "grad_norm": 0.7142235040664673,
      "learning_rate": 0.0001514280385716229,
      "loss": 0.2759,
      "step": 321
    },
    {
      "epoch": 0.33611691022964507,
      "grad_norm": 0.6415271759033203,
      "learning_rate": 0.00015114354791034225,
      "loss": 0.265,
      "step": 322
    },
    {
      "epoch": 0.33716075156576203,
      "grad_norm": 0.6545302867889404,
      "learning_rate": 0.00015085849558902264,
      "loss": 0.2221,
      "step": 323
    },
    {
      "epoch": 0.33820459290187893,
      "grad_norm": 0.7755191922187805,
      "learning_rate": 0.00015057288473811772,
      "loss": 0.2827,
      "step": 324
    },
    {
      "epoch": 0.33924843423799583,
      "grad_norm": 0.960443913936615,
      "learning_rate": 0.00015028671849421464,
      "loss": 0.4552,
      "step": 325
    },
    {
      "epoch": 0.34029227557411273,
      "grad_norm": 1.016601324081421,
      "learning_rate": 0.00015000000000000001,
      "loss": 0.4142,
      "step": 326
    },
    {
      "epoch": 0.34133611691022964,
      "grad_norm": 1.2163599729537964,
      "learning_rate": 0.00014971273240422535,
      "loss": 0.5009,
      "step": 327
    },
    {
      "epoch": 0.34237995824634654,
      "grad_norm": 1.181193470954895,
      "learning_rate": 0.0001494249188616723,
      "loss": 0.5031,
      "step": 328
    },
    {
      "epoch": 0.34342379958246344,
      "grad_norm": 0.8927332758903503,
      "learning_rate": 0.0001491365625331182,
      "loss": 0.3835,
      "step": 329
    },
    {
      "epoch": 0.3444676409185804,
      "grad_norm": 0.9175090789794922,
      "learning_rate": 0.00014884766658530125,
      "loss": 0.4822,
      "step": 330
    },
    {
      "epoch": 0.3455114822546973,
      "grad_norm": 0.8441628813743591,
      "learning_rate": 0.00014855823419088576,
      "loss": 0.3455,
      "step": 331
    },
    {
      "epoch": 0.3465553235908142,
      "grad_norm": 0.7784302234649658,
      "learning_rate": 0.00014826826852842726,
      "loss": 0.3359,
      "step": 332
    },
    {
      "epoch": 0.3475991649269311,
      "grad_norm": 1.0156643390655518,
      "learning_rate": 0.00014797777278233778,
      "loss": 0.4488,
      "step": 333
    },
    {
      "epoch": 0.348643006263048,
      "grad_norm": 0.9029203057289124,
      "learning_rate": 0.00014768675014285062,
      "loss": 0.3175,
      "step": 334
    },
    {
      "epoch": 0.3496868475991649,
      "grad_norm": 0.8379835486412048,
      "learning_rate": 0.0001473952038059855,
      "loss": 0.352,
      "step": 335
    },
    {
      "epoch": 0.35073068893528186,
      "grad_norm": 0.807041585445404,
      "learning_rate": 0.00014710313697351341,
      "loss": 0.3151,
      "step": 336
    },
    {
      "epoch": 0.35177453027139877,
      "grad_norm": 0.7760060429573059,
      "learning_rate": 0.0001468105528529214,
      "loss": 0.3397,
      "step": 337
    },
    {
      "epoch": 0.35281837160751567,
      "grad_norm": 0.7737391591072083,
      "learning_rate": 0.00014651745465737737,
      "loss": 0.3054,
      "step": 338
    },
    {
      "epoch": 0.35386221294363257,
      "grad_norm": 0.7160333395004272,
      "learning_rate": 0.00014622384560569493,
      "loss": 0.2271,
      "step": 339
    },
    {
      "epoch": 0.35490605427974947,
      "grad_norm": 0.7895155549049377,
      "learning_rate": 0.00014592972892229778,
      "loss": 0.2717,
      "step": 340
    },
    {
      "epoch": 0.3559498956158664,
      "grad_norm": 0.6400478482246399,
      "learning_rate": 0.00014563510783718457,
      "loss": 0.2212,
      "step": 341
    },
    {
      "epoch": 0.3569937369519833,
      "grad_norm": 0.7159572243690491,
      "learning_rate": 0.0001453399855858932,
      "loss": 0.2192,
      "step": 342
    },
    {
      "epoch": 0.35803757828810023,
      "grad_norm": 0.8888924717903137,
      "learning_rate": 0.00014504436540946548,
      "loss": 0.269,
      "step": 343
    },
    {
      "epoch": 0.35908141962421714,
      "grad_norm": 0.8276658058166504,
      "learning_rate": 0.00014474825055441136,
      "loss": 0.2466,
      "step": 344
    },
    {
      "epoch": 0.36012526096033404,
      "grad_norm": 0.690984308719635,
      "learning_rate": 0.00014445164427267344,
      "loss": 0.1649,
      "step": 345
    },
    {
      "epoch": 0.36116910229645094,
      "grad_norm": 0.7650169730186462,
      "learning_rate": 0.0001441545498215912,
      "loss": 0.2323,
      "step": 346
    },
    {
      "epoch": 0.36221294363256784,
      "grad_norm": 0.8575247526168823,
      "learning_rate": 0.00014385697046386512,
      "loss": 0.1856,
      "step": 347
    },
    {
      "epoch": 0.36325678496868474,
      "grad_norm": 0.9500517249107361,
      "learning_rate": 0.00014355890946752102,
      "loss": 0.2059,
      "step": 348
    },
    {
      "epoch": 0.36430062630480164,
      "grad_norm": 0.7111496925354004,
      "learning_rate": 0.00014326037010587404,
      "loss": 0.144,
      "step": 349
    },
    {
      "epoch": 0.3653444676409186,
      "grad_norm": 0.9617753624916077,
      "learning_rate": 0.0001429613556574928,
      "loss": 0.2208,
      "step": 350
    },
    {
      "epoch": 0.3663883089770355,
      "grad_norm": 0.7692716717720032,
      "learning_rate": 0.00014266186940616328,
      "loss": 0.4445,
      "step": 351
    },
    {
      "epoch": 0.3674321503131524,
      "grad_norm": 0.742679238319397,
      "learning_rate": 0.00014236191464085286,
      "loss": 0.3707,
      "step": 352
    },
    {
      "epoch": 0.3684759916492693,
      "grad_norm": 0.928932785987854,
      "learning_rate": 0.00014206149465567403,
      "loss": 0.4158,
      "step": 353
    },
    {
      "epoch": 0.3695198329853862,
      "grad_norm": 1.0475050210952759,
      "learning_rate": 0.00014176061274984858,
      "loss": 0.4644,
      "step": 354
    },
    {
      "epoch": 0.3705636743215031,
      "grad_norm": 0.9185068607330322,
      "learning_rate": 0.0001414592722276709,
      "loss": 0.4066,
      "step": 355
    },
    {
      "epoch": 0.37160751565762007,
      "grad_norm": 0.8016102910041809,
      "learning_rate": 0.00014115747639847204,
      "loss": 0.3827,
      "step": 356
    },
    {
      "epoch": 0.37265135699373697,
      "grad_norm": 0.689323365688324,
      "learning_rate": 0.0001408552285765832,
      "loss": 0.3004,
      "step": 357
    },
    {
      "epoch": 0.3736951983298539,
      "grad_norm": 0.721184253692627,
      "learning_rate": 0.00014055253208129938,
      "loss": 0.311,
      "step": 358
    },
    {
      "epoch": 0.3747390396659708,
      "grad_norm": 0.8813337683677673,
      "learning_rate": 0.00014024939023684298,
      "loss": 0.3493,
      "step": 359
    },
    {
      "epoch": 0.3757828810020877,
      "grad_norm": 0.8543792366981506,
      "learning_rate": 0.00013994580637232716,
      "loss": 0.2636,
      "step": 360
    },
    {
      "epoch": 0.3768267223382046,
      "grad_norm": 0.8040288686752319,
      "learning_rate": 0.00013964178382171942,
      "loss": 0.2661,
      "step": 361
    },
    {
      "epoch": 0.3778705636743215,
      "grad_norm": 0.7446701526641846,
      "learning_rate": 0.00013933732592380483,
      "loss": 0.2464,
      "step": 362
    },
    {
      "epoch": 0.37891440501043844,
      "grad_norm": 0.7692604064941406,
      "learning_rate": 0.0001390324360221496,
      "loss": 0.2179,
      "step": 363
    },
    {
      "epoch": 0.37995824634655534,
      "grad_norm": 0.6927527785301208,
      "learning_rate": 0.00013872711746506413,
      "loss": 0.1913,
      "step": 364
    },
    {
      "epoch": 0.38100208768267224,
      "grad_norm": 0.750212550163269,
      "learning_rate": 0.00013842137360556628,
      "loss": 0.1723,
      "step": 365
    },
    {
      "epoch": 0.38204592901878914,
      "grad_norm": 0.7114387154579163,
      "learning_rate": 0.0001381152078013447,
      "loss": 0.1899,
      "step": 366
|
}, |
|
{ |
|
"epoch": 0.38308977035490605, |
|
"grad_norm": 0.671464741230011, |
|
"learning_rate": 0.00013780862341472182, |
|
"loss": 0.2059, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.38413361169102295, |
|
"grad_norm": 0.6177671551704407, |
|
"learning_rate": 0.00013750162381261693, |
|
"loss": 0.1888, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.38517745302713985, |
|
"grad_norm": 0.7235206365585327, |
|
"learning_rate": 0.0001371942123665092, |
|
"loss": 0.2612, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.3862212943632568, |
|
"grad_norm": 0.5639315843582153, |
|
"learning_rate": 0.00013688639245240078, |
|
"loss": 0.2214, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3872651356993737, |
|
"grad_norm": 0.5648091435432434, |
|
"learning_rate": 0.00013657816745077955, |
|
"loss": 0.2087, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.3883089770354906, |
|
"grad_norm": 0.6048844456672668, |
|
"learning_rate": 0.0001362695407465821, |
|
"loss": 0.2312, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.3893528183716075, |
|
"grad_norm": 0.48515263199806213, |
|
"learning_rate": 0.0001359605157291565, |
|
"loss": 0.1624, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.3903966597077244, |
|
"grad_norm": 0.6982168555259705, |
|
"learning_rate": 0.0001356510957922251, |
|
"loss": 0.2632, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.3914405010438413, |
|
"grad_norm": 0.9829317331314087, |
|
"learning_rate": 0.0001353412843338474, |
|
"loss": 0.3995, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.3924843423799583, |
|
"grad_norm": 1.1811579465866089, |
|
"learning_rate": 0.00013503108475638244, |
|
"loss": 0.4381, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.3935281837160752, |
|
"grad_norm": 1.0763869285583496, |
|
"learning_rate": 0.00013472050046645166, |
|
"loss": 0.4366, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.3945720250521921, |
|
"grad_norm": 0.8847516179084778, |
|
"learning_rate": 0.00013440953487490144, |
|
"loss": 0.3637, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.395615866388309, |
|
"grad_norm": 0.8159587979316711, |
|
"learning_rate": 0.0001340981913967656, |
|
"loss": 0.3771, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.3966597077244259, |
|
"grad_norm": 0.8111384510993958, |
|
"learning_rate": 0.00013378647345122795, |
|
"loss": 0.3473, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.3977035490605428, |
|
"grad_norm": 0.8355801105499268, |
|
"learning_rate": 0.00013347438446158466, |
|
"loss": 0.381, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.3987473903966597, |
|
"grad_norm": 0.7971011400222778, |
|
"learning_rate": 0.0001331619278552068, |
|
"loss": 0.3019, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.39979123173277664, |
|
"grad_norm": 0.8738229274749756, |
|
"learning_rate": 0.00013284910706350247, |
|
"loss": 0.3766, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.40083507306889354, |
|
"grad_norm": 0.8685609698295593, |
|
"learning_rate": 0.0001325359255218795, |
|
"loss": 0.3371, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.40187891440501045, |
|
"grad_norm": 0.8119929432868958, |
|
"learning_rate": 0.00013222238666970728, |
|
"loss": 0.3047, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.40292275574112735, |
|
"grad_norm": 0.831425130367279, |
|
"learning_rate": 0.00013190849395027928, |
|
"loss": 0.3241, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.40396659707724425, |
|
"grad_norm": 0.8819262981414795, |
|
"learning_rate": 0.0001315942508107751, |
|
"loss": 0.2986, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.40501043841336115, |
|
"grad_norm": 0.8911377787590027, |
|
"learning_rate": 0.00013127966070222274, |
|
"loss": 0.3614, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.40605427974947805, |
|
"grad_norm": 0.7478219866752625, |
|
"learning_rate": 0.00013096472707946056, |
|
"loss": 0.2861, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.407098121085595, |
|
"grad_norm": 0.8377799391746521, |
|
"learning_rate": 0.00013064945340109948, |
|
"loss": 0.2854, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.4081419624217119, |
|
"grad_norm": 0.7409958839416504, |
|
"learning_rate": 0.00013033384312948488, |
|
"loss": 0.2589, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.4091858037578288, |
|
"grad_norm": 0.6093199849128723, |
|
"learning_rate": 0.00013001789973065853, |
|
"loss": 0.2124, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.4102296450939457, |
|
"grad_norm": 0.5660393834114075, |
|
"learning_rate": 0.00012970162667432075, |
|
"loss": 0.1718, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.4112734864300626, |
|
"grad_norm": 0.7063373923301697, |
|
"learning_rate": 0.00012938502743379212, |
|
"loss": 0.2245, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.4123173277661795, |
|
"grad_norm": 0.8215602040290833, |
|
"learning_rate": 0.00012906810548597532, |
|
"loss": 0.2442, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.4133611691022965, |
|
"grad_norm": 0.6678823828697205, |
|
"learning_rate": 0.00012875086431131716, |
|
"loss": 0.1891, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.4144050104384134, |
|
"grad_norm": 0.6394354701042175, |
|
"learning_rate": 0.00012843330739377, |
|
"loss": 0.2203, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.4154488517745303, |
|
"grad_norm": 0.7983390688896179, |
|
"learning_rate": 0.00012811543822075397, |
|
"loss": 0.2113, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.4164926931106472, |
|
"grad_norm": 0.611224889755249, |
|
"learning_rate": 0.0001277972602831181, |
|
"loss": 0.1616, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.4175365344467641, |
|
"grad_norm": 0.9979553818702698, |
|
"learning_rate": 0.00012747877707510252, |
|
"loss": 0.1953, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.418580375782881, |
|
"grad_norm": 0.5957475304603577, |
|
"learning_rate": 0.00012715999209429973, |
|
"loss": 0.3356, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.4196242171189979, |
|
"grad_norm": 0.6433320641517639, |
|
"learning_rate": 0.00012684090884161636, |
|
"loss": 0.3177, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.42066805845511485, |
|
"grad_norm": 0.8637453317642212, |
|
"learning_rate": 0.00012652153082123456, |
|
"loss": 0.4008, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.42171189979123175, |
|
"grad_norm": 0.6944557428359985, |
|
"learning_rate": 0.00012620186154057382, |
|
"loss": 0.3198, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.42275574112734865, |
|
"grad_norm": 0.8894799947738647, |
|
"learning_rate": 0.00012588190451025207, |
|
"loss": 0.3917, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.42379958246346555, |
|
"grad_norm": 0.7825080156326294, |
|
"learning_rate": 0.0001255616632440475, |
|
"loss": 0.3679, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.42484342379958245, |
|
"grad_norm": 0.6816397309303284, |
|
"learning_rate": 0.00012524114125885957, |
|
"loss": 0.2855, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.42588726513569936, |
|
"grad_norm": 0.7824950218200684, |
|
"learning_rate": 0.0001249203420746708, |
|
"loss": 0.305, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.42693110647181626, |
|
"grad_norm": 0.7584050297737122, |
|
"learning_rate": 0.0001245992692145078, |
|
"loss": 0.3123, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.4279749478079332, |
|
"grad_norm": 0.6384229063987732, |
|
"learning_rate": 0.00012427792620440278, |
|
"loss": 0.233, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.4290187891440501, |
|
"grad_norm": 0.7239426374435425, |
|
"learning_rate": 0.00012395631657335468, |
|
"loss": 0.216, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.430062630480167, |
|
"grad_norm": 0.6760838627815247, |
|
"learning_rate": 0.0001236344438532905, |
|
"loss": 0.2092, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.4311064718162839, |
|
"grad_norm": 0.5736236572265625, |
|
"learning_rate": 0.00012331231157902648, |
|
"loss": 0.1725, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.4321503131524008, |
|
"grad_norm": 0.5566690564155579, |
|
"learning_rate": 0.00012298992328822937, |
|
"loss": 0.1645, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.4331941544885177, |
|
"grad_norm": 0.5182932019233704, |
|
"learning_rate": 0.00012266728252137733, |
|
"loss": 0.1484, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.4342379958246347, |
|
"grad_norm": 0.6892213821411133, |
|
"learning_rate": 0.00012234439282172142, |
|
"loss": 0.1422, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.4352818371607516, |
|
"grad_norm": 0.46982264518737793, |
|
"learning_rate": 0.0001220212577352464, |
|
"loss": 0.1292, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.4363256784968685, |
|
"grad_norm": 0.5547206401824951, |
|
"learning_rate": 0.0001216978808106318, |
|
"loss": 0.1813, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.4373695198329854, |
|
"grad_norm": 0.6126968264579773, |
|
"learning_rate": 0.00012137426559921316, |
|
"loss": 0.1804, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.4384133611691023, |
|
"grad_norm": 0.6041930317878723, |
|
"learning_rate": 0.0001210504156549428, |
|
"loss": 0.2519, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.4394572025052192, |
|
"grad_norm": 0.6091843843460083, |
|
"learning_rate": 0.00012072633453435091, |
|
"loss": 0.2423, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.4405010438413361, |
|
"grad_norm": 0.5664049983024597, |
|
"learning_rate": 0.00012040202579650648, |
|
"loss": 0.2357, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.44154488517745305, |
|
"grad_norm": 0.4431195855140686, |
|
"learning_rate": 0.00012007749300297817, |
|
"loss": 0.1704, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.44258872651356995, |
|
"grad_norm": 0.5842788815498352, |
|
"learning_rate": 0.00011975273971779528, |
|
"loss": 0.1979, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.44363256784968685, |
|
"grad_norm": 0.8385937809944153, |
|
"learning_rate": 0.00011942776950740848, |
|
"loss": 0.4167, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.44467640918580376, |
|
"grad_norm": 0.8559632301330566, |
|
"learning_rate": 0.00011910258594065078, |
|
"loss": 0.4068, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.44572025052192066, |
|
"grad_norm": 0.8821555972099304, |
|
"learning_rate": 0.00011877719258869826, |
|
"loss": 0.3839, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.44676409185803756, |
|
"grad_norm": 0.8119932413101196, |
|
"learning_rate": 0.00011845159302503086, |
|
"loss": 0.3336, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.44780793319415446, |
|
"grad_norm": 0.8578853011131287, |
|
"learning_rate": 0.00011812579082539317, |
|
"loss": 0.4184, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.4488517745302714, |
|
"grad_norm": 0.7293173670768738, |
|
"learning_rate": 0.00011779978956775506, |
|
"loss": 0.3733, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.4498956158663883, |
|
"grad_norm": 0.8195939064025879, |
|
"learning_rate": 0.00011747359283227251, |
|
"loss": 0.3646, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.4509394572025052, |
|
"grad_norm": 0.7359249591827393, |
|
"learning_rate": 0.00011714720420124831, |
|
"loss": 0.3201, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.4519832985386221, |
|
"grad_norm": 0.8084685206413269, |
|
"learning_rate": 0.00011682062725909258, |
|
"loss": 0.3537, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.453027139874739, |
|
"grad_norm": 0.7745420336723328, |
|
"learning_rate": 0.00011649386559228341, |
|
"loss": 0.2933, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.45407098121085593, |
|
"grad_norm": 0.81271892786026, |
|
"learning_rate": 0.00011616692278932772, |
|
"loss": 0.2821, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.4551148225469729, |
|
"grad_norm": 0.7679653167724609, |
|
"learning_rate": 0.0001158398024407215, |
|
"loss": 0.2973, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.4561586638830898, |
|
"grad_norm": 0.826492190361023, |
|
"learning_rate": 0.00011551250813891066, |
|
"loss": 0.311, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.4572025052192067, |
|
"grad_norm": 0.766703724861145, |
|
"learning_rate": 0.00011518504347825145, |
|
"loss": 0.2606, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.4582463465553236, |
|
"grad_norm": 0.7974645495414734, |
|
"learning_rate": 0.00011485741205497094, |
|
"loss": 0.2843, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.4592901878914405, |
|
"grad_norm": 0.6922892928123474, |
|
"learning_rate": 0.0001145296174671277, |
|
"loss": 0.2178, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.4603340292275574, |
|
"grad_norm": 0.6793475151062012, |
|
"learning_rate": 0.00011420166331457207, |
|
"loss": 0.2221, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.4613778705636743, |
|
"grad_norm": 0.9168251752853394, |
|
"learning_rate": 0.00011387355319890685, |
|
"loss": 0.2686, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.46242171189979125, |
|
"grad_norm": 0.7042071223258972, |
|
"learning_rate": 0.00011354529072344748, |
|
"loss": 0.2703, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.46346555323590816, |
|
"grad_norm": 0.6087216734886169, |
|
"learning_rate": 0.00011321687949318276, |
|
"loss": 0.2095, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.46450939457202506, |
|
"grad_norm": 0.5309197306632996, |
|
"learning_rate": 0.00011288832311473508, |
|
"loss": 0.188, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.46555323590814196, |
|
"grad_norm": 0.5840495824813843, |
|
"learning_rate": 0.00011255962519632081, |
|
"loss": 0.1569, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.46659707724425886, |
|
"grad_norm": 0.541627049446106, |
|
"learning_rate": 0.00011223078934771079, |
|
"loss": 0.1778, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.46764091858037576, |
|
"grad_norm": 0.4946768581867218, |
|
"learning_rate": 0.00011190181918019049, |
|
"loss": 0.1729, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.46868475991649267, |
|
"grad_norm": 0.4716700613498688, |
|
"learning_rate": 0.00011157271830652062, |
|
"loss": 0.1518, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.4697286012526096, |
|
"grad_norm": 0.8532614707946777, |
|
"learning_rate": 0.00011124349034089723, |
|
"loss": 0.1282, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.4707724425887265, |
|
"grad_norm": 0.7531268000602722, |
|
"learning_rate": 0.00011091413889891211, |
|
"loss": 0.3468, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.4718162839248434, |
|
"grad_norm": 0.7047679424285889, |
|
"learning_rate": 0.00011058466759751302, |
|
"loss": 0.3667, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.47286012526096033, |
|
"grad_norm": 0.7256395220756531, |
|
"learning_rate": 0.00011025508005496417, |
|
"loss": 0.3224, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.47390396659707723, |
|
"grad_norm": 0.7575612664222717, |
|
"learning_rate": 0.00010992537989080618, |
|
"loss": 0.3498, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.47494780793319413, |
|
"grad_norm": 0.7202345132827759, |
|
"learning_rate": 0.00010959557072581652, |
|
"loss": 0.3282, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.4759916492693111, |
|
"grad_norm": 0.6914469599723816, |
|
"learning_rate": 0.00010926565618196978, |
|
"loss": 0.2925, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.477035490605428, |
|
"grad_norm": 0.7601653337478638, |
|
"learning_rate": 0.00010893563988239772, |
|
"loss": 0.3728, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.4780793319415449, |
|
"grad_norm": 0.756959080696106, |
|
"learning_rate": 0.0001086055254513497, |
|
"loss": 0.3186, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.4791231732776618, |
|
"grad_norm": 0.6831420063972473, |
|
"learning_rate": 0.00010827531651415266, |
|
"loss": 0.2786, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.4801670146137787, |
|
"grad_norm": 0.6854783296585083, |
|
"learning_rate": 0.00010794501669717145, |
|
"loss": 0.2334, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.4812108559498956, |
|
"grad_norm": 0.667158305644989, |
|
"learning_rate": 0.00010761462962776897, |
|
"loss": 0.1972, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.4822546972860125, |
|
"grad_norm": 0.6852616667747498, |
|
"learning_rate": 0.00010728415893426635, |
|
"loss": 0.2318, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.48329853862212946, |
|
"grad_norm": 0.6210921406745911, |
|
"learning_rate": 0.00010695360824590303, |
|
"loss": 0.2239, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.48434237995824636, |
|
"grad_norm": 0.5915560722351074, |
|
"learning_rate": 0.00010662298119279701, |
|
"loss": 0.1759, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.48538622129436326, |
|
"grad_norm": 0.5121853351593018, |
|
"learning_rate": 0.00010629228140590486, |
|
"loss": 0.1742, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.48643006263048016, |
|
"grad_norm": 0.6679707169532776, |
|
"learning_rate": 0.00010596151251698199, |
|
"loss": 0.1989, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.48747390396659707, |
|
"grad_norm": 0.5263128280639648, |
|
"learning_rate": 0.00010563067815854266, |
|
"loss": 0.1344, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.48851774530271397, |
|
"grad_norm": 0.6040886640548706, |
|
"learning_rate": 0.00010529978196382011, |
|
"loss": 0.1759, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.48956158663883087, |
|
"grad_norm": 0.4196336269378662, |
|
"learning_rate": 0.00010496882756672666, |
|
"loss": 0.1153, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.4906054279749478, |
|
"grad_norm": 0.6484777927398682, |
|
"learning_rate": 0.00010463781860181385, |
|
"loss": 0.2986, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.49164926931106473, |
|
"grad_norm": 0.651329755783081, |
|
"learning_rate": 0.00010430675870423246, |
|
"loss": 0.2466, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.49269311064718163, |
|
"grad_norm": 0.5095123648643494, |
|
"learning_rate": 0.0001039756515096926, |
|
"loss": 0.199, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.49373695198329853, |
|
"grad_norm": 0.5218927264213562, |
|
"learning_rate": 0.00010364450065442377, |
|
"loss": 0.1569, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.49478079331941544, |
|
"grad_norm": 0.5320420861244202, |
|
"learning_rate": 0.00010331330977513509, |
|
"loss": 0.1954, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.49582463465553234, |
|
"grad_norm": 0.7471246719360352, |
|
"learning_rate": 0.00010298208250897503, |
|
"loss": 0.3619, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.4968684759916493, |
|
"grad_norm": 0.7712672352790833, |
|
"learning_rate": 0.00010265082249349187, |
|
"loss": 0.2926, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.4979123173277662, |
|
"grad_norm": 0.8972386717796326, |
|
"learning_rate": 0.00010231953336659334, |
|
"loss": 0.3387, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.4989561586638831, |
|
"grad_norm": 0.827797532081604, |
|
"learning_rate": 0.00010198821876650701, |
|
"loss": 0.3206, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.8829711675643921, |
|
"learning_rate": 0.00010165688233174017, |
|
"loss": 0.3861, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.5010438413361169, |
|
"grad_norm": 0.8328503370285034, |
|
"learning_rate": 0.00010132552770103987, |
|
"loss": 0.4158, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5010438413361169, |
|
"eval_loss": 0.2552998960018158, |
|
"eval_runtime": 81.2486, |
|
"eval_samples_per_second": 19.865, |
|
"eval_steps_per_second": 9.932, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5020876826722338, |
|
"grad_norm": 0.7070721387863159, |
|
"learning_rate": 0.00010099415851335299, |
|
"loss": 0.3174, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.5031315240083507, |
|
"grad_norm": 0.7391024827957153, |
|
"learning_rate": 0.00010066277840778626, |
|
"loss": 0.3442, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.5041753653444676, |
|
"grad_norm": 0.7629324793815613, |
|
"learning_rate": 0.00010033139102356642, |
|
"loss": 0.3439, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.5052192066805845, |
|
"grad_norm": 0.7324389219284058, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3063, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.5062630480167014, |
|
"grad_norm": 0.6402798295021057, |
|
"learning_rate": 9.966860897643359e-05, |
|
"loss": 0.2383, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.5073068893528184, |
|
"grad_norm": 0.7618774771690369, |
|
"learning_rate": 9.933722159221376e-05, |
|
"loss": 0.3004, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.5083507306889353, |
|
"grad_norm": 0.8296042680740356, |
|
"learning_rate": 9.900584148664704e-05, |
|
"loss": 0.3208, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.5093945720250522, |
|
"grad_norm": 0.7663673162460327, |
|
"learning_rate": 9.867447229896018e-05, |
|
"loss": 0.3204, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.5104384133611691, |
|
"grad_norm": 0.7188003063201904, |
|
"learning_rate": 9.834311766825985e-05, |
|
"loss": 0.2645, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.511482254697286, |
|
"grad_norm": 0.6017361879348755, |
|
"learning_rate": 9.801178123349298e-05, |
|
"loss": 0.2076, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5125260960334029, |
|
"grad_norm": 0.6702793836593628, |
|
"learning_rate": 9.768046663340669e-05, |
|
"loss": 0.2207, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.5135699373695198, |
|
"grad_norm": 0.6283150911331177, |
|
"learning_rate": 9.734917750650816e-05, |
|
"loss": 0.2246, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.5146137787056367, |
|
"grad_norm": 0.6348150968551636, |
|
"learning_rate": 9.701791749102495e-05, |
|
"loss": 0.1896, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.5156576200417536, |
|
"grad_norm": 0.6684585213661194, |
|
"learning_rate": 9.668669022486494e-05, |
|
"loss": 0.2409, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.5167014613778705, |
|
"grad_norm": 0.678677499294281, |
|
"learning_rate": 9.635549934557625e-05, |
|
"loss": 0.216, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.5177453027139874, |
|
"grad_norm": 0.6523580551147461, |
|
"learning_rate": 9.602434849030745e-05, |
|
"loss": 0.1894, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.5187891440501043, |
|
"grad_norm": 0.5122499465942383, |
|
"learning_rate": 9.569324129576757e-05, |
|
"loss": 0.1579, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.5198329853862212, |
|
"grad_norm": 0.5820009112358093, |
|
"learning_rate": 9.536218139818614e-05, |
|
"loss": 0.1766, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.5208768267223383, |
|
"grad_norm": 0.5032172203063965, |
|
"learning_rate": 9.503117243327337e-05, |
|
"loss": 0.1519, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.5219206680584552, |
|
"grad_norm": 0.7425169944763184, |
|
"learning_rate": 9.47002180361799e-05, |
|
"loss": 0.1333, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5229645093945721, |
|
"grad_norm": 0.7141383290290833, |
|
"learning_rate": 9.436932184145737e-05, |
|
"loss": 0.4269, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.524008350730689, |
|
"grad_norm": 0.662886917591095, |
|
"learning_rate": 9.403848748301802e-05, |
|
"loss": 0.2939, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.5250521920668059, |
|
"grad_norm": 0.6695585250854492, |
|
"learning_rate": 9.370771859409513e-05, |
|
"loss": 0.3167, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.5260960334029228, |
|
"grad_norm": 0.7355087399482727, |
|
"learning_rate": 9.337701880720303e-05, |
|
"loss": 0.3414, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.5271398747390397, |
|
"grad_norm": 0.6738423705101013, |
|
"learning_rate": 9.304639175409698e-05, |
|
"loss": 0.3176, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.5281837160751566, |
|
"grad_norm": 0.7200036644935608, |
|
"learning_rate": 9.271584106573364e-05, |
|
"loss": 0.3276, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.5292275574112735, |
|
"grad_norm": 0.6562217473983765, |
|
"learning_rate": 9.238537037223104e-05, |
|
"loss": 0.3353, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.5302713987473904, |
|
"grad_norm": 0.6065823435783386, |
|
"learning_rate": 9.205498330282856e-05, |
|
"loss": 0.3062, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.5313152400835073, |
|
"grad_norm": 0.6631349921226501, |
|
"learning_rate": 9.172468348584739e-05, |
|
"loss": 0.2497, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.5323590814196242, |
|
"grad_norm": 0.613549530506134, |
|
"learning_rate": 9.139447454865033e-05, |
|
"loss": 0.2625, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.5334029227557411, |
|
"grad_norm": 0.6613410711288452, |
|
"learning_rate": 9.106436011760229e-05, |
|
"loss": 0.244, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.534446764091858, |
|
"grad_norm": 0.5914390087127686, |
|
"learning_rate": 9.073434381803024e-05, |
|
"loss": 0.2109, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.535490605427975, |
|
"grad_norm": 0.5324224829673767, |
|
"learning_rate": 9.04044292741835e-05, |
|
"loss": 0.1926, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.5365344467640919, |
|
"grad_norm": 0.652651846408844, |
|
"learning_rate": 9.007462010919386e-05, |
|
"loss": 0.2254, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.5375782881002088, |
|
"grad_norm": 0.6112195253372192, |
|
"learning_rate": 8.974491994503584e-05, |
|
"loss": 0.205, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.5386221294363257, |
|
"grad_norm": 0.5404685735702515, |
|
"learning_rate": 8.941533240248699e-05, |
|
"loss": 0.1928, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.5396659707724426, |
|
"grad_norm": 0.47451335191726685, |
|
"learning_rate": 8.908586110108794e-05, |
|
"loss": 0.1307, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.5407098121085595, |
|
"grad_norm": 0.8480343818664551, |
|
"learning_rate": 8.875650965910279e-05, |
|
"loss": 0.1533, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.5417536534446764, |
|
"grad_norm": 0.6759589314460754, |
|
"learning_rate": 8.842728169347939e-05, |
|
"loss": 0.1468, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.5427974947807933, |
|
"grad_norm": 0.5591132640838623, |
|
"learning_rate": 8.809818081980953e-05, |
|
"loss": 0.2246, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.5438413361169102, |
|
"grad_norm": 0.6184394955635071, |
|
"learning_rate": 8.776921065228924e-05, |
|
"loss": 0.2189, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.5448851774530271, |
|
"grad_norm": 0.5175319910049438, |
|
"learning_rate": 8.744037480367921e-05, |
|
"loss": 0.1886, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.545929018789144, |
|
"grad_norm": 0.645250678062439, |
|
"learning_rate": 8.711167688526493e-05, |
|
"loss": 0.2297, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.5469728601252609, |
|
"grad_norm": 0.6044825315475464, |
|
"learning_rate": 8.678312050681724e-05, |
|
"loss": 0.2029, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.5480167014613778, |
|
"grad_norm": 0.5178519487380981, |
|
"learning_rate": 8.645470927655255e-05, |
|
"loss": 0.1447, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.5490605427974948, |
|
"grad_norm": 0.8626076579093933, |
|
"learning_rate": 8.612644680109319e-05, |
|
"loss": 0.4495, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.5501043841336117, |
|
"grad_norm": 0.8363009691238403, |
|
"learning_rate": 8.579833668542796e-05, |
|
"loss": 0.3709, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.5511482254697286, |
|
"grad_norm": 0.872733473777771, |
|
"learning_rate": 8.547038253287233e-05, |
|
"loss": 0.3226, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.5521920668058455, |
|
"grad_norm": 0.865210235118866, |
|
"learning_rate": 8.514258794502905e-05, |
|
"loss": 0.3579, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.5532359081419624, |
|
"grad_norm": 0.7914073467254639, |
|
"learning_rate": 8.481495652174859e-05, |
|
"loss": 0.3216, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.5542797494780793, |
|
"grad_norm": 0.8779425024986267, |
|
"learning_rate": 8.448749186108935e-05, |
|
"loss": 0.3532, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.5553235908141962, |
|
"grad_norm": 0.8584082722663879, |
|
"learning_rate": 8.416019755927851e-05, |
|
"loss": 0.4009, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.5563674321503131, |
|
"grad_norm": 0.8470184206962585, |
|
"learning_rate": 8.383307721067231e-05, |
|
"loss": 0.4198, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.55741127348643, |
|
"grad_norm": 0.7338582277297974, |
|
"learning_rate": 8.35061344077166e-05, |
|
"loss": 0.2725, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.558455114822547, |
|
"grad_norm": 0.7652982473373413, |
|
"learning_rate": 8.317937274090747e-05, |
|
"loss": 0.3007, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.5594989561586639, |
|
"grad_norm": 0.7415357232093811, |
|
"learning_rate": 8.28527957987517e-05, |
|
"loss": 0.3274, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.5605427974947808, |
|
"grad_norm": 0.6662179231643677, |
|
"learning_rate": 8.252640716772749e-05, |
|
"loss": 0.2606, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.5615866388308977, |
|
"grad_norm": 0.6139498353004456, |
|
"learning_rate": 8.2200210432245e-05, |
|
"loss": 0.2382, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.5626304801670147, |
|
"grad_norm": 0.7014831900596619, |
|
"learning_rate": 8.187420917460686e-05, |
|
"loss": 0.2542, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.5636743215031316, |
|
"grad_norm": 0.7138461470603943, |
|
"learning_rate": 8.154840697496917e-05, |
|
"loss": 0.2392, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.5647181628392485, |
|
"grad_norm": 0.7507902979850769, |
|
"learning_rate": 8.122280741130176e-05, |
|
"loss": 0.2554, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.5657620041753654, |
|
"grad_norm": 0.8535422086715698, |
|
"learning_rate": 8.089741405934922e-05, |
|
"loss": 0.2433, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.5668058455114823, |
|
"grad_norm": 0.660111129283905, |
|
"learning_rate": 8.057223049259155e-05, |
|
"loss": 0.1921, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.5678496868475992, |
|
"grad_norm": 0.49220120906829834, |
|
"learning_rate": 8.024726028220474e-05, |
|
"loss": 0.1793, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.5688935281837161, |
|
"grad_norm": 0.5934033393859863, |
|
"learning_rate": 7.992250699702182e-05, |
|
"loss": 0.1686, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.569937369519833, |
|
"grad_norm": 0.6598916053771973, |
|
"learning_rate": 7.959797420349355e-05, |
|
"loss": 0.1981, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.5709812108559499, |
|
"grad_norm": 0.6087566018104553, |
|
"learning_rate": 7.927366546564911e-05, |
|
"loss": 0.1845, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.5720250521920668, |
|
"grad_norm": 0.4998890459537506, |
|
"learning_rate": 7.894958434505725e-05, |
|
"loss": 0.1524, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.5730688935281837, |
|
"grad_norm": 0.5460024476051331, |
|
"learning_rate": 7.862573440078686e-05, |
|
"loss": 0.1808, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.5741127348643006, |
|
"grad_norm": 0.7462297677993774, |
|
"learning_rate": 7.83021191893682e-05, |
|
"loss": 0.1723, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5751565762004175, |
|
"grad_norm": 0.5173273086547852, |
|
"learning_rate": 7.797874226475361e-05, |
|
"loss": 0.3054, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.5762004175365344, |
|
"grad_norm": 0.6547046303749084, |
|
"learning_rate": 7.765560717827858e-05, |
|
"loss": 0.3101, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.5772442588726514, |
|
"grad_norm": 0.676986575126648, |
|
"learning_rate": 7.733271747862265e-05, |
|
"loss": 0.3376, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.5782881002087683, |
|
"grad_norm": 0.7368578910827637, |
|
"learning_rate": 7.701007671177067e-05, |
|
"loss": 0.3517, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.5793319415448852, |
|
"grad_norm": 0.7136873006820679, |
|
"learning_rate": 7.668768842097353e-05, |
|
"loss": 0.3077, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.5803757828810021, |
|
"grad_norm": 0.7121712565422058, |
|
"learning_rate": 7.636555614670953e-05, |
|
"loss": 0.3271, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.581419624217119, |
|
"grad_norm": 0.7123695611953735, |
|
"learning_rate": 7.604368342664533e-05, |
|
"loss": 0.3356, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.5824634655532359, |
|
"grad_norm": 0.7206712961196899, |
|
"learning_rate": 7.572207379559721e-05, |
|
"loss": 0.2915, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.5835073068893528, |
|
"grad_norm": 0.6520224809646606, |
|
"learning_rate": 7.540073078549221e-05, |
|
"loss": 0.2657, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.5845511482254697, |
|
"grad_norm": 0.5960420370101929, |
|
"learning_rate": 7.507965792532921e-05, |
|
"loss": 0.1952, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5855949895615866, |
|
"grad_norm": 0.5834378004074097, |
|
"learning_rate": 7.475885874114047e-05, |
|
"loss": 0.1878, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.5866388308977035, |
|
"grad_norm": 0.6201406121253967, |
|
"learning_rate": 7.443833675595255e-05, |
|
"loss": 0.1927, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.5876826722338204, |
|
"grad_norm": 0.5904473662376404, |
|
"learning_rate": 7.411809548974792e-05, |
|
"loss": 0.1804, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.5887265135699373, |
|
"grad_norm": 0.5292779803276062, |
|
"learning_rate": 7.379813845942623e-05, |
|
"loss": 0.1379, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.5897703549060542, |
|
"grad_norm": 0.618929922580719, |
|
"learning_rate": 7.347846917876544e-05, |
|
"loss": 0.1926, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.5908141962421712, |
|
"grad_norm": 0.5607888698577881, |
|
"learning_rate": 7.315909115838367e-05, |
|
"loss": 0.1845, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.5918580375782881, |
|
"grad_norm": 0.56803297996521, |
|
"learning_rate": 7.284000790570029e-05, |
|
"loss": 0.1762, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.592901878914405, |
|
"grad_norm": 0.5143932700157166, |
|
"learning_rate": 7.252122292489747e-05, |
|
"loss": 0.1514, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.593945720250522, |
|
"grad_norm": 0.6080281734466553, |
|
"learning_rate": 7.220273971688192e-05, |
|
"loss": 0.2516, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.5949895615866388, |
|
"grad_norm": 0.6721866130828857, |
|
"learning_rate": 7.188456177924605e-05, |
|
"loss": 0.2707, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.5960334029227558, |
|
"grad_norm": 0.5116624236106873, |
|
"learning_rate": 7.156669260622996e-05, |
|
"loss": 0.2083, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.5970772442588727, |
|
"grad_norm": 0.5874140858650208, |
|
"learning_rate": 7.124913568868287e-05, |
|
"loss": 0.1598, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.5981210855949896, |
|
"grad_norm": 0.506820559501648, |
|
"learning_rate": 7.093189451402469e-05, |
|
"loss": 0.1572, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.5991649269311065, |
|
"grad_norm": 0.7033873796463013, |
|
"learning_rate": 7.061497256620793e-05, |
|
"loss": 0.2867, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.6002087682672234, |
|
"grad_norm": 0.7932460904121399, |
|
"learning_rate": 7.029837332567927e-05, |
|
"loss": 0.2998, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.6012526096033403, |
|
"grad_norm": 0.8427619934082031, |
|
"learning_rate": 6.998210026934148e-05, |
|
"loss": 0.426, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.6022964509394572, |
|
"grad_norm": 0.7818666100502014, |
|
"learning_rate": 6.966615687051516e-05, |
|
"loss": 0.3559, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.6033402922755741, |
|
"grad_norm": 0.821897566318512, |
|
"learning_rate": 6.935054659890052e-05, |
|
"loss": 0.3928, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.6043841336116911, |
|
"grad_norm": 0.7375624179840088, |
|
"learning_rate": 6.903527292053942e-05, |
|
"loss": 0.3203, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.605427974947808, |
|
"grad_norm": 0.7323412299156189, |
|
"learning_rate": 6.87203392977773e-05, |
|
"loss": 0.3014, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.6064718162839249, |
|
"grad_norm": 0.8238475322723389, |
|
"learning_rate": 6.840574918922493e-05, |
|
"loss": 0.3447, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.6075156576200418, |
|
"grad_norm": 0.7970190644264221, |
|
"learning_rate": 6.809150604972079e-05, |
|
"loss": 0.3556, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.6085594989561587, |
|
"grad_norm": 0.718948483467102, |
|
"learning_rate": 6.777761333029275e-05, |
|
"loss": 0.318, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.6096033402922756, |
|
"grad_norm": 0.8113385438919067, |
|
"learning_rate": 6.746407447812049e-05, |
|
"loss": 0.2928, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.6106471816283925, |
|
"grad_norm": 0.732028603553772, |
|
"learning_rate": 6.715089293649752e-05, |
|
"loss": 0.2688, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.6116910229645094, |
|
"grad_norm": 0.693304181098938, |
|
"learning_rate": 6.683807214479323e-05, |
|
"loss": 0.2421, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.6127348643006263, |
|
"grad_norm": 0.8302125334739685, |
|
"learning_rate": 6.652561553841537e-05, |
|
"loss": 0.2625, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.6137787056367432, |
|
"grad_norm": 0.6446481943130493, |
|
"learning_rate": 6.621352654877207e-05, |
|
"loss": 0.2261, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.6148225469728601, |
|
"grad_norm": 0.7838292717933655, |
|
"learning_rate": 6.59018086032344e-05, |
|
"loss": 0.2619, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.615866388308977, |
|
"grad_norm": 0.6317050457000732, |
|
"learning_rate": 6.55904651250986e-05, |
|
"loss": 0.1996, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.6169102296450939, |
|
"grad_norm": 0.6110920310020447, |
|
"learning_rate": 6.527949953354835e-05, |
|
"loss": 0.2078, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.6179540709812108, |
|
"grad_norm": 0.5425273180007935, |
|
"learning_rate": 6.496891524361757e-05, |
|
"loss": 0.1852, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.6189979123173278, |
|
"grad_norm": 0.7897228002548218, |
|
"learning_rate": 6.465871566615263e-05, |
|
"loss": 0.2217, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.6200417536534447, |
|
"grad_norm": 0.6448274254798889, |
|
"learning_rate": 6.434890420777491e-05, |
|
"loss": 0.1918, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.6210855949895616, |
|
"grad_norm": 0.690799355506897, |
|
"learning_rate": 6.403948427084356e-05, |
|
"loss": 0.2129, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.6221294363256785, |
|
"grad_norm": 0.6128472685813904, |
|
"learning_rate": 6.373045925341794e-05, |
|
"loss": 0.196, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.6231732776617954, |
|
"grad_norm": 0.4894169270992279, |
|
"learning_rate": 6.342183254922046e-05, |
|
"loss": 0.1341, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.6242171189979123, |
|
"grad_norm": 0.5475450754165649, |
|
"learning_rate": 6.311360754759923e-05, |
|
"loss": 0.1655, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.6252609603340292, |
|
"grad_norm": 0.5742066502571106, |
|
"learning_rate": 6.280578763349078e-05, |
|
"loss": 0.2158, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.6263048016701461, |
|
"grad_norm": 0.6302378177642822, |
|
"learning_rate": 6.249837618738311e-05, |
|
"loss": 0.1211, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.627348643006263, |
|
"grad_norm": 0.6498920321464539, |
|
"learning_rate": 6.219137658527818e-05, |
|
"loss": 0.3415, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.6283924843423799, |
|
"grad_norm": 0.5803071856498718, |
|
"learning_rate": 6.188479219865529e-05, |
|
"loss": 0.3446, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.6294363256784968, |
|
"grad_norm": 0.7242146134376526, |
|
"learning_rate": 6.157862639443374e-05, |
|
"loss": 0.3406, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.6304801670146137, |
|
"grad_norm": 0.741543710231781, |
|
"learning_rate": 6.127288253493591e-05, |
|
"loss": 0.2851, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.6315240083507306, |
|
"grad_norm": 0.6710807681083679, |
|
"learning_rate": 6.09675639778504e-05, |
|
"loss": 0.2975, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.6325678496868476, |
|
"grad_norm": 0.6401992440223694, |
|
"learning_rate": 6.0662674076195194e-05, |
|
"loss": 0.2934, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.6336116910229646, |
|
"grad_norm": 0.7866775393486023, |
|
"learning_rate": 6.03582161782806e-05, |
|
"loss": 0.3303, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.6346555323590815, |
|
"grad_norm": 0.6878888607025146, |
|
"learning_rate": 6.005419362767286e-05, |
|
"loss": 0.2885, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.6356993736951984, |
|
"grad_norm": 0.667226254940033, |
|
"learning_rate": 5.975060976315703e-05, |
|
"loss": 0.2663, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.6367432150313153, |
|
"grad_norm": 0.5909189581871033, |
|
"learning_rate": 5.9447467918700614e-05, |
|
"loss": 0.2023, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.6377870563674322, |
|
"grad_norm": 0.6986932158470154, |
|
"learning_rate": 5.9144771423416826e-05, |
|
"loss": 0.2354, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.6388308977035491, |
|
"grad_norm": 0.5562401413917542, |
|
"learning_rate": 5.8842523601528e-05, |
|
"loss": 0.1928, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.639874739039666, |
|
"grad_norm": 0.5309166312217712, |
|
"learning_rate": 5.854072777232914e-05, |
|
"loss": 0.1611, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.6409185803757829, |
|
"grad_norm": 0.4029114842414856, |
|
"learning_rate": 5.823938725015148e-05, |
|
"loss": 0.14, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.6419624217118998, |
|
"grad_norm": 0.4246949851512909, |
|
"learning_rate": 5.793850534432599e-05, |
|
"loss": 0.1194, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.6430062630480167, |
|
"grad_norm": 0.44221794605255127, |
|
"learning_rate": 5.763808535914723e-05, |
|
"loss": 0.121, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.6440501043841336, |
|
"grad_norm": 0.40379002690315247, |
|
"learning_rate": 5.7338130593836755e-05, |
|
"loss": 0.1215, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.6450939457202505, |
|
"grad_norm": 0.6201443076133728, |
|
"learning_rate": 5.7038644342507205e-05, |
|
"loss": 0.1686, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.6461377870563675, |
|
"grad_norm": 0.7134044766426086, |
|
"learning_rate": 5.673962989412599e-05, |
|
"loss": 0.3048, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.6471816283924844, |
|
"grad_norm": 0.5085525512695312, |
|
"learning_rate": 5.644109053247901e-05, |
|
"loss": 0.2137, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.6482254697286013, |
|
"grad_norm": 0.5813112854957581, |
|
"learning_rate": 5.614302953613489e-05, |
|
"loss": 0.2164, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.6492693110647182, |
|
"grad_norm": 0.5314549803733826, |
|
"learning_rate": 5.584545017840885e-05, |
|
"loss": 0.1781, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.6503131524008351, |
|
"grad_norm": 0.4466283619403839, |
|
"learning_rate": 5.5548355727326574e-05, |
|
"loss": 0.1564, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.651356993736952, |
|
"grad_norm": 0.7003150582313538, |
|
"learning_rate": 5.525174944558866e-05, |
|
"loss": 0.3604, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.6524008350730689, |
|
"grad_norm": 0.7904312610626221, |
|
"learning_rate": 5.4955634590534545e-05, |
|
"loss": 0.3483, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.6534446764091858, |
|
"grad_norm": 0.7673099637031555, |
|
"learning_rate": 5.466001441410682e-05, |
|
"loss": 0.3912, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.6544885177453027, |
|
"grad_norm": 0.823867678642273, |
|
"learning_rate": 5.4364892162815436e-05, |
|
"loss": 0.3618, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.6555323590814196, |
|
"grad_norm": 0.6855948567390442, |
|
"learning_rate": 5.407027107770219e-05, |
|
"loss": 0.2816, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.6565762004175365, |
|
"grad_norm": 0.7723731994628906, |
|
"learning_rate": 5.377615439430508e-05, |
|
"loss": 0.3292, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.6576200417536534, |
|
"grad_norm": 0.7081869840621948, |
|
"learning_rate": 5.348254534262262e-05, |
|
"loss": 0.3232, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.6586638830897703, |
|
"grad_norm": 0.7101826071739197, |
|
"learning_rate": 5.318944714707861e-05, |
|
"loss": 0.3557, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.6597077244258872, |
|
"grad_norm": 0.7043560147285461, |
|
"learning_rate": 5.289686302648661e-05, |
|
"loss": 0.3251, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.6607515657620042, |
|
"grad_norm": 0.8865169286727905, |
|
"learning_rate": 5.2604796194014507e-05, |
|
"loss": 0.3514, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.6617954070981211, |
|
"grad_norm": 0.8106626868247986, |
|
"learning_rate": 5.2313249857149414e-05, |
|
"loss": 0.3226, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.662839248434238, |
|
"grad_norm": 0.7511535286903381, |
|
"learning_rate": 5.202222721766226e-05, |
|
"loss": 0.3186, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.6638830897703549, |
|
"grad_norm": 0.7110910415649414, |
|
"learning_rate": 5.1731731471572755e-05, |
|
"loss": 0.29, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.6649269311064718, |
|
"grad_norm": 0.7598642110824585, |
|
"learning_rate": 5.144176580911431e-05, |
|
"loss": 0.2552, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.6659707724425887, |
|
"grad_norm": 0.5399108529090881, |
|
"learning_rate": 5.115233341469877e-05, |
|
"loss": 0.2105, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.6670146137787056, |
|
"grad_norm": 0.6335327625274658, |
|
"learning_rate": 5.0863437466881836e-05, |
|
"loss": 0.2272, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.6680584551148225, |
|
"grad_norm": 0.6470877528190613, |
|
"learning_rate": 5.0575081138327715e-05, |
|
"loss": 0.2329, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6691022964509394, |
|
"grad_norm": 0.5501940250396729, |
|
"learning_rate": 5.028726759577467e-05, |
|
"loss": 0.2057, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.6701461377870563, |
|
"grad_norm": 0.48475509881973267, |
|
"learning_rate": 5.000000000000002e-05, |
|
"loss": 0.1643, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.6711899791231732, |
|
"grad_norm": 0.8148300051689148, |
|
"learning_rate": 4.97132815057854e-05, |
|
"loss": 0.2454, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.6722338204592901, |
|
"grad_norm": 0.47534969449043274, |
|
"learning_rate": 4.942711526188229e-05, |
|
"loss": 0.1713, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.673277661795407, |
|
"grad_norm": 0.5151733160018921, |
|
"learning_rate": 4.914150441097736e-05, |
|
"loss": 0.1701, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.6743215031315241, |
|
"grad_norm": 0.5037069916725159, |
|
"learning_rate": 4.885645208965779e-05, |
|
"loss": 0.1814, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.675365344467641, |
|
"grad_norm": 0.4882695972919464, |
|
"learning_rate": 4.857196142837716e-05, |
|
"loss": 0.1685, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.6764091858037579, |
|
"grad_norm": 0.614020586013794, |
|
"learning_rate": 4.8288035551420697e-05, |
|
"loss": 0.1878, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.6774530271398748, |
|
"grad_norm": 0.44983476400375366, |
|
"learning_rate": 4.80046775768713e-05, |
|
"loss": 0.1455, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.6784968684759917, |
|
"grad_norm": 0.7257928252220154, |
|
"learning_rate": 4.7721890616575103e-05, |
|
"loss": 0.1417, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6795407098121086, |
|
"grad_norm": 0.5455628633499146, |
|
"learning_rate": 4.743967777610727e-05, |
|
"loss": 0.298, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.6805845511482255, |
|
"grad_norm": 0.6038815379142761, |
|
"learning_rate": 4.715804215473809e-05, |
|
"loss": 0.322, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.6816283924843424, |
|
"grad_norm": 0.6264936327934265, |
|
"learning_rate": 4.687698684539866e-05, |
|
"loss": 0.3185, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.6826722338204593, |
|
"grad_norm": 0.6083415150642395, |
|
"learning_rate": 4.659651493464721e-05, |
|
"loss": 0.2614, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.6837160751565762, |
|
"grad_norm": 0.616062343120575, |
|
"learning_rate": 4.6316629502635025e-05, |
|
"loss": 0.2663, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.6847599164926931, |
|
"grad_norm": 0.6474526524543762, |
|
"learning_rate": 4.603733362307261e-05, |
|
"loss": 0.3216, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.68580375782881, |
|
"grad_norm": 0.6374465227127075, |
|
"learning_rate": 4.575863036319604e-05, |
|
"loss": 0.2995, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.6868475991649269, |
|
"grad_norm": 0.627227246761322, |
|
"learning_rate": 4.548052278373327e-05, |
|
"loss": 0.2399, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.6878914405010439, |
|
"grad_norm": 0.6535525321960449, |
|
"learning_rate": 4.520301393887032e-05, |
|
"loss": 0.2309, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.6889352818371608, |
|
"grad_norm": 0.5703950524330139, |
|
"learning_rate": 4.492610687621804e-05, |
|
"loss": 0.2128, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.6899791231732777, |
|
"grad_norm": 0.5039522647857666, |
|
"learning_rate": 4.4649804636778456e-05, |
|
"loss": 0.1812, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.6910229645093946, |
|
"grad_norm": 0.6118776798248291, |
|
"learning_rate": 4.4374110254911306e-05, |
|
"loss": 0.225, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.6920668058455115, |
|
"grad_norm": 0.4194796085357666, |
|
"learning_rate": 4.4099026758300944e-05, |
|
"loss": 0.1415, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.6931106471816284, |
|
"grad_norm": 0.5777245163917542, |
|
"learning_rate": 4.382455716792291e-05, |
|
"loss": 0.2032, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.6941544885177453, |
|
"grad_norm": 0.41527435183525085, |
|
"learning_rate": 4.355070449801083e-05, |
|
"loss": 0.1205, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.6951983298538622, |
|
"grad_norm": 0.502178430557251, |
|
"learning_rate": 4.32774717560232e-05, |
|
"loss": 0.1514, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.6962421711899791, |
|
"grad_norm": 0.5909983515739441, |
|
"learning_rate": 4.300486194261057e-05, |
|
"loss": 0.176, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.697286012526096, |
|
"grad_norm": 0.48378539085388184, |
|
"learning_rate": 4.273287805158245e-05, |
|
"loss": 0.1627, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.6983298538622129, |
|
"grad_norm": 0.6273384094238281, |
|
"learning_rate": 4.2461523069874346e-05, |
|
"loss": 0.2523, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.6993736951983298, |
|
"grad_norm": 0.5280055403709412, |
|
"learning_rate": 4.219079997751515e-05, |
|
"loss": 0.2035, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7004175365344467, |
|
"grad_norm": 0.539364755153656, |
|
"learning_rate": 4.192071174759435e-05, |
|
"loss": 0.1896, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.7014613778705637, |
|
"grad_norm": 0.5403777956962585, |
|
"learning_rate": 4.165126134622926e-05, |
|
"loss": 0.1624, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.7025052192066806, |
|
"grad_norm": 0.448177695274353, |
|
"learning_rate": 4.1382451732532665e-05, |
|
"loss": 0.1308, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.7035490605427975, |
|
"grad_norm": 0.6774344444274902, |
|
"learning_rate": 4.1114285858580045e-05, |
|
"loss": 0.3161, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.7045929018789144, |
|
"grad_norm": 0.7080472707748413, |
|
"learning_rate": 4.0846766669377446e-05, |
|
"loss": 0.3357, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.7056367432150313, |
|
"grad_norm": 0.6589325666427612, |
|
"learning_rate": 4.0579897102828966e-05, |
|
"loss": 0.2815, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.7066805845511482, |
|
"grad_norm": 0.7149707078933716, |
|
"learning_rate": 4.0313680089704454e-05, |
|
"loss": 0.3612, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.7077244258872651, |
|
"grad_norm": 0.6252415776252747, |
|
"learning_rate": 4.004811855360748e-05, |
|
"loss": 0.2726, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.708768267223382, |
|
"grad_norm": 0.7726844549179077, |
|
"learning_rate": 3.9783215410943174e-05, |
|
"loss": 0.3229, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.7098121085594989, |
|
"grad_norm": 0.7369757890701294, |
|
"learning_rate": 3.951897357088602e-05, |
|
"loss": 0.3436, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.7108559498956158, |
|
"grad_norm": 0.788517951965332, |
|
"learning_rate": 3.925539593534824e-05, |
|
"loss": 0.312, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.7118997912317327, |
|
"grad_norm": 0.7983633875846863, |
|
"learning_rate": 3.899248539894757e-05, |
|
"loss": 0.361, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.7129436325678496, |
|
"grad_norm": 0.7084015607833862, |
|
"learning_rate": 3.873024484897576e-05, |
|
"loss": 0.2836, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.7139874739039666, |
|
"grad_norm": 0.7584156394004822, |
|
"learning_rate": 3.8468677165366754e-05, |
|
"loss": 0.2955, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.7150313152400835, |
|
"grad_norm": 0.7465482354164124, |
|
"learning_rate": 3.820778522066494e-05, |
|
"loss": 0.2564, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.7160751565762005, |
|
"grad_norm": 0.6189156770706177, |
|
"learning_rate": 3.794757187999386e-05, |
|
"loss": 0.221, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.7171189979123174, |
|
"grad_norm": 0.6960480809211731, |
|
"learning_rate": 3.7688040001024475e-05, |
|
"loss": 0.2522, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.7181628392484343, |
|
"grad_norm": 0.572296679019928, |
|
"learning_rate": 3.7429192433944014e-05, |
|
"loss": 0.1997, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.7192066805845512, |
|
"grad_norm": 0.6478104591369629, |
|
"learning_rate": 3.717103202142457e-05, |
|
"loss": 0.2126, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.7202505219206681, |
|
"grad_norm": 0.7389695644378662, |
|
"learning_rate": 3.691356159859177e-05, |
|
"loss": 0.2333, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.721294363256785, |
|
"grad_norm": 0.5489468574523926, |
|
"learning_rate": 3.665678399299388e-05, |
|
"loss": 0.1701, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.7223382045929019, |
|
"grad_norm": 0.7258986830711365, |
|
"learning_rate": 3.64007020245706e-05, |
|
"loss": 0.2457, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.7233820459290188, |
|
"grad_norm": 0.6785321235656738, |
|
"learning_rate": 3.614531850562203e-05, |
|
"loss": 0.1936, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.7244258872651357, |
|
"grad_norm": 0.612426221370697, |
|
"learning_rate": 3.589063624077802e-05, |
|
"loss": 0.2403, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.7254697286012526, |
|
"grad_norm": 0.5947864651679993, |
|
"learning_rate": 3.563665802696707e-05, |
|
"loss": 0.1743, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.7265135699373695, |
|
"grad_norm": 0.6579543352127075, |
|
"learning_rate": 3.538338665338589e-05, |
|
"loss": 0.1928, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.7275574112734864, |
|
"grad_norm": 0.5460782647132874, |
|
"learning_rate": 3.513082490146864e-05, |
|
"loss": 0.1655, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.7286012526096033, |
|
"grad_norm": 0.7640422582626343, |
|
"learning_rate": 3.487897554485628e-05, |
|
"loss": 0.1659, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.7296450939457203, |
|
"grad_norm": 0.7361250519752502, |
|
"learning_rate": 3.462784134936636e-05, |
|
"loss": 0.1992, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.7306889352818372, |
|
"grad_norm": 1.2653623819351196, |
|
"learning_rate": 3.4377425072962465e-05, |
|
"loss": 0.1184, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7317327766179541, |
|
"grad_norm": 0.6173591613769531, |
|
"learning_rate": 3.412772946572389e-05, |
|
"loss": 0.4121, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.732776617954071, |
|
"grad_norm": 0.5525224208831787, |
|
"learning_rate": 3.387875726981563e-05, |
|
"loss": 0.2601, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.7338204592901879, |
|
"grad_norm": 0.6985558867454529, |
|
"learning_rate": 3.363051121945809e-05, |
|
"loss": 0.3448, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.7348643006263048, |
|
"grad_norm": 0.580680251121521, |
|
"learning_rate": 3.3382994040897196e-05, |
|
"loss": 0.2642, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.7359081419624217, |
|
"grad_norm": 0.5876568555831909, |
|
"learning_rate": 3.3136208452374254e-05, |
|
"loss": 0.271, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.7369519832985386, |
|
"grad_norm": 0.6181269884109497, |
|
"learning_rate": 3.289015716409631e-05, |
|
"loss": 0.2522, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.7379958246346555, |
|
"grad_norm": 0.6312392354011536, |
|
"learning_rate": 3.264484287820634e-05, |
|
"loss": 0.2735, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.7390396659707724, |
|
"grad_norm": 0.6163091063499451, |
|
"learning_rate": 3.2400268288753425e-05, |
|
"loss": 0.2415, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.7400835073068893, |
|
"grad_norm": 0.6303150057792664, |
|
"learning_rate": 3.2156436081663356e-05, |
|
"loss": 0.2495, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.7411273486430062, |
|
"grad_norm": 0.6544148325920105, |
|
"learning_rate": 3.191334893470907e-05, |
|
"loss": 0.2445, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.7421711899791231, |
|
"grad_norm": 0.468227744102478, |
|
"learning_rate": 3.167100951748115e-05, |
|
"loss": 0.1481, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.7432150313152401, |
|
"grad_norm": 0.5340932607650757, |
|
"learning_rate": 3.14294204913587e-05, |
|
"loss": 0.1431, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.744258872651357, |
|
"grad_norm": 0.5851957201957703, |
|
"learning_rate": 3.1188584509479866e-05, |
|
"loss": 0.1737, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.7453027139874739, |
|
"grad_norm": 0.5840248465538025, |
|
"learning_rate": 3.094850421671295e-05, |
|
"loss": 0.1752, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.7463465553235908, |
|
"grad_norm": 0.5659369826316833, |
|
"learning_rate": 3.0709182249627255e-05, |
|
"loss": 0.1967, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.7473903966597077, |
|
"grad_norm": 0.457015722990036, |
|
"learning_rate": 3.0470621236464036e-05, |
|
"loss": 0.1544, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.7484342379958246, |
|
"grad_norm": 0.5182324647903442, |
|
"learning_rate": 3.023282379710779e-05, |
|
"loss": 0.1414, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.7494780793319415, |
|
"grad_norm": 0.5334721207618713, |
|
"learning_rate": 2.9995792543057478e-05, |
|
"loss": 0.1615, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.7505219206680585, |
|
"grad_norm": 0.6061464548110962, |
|
"learning_rate": 2.9759530077397636e-05, |
|
"loss": 0.2452, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.7515657620041754, |
|
"grad_norm": 0.5774762630462646, |
|
"learning_rate": 2.9524038994770107e-05, |
|
"loss": 0.2234, |
|
"step": 720 |
|
}, |
|
    {
      "epoch": 0.7515657620041754,
      "eval_loss": 0.21803000569343567,
      "eval_runtime": 81.2533,
      "eval_samples_per_second": 19.864,
      "eval_steps_per_second": 9.932,
      "step": 720
    },
    {
      "epoch": 0.7526096033402923,
      "grad_norm": 0.43510791659355164,
      "learning_rate": 2.9289321881345254e-05,
      "loss": 0.1719,
      "step": 721
    },
    {
      "epoch": 0.7536534446764092,
      "grad_norm": 0.4775535762310028,
      "learning_rate": 2.905538131479376e-05,
      "loss": 0.1874,
      "step": 722
    },
    {
      "epoch": 0.7546972860125261,
      "grad_norm": 0.5398250818252563,
      "learning_rate": 2.8822219864258272e-05,
      "loss": 0.1667,
      "step": 723
    },
    {
      "epoch": 0.755741127348643,
      "grad_norm": 0.4013484716415405,
      "learning_rate": 2.8589840090325027e-05,
      "loss": 0.1252,
      "step": 724
    },
    {
      "epoch": 0.7567849686847599,
      "grad_norm": 0.8314927816390991,
      "learning_rate": 2.8358244544996038e-05,
      "loss": 0.3924,
      "step": 725
    },
    {
      "epoch": 0.7578288100208769,
      "grad_norm": 0.8482892513275146,
      "learning_rate": 2.8127435771660747e-05,
      "loss": 0.3675,
      "step": 726
    },
    {
      "epoch": 0.7588726513569938,
      "grad_norm": 0.6500058770179749,
      "learning_rate": 2.7897416305068323e-05,
      "loss": 0.3016,
      "step": 727
    },
    {
      "epoch": 0.7599164926931107,
      "grad_norm": 0.6976576447486877,
      "learning_rate": 2.7668188671299755e-05,
      "loss": 0.3133,
      "step": 728
    },
    {
      "epoch": 0.7609603340292276,
      "grad_norm": 0.7735400795936584,
      "learning_rate": 2.743975538774002e-05,
      "loss": 0.3557,
      "step": 729
    },
    {
      "epoch": 0.7620041753653445,
      "grad_norm": 0.7905307412147522,
      "learning_rate": 2.7212118963050592e-05,
      "loss": 0.3316,
      "step": 730
    },
    {
      "epoch": 0.7630480167014614,
      "grad_norm": 0.7098974585533142,
      "learning_rate": 2.6985281897141812e-05,
      "loss": 0.2877,
      "step": 731
    },
    {
      "epoch": 0.7640918580375783,
      "grad_norm": 0.6374404430389404,
      "learning_rate": 2.675924668114537e-05,
      "loss": 0.2589,
      "step": 732
    },
    {
      "epoch": 0.7651356993736952,
      "grad_norm": 0.7561594247817993,
      "learning_rate": 2.65340157973871e-05,
      "loss": 0.2953,
      "step": 733
    },
    {
      "epoch": 0.7661795407098121,
      "grad_norm": 0.7232580780982971,
      "learning_rate": 2.630959171935956e-05,
      "loss": 0.3002,
      "step": 734
    },
    {
      "epoch": 0.767223382045929,
      "grad_norm": 0.7140358686447144,
      "learning_rate": 2.6085976911694987e-05,
      "loss": 0.2545,
      "step": 735
    },
    {
      "epoch": 0.7682672233820459,
      "grad_norm": 0.749450147151947,
      "learning_rate": 2.586317383013821e-05,
      "loss": 0.25,
      "step": 736
    },
    {
      "epoch": 0.7693110647181628,
      "grad_norm": 0.847427248954773,
      "learning_rate": 2.564118492151957e-05,
      "loss": 0.2887,
      "step": 737
    },
    {
      "epoch": 0.7703549060542797,
      "grad_norm": 0.5772815346717834,
      "learning_rate": 2.5420012623728208e-05,
      "loss": 0.2101,
      "step": 738
    },
    {
      "epoch": 0.7713987473903967,
      "grad_norm": 0.6374946236610413,
      "learning_rate": 2.5199659365685235e-05,
      "loss": 0.2193,
      "step": 739
    },
    {
      "epoch": 0.7724425887265136,
      "grad_norm": 0.642776370048523,
      "learning_rate": 2.4980127567316948e-05,
      "loss": 0.2168,
      "step": 740
    },
    {
      "epoch": 0.7734864300626305,
      "grad_norm": 0.6233210563659668,
      "learning_rate": 2.4761419639528437e-05,
      "loss": 0.1986,
      "step": 741
    },
    {
      "epoch": 0.7745302713987474,
      "grad_norm": 0.47856077551841736,
      "learning_rate": 2.4543537984176978e-05,
      "loss": 0.1681,
      "step": 742
    },
    {
      "epoch": 0.7755741127348643,
      "grad_norm": 0.5664119720458984,
      "learning_rate": 2.4326484994045752e-05,
      "loss": 0.1847,
      "step": 743
    },
    {
      "epoch": 0.7766179540709812,
      "grad_norm": 0.5382654666900635,
      "learning_rate": 2.4110263052817394e-05,
      "loss": 0.1765,
      "step": 744
    },
    {
      "epoch": 0.7776617954070981,
      "grad_norm": 0.46047693490982056,
      "learning_rate": 2.3894874535048063e-05,
      "loss": 0.1487,
      "step": 745
    },
    {
      "epoch": 0.778705636743215,
      "grad_norm": 0.5149843096733093,
      "learning_rate": 2.368032180614118e-05,
      "loss": 0.1817,
      "step": 746
    },
    {
      "epoch": 0.7797494780793319,
      "grad_norm": 0.4655948579311371,
      "learning_rate": 2.346660722232148e-05,
      "loss": 0.1548,
      "step": 747
    },
    {
      "epoch": 0.7807933194154488,
      "grad_norm": 0.5313979387283325,
      "learning_rate": 2.325373313060919e-05,
      "loss": 0.1504,
      "step": 748
    },
    {
      "epoch": 0.7818371607515657,
      "grad_norm": 0.5336787700653076,
      "learning_rate": 2.3041701868794287e-05,
      "loss": 0.1306,
      "step": 749
    },
    {
      "epoch": 0.7828810020876826,
      "grad_norm": 0.7393002510070801,
      "learning_rate": 2.2830515765410622e-05,
      "loss": 0.1137,
      "step": 750
    },
    {
      "epoch": 0.7839248434237995,
      "grad_norm": 0.5598475933074951,
      "learning_rate": 2.262017713971063e-05,
      "loss": 0.3354,
      "step": 751
    },
    {
      "epoch": 0.7849686847599165,
      "grad_norm": 0.5945207476615906,
      "learning_rate": 2.2410688301639616e-05,
      "loss": 0.2804,
      "step": 752
    },
    {
      "epoch": 0.7860125260960334,
      "grad_norm": 0.4905988574028015,
      "learning_rate": 2.2202051551810565e-05,
      "loss": 0.2229,
      "step": 753
    },
    {
      "epoch": 0.7870563674321504,
      "grad_norm": 0.607475996017456,
      "learning_rate": 2.19942691814788e-05,
      "loss": 0.289,
      "step": 754
    },
    {
      "epoch": 0.7881002087682673,
      "grad_norm": 0.6597141027450562,
      "learning_rate": 2.178734347251673e-05,
      "loss": 0.2965,
      "step": 755
    },
    {
      "epoch": 0.7891440501043842,
      "grad_norm": 0.6289554238319397,
      "learning_rate": 2.1581276697388975e-05,
      "loss": 0.2582,
      "step": 756
    },
    {
      "epoch": 0.7901878914405011,
      "grad_norm": 0.6147776246070862,
      "learning_rate": 2.1376071119127338e-05,
      "loss": 0.2848,
      "step": 757
    },
    {
      "epoch": 0.791231732776618,
      "grad_norm": 0.5135255455970764,
      "learning_rate": 2.1171728991305795e-05,
      "loss": 0.2306,
      "step": 758
    },
    {
      "epoch": 0.7922755741127349,
      "grad_norm": 0.5475291013717651,
      "learning_rate": 2.0968252558016055e-05,
      "loss": 0.2394,
      "step": 759
    },
    {
      "epoch": 0.7933194154488518,
      "grad_norm": 0.6322019696235657,
      "learning_rate": 2.076564405384258e-05,
      "loss": 0.2066,
      "step": 760
    },
    {
      "epoch": 0.7943632567849687,
      "grad_norm": 0.5838301181793213,
      "learning_rate": 2.0563905703838316e-05,
      "loss": 0.2321,
      "step": 761
    },
    {
      "epoch": 0.7954070981210856,
      "grad_norm": 0.47901853919029236,
      "learning_rate": 2.0363039723500156e-05,
      "loss": 0.1445,
      "step": 762
    },
    {
      "epoch": 0.7964509394572025,
      "grad_norm": 0.6136653423309326,
      "learning_rate": 2.0163048318744493e-05,
      "loss": 0.2168,
      "step": 763
    },
    {
      "epoch": 0.7974947807933194,
      "grad_norm": 0.49966001510620117,
      "learning_rate": 1.9963933685883253e-05,
      "loss": 0.1414,
      "step": 764
    },
    {
      "epoch": 0.7985386221294363,
      "grad_norm": 0.5253435373306274,
      "learning_rate": 1.9765698011599466e-05,
      "loss": 0.1513,
      "step": 765
    },
    {
      "epoch": 0.7995824634655533,
      "grad_norm": 0.3400777578353882,
      "learning_rate": 1.9568343472923524e-05,
      "loss": 0.1112,
      "step": 766
    },
    {
      "epoch": 0.8006263048016702,
      "grad_norm": 0.5851226449012756,
      "learning_rate": 1.9371872237209165e-05,
      "loss": 0.1619,
      "step": 767
    },
    {
      "epoch": 0.8016701461377871,
      "grad_norm": 0.30553382635116577,
      "learning_rate": 1.917628646210957e-05,
      "loss": 0.0872,
      "step": 768
    },
    {
      "epoch": 0.802713987473904,
      "grad_norm": 0.5676819682121277,
      "learning_rate": 1.8981588295553853e-05,
      "loss": 0.2251,
      "step": 769
    },
    {
      "epoch": 0.8037578288100209,
      "grad_norm": 0.5648460984230042,
      "learning_rate": 1.878777987572339e-05,
      "loss": 0.2181,
      "step": 770
    },
    {
      "epoch": 0.8048016701461378,
      "grad_norm": 0.607913076877594,
      "learning_rate": 1.8594863331028224e-05,
      "loss": 0.2216,
      "step": 771
    },
    {
      "epoch": 0.8058455114822547,
      "grad_norm": 0.49821627140045166,
      "learning_rate": 1.840284078008393e-05,
      "loss": 0.1845,
      "step": 772
    },
    {
      "epoch": 0.8068893528183716,
      "grad_norm": 0.42891865968704224,
      "learning_rate": 1.821171433168809e-05,
      "loss": 0.1302,
      "step": 773
    },
    {
      "epoch": 0.8079331941544885,
      "grad_norm": 0.8176518678665161,
      "learning_rate": 1.8021486084797368e-05,
      "loss": 0.3043,
      "step": 774
    },
    {
      "epoch": 0.8089770354906054,
      "grad_norm": 0.6164413690567017,
      "learning_rate": 1.7832158128504328e-05,
      "loss": 0.2472,
      "step": 775
    },
    {
      "epoch": 0.8100208768267223,
      "grad_norm": 0.7422143220901489,
      "learning_rate": 1.7643732542014434e-05,
      "loss": 0.3395,
      "step": 776
    },
    {
      "epoch": 0.8110647181628392,
      "grad_norm": 0.7129305601119995,
      "learning_rate": 1.7456211394623378e-05,
      "loss": 0.2934,
      "step": 777
    },
    {
      "epoch": 0.8121085594989561,
      "grad_norm": 0.7638242840766907,
      "learning_rate": 1.7269596745694295e-05,
      "loss": 0.3399,
      "step": 778
    },
    {
      "epoch": 0.8131524008350731,
      "grad_norm": 0.624290943145752,
      "learning_rate": 1.7083890644635014e-05,
      "loss": 0.2525,
      "step": 779
    },
    {
      "epoch": 0.81419624217119,
      "grad_norm": 0.6761390566825867,
      "learning_rate": 1.6899095130875774e-05,
      "loss": 0.2753,
      "step": 780
    },
    {
      "epoch": 0.8152400835073069,
      "grad_norm": 0.8017570972442627,
      "learning_rate": 1.6715212233846655e-05,
      "loss": 0.3149,
      "step": 781
    },
    {
      "epoch": 0.8162839248434238,
      "grad_norm": 0.6984527111053467,
      "learning_rate": 1.6532243972955398e-05,
      "loss": 0.3103,
      "step": 782
    },
    {
      "epoch": 0.8173277661795407,
      "grad_norm": 0.6139991283416748,
      "learning_rate": 1.635019235756511e-05,
      "loss": 0.2274,
      "step": 783
    },
    {
      "epoch": 0.8183716075156576,
      "grad_norm": 0.7596770524978638,
      "learning_rate": 1.616905938697234e-05,
      "loss": 0.2612,
      "step": 784
    },
    {
      "epoch": 0.8194154488517745,
      "grad_norm": 0.685114324092865,
      "learning_rate": 1.5988847050385037e-05,
      "loss": 0.2542,
      "step": 785
    },
    {
      "epoch": 0.8204592901878914,
      "grad_norm": 0.6803342700004578,
      "learning_rate": 1.5809557326900647e-05,
      "loss": 0.2267,
      "step": 786
    },
    {
      "epoch": 0.8215031315240083,
      "grad_norm": 0.6194560527801514,
      "learning_rate": 1.5631192185484554e-05,
      "loss": 0.2029,
      "step": 787
    },
    {
      "epoch": 0.8225469728601252,
      "grad_norm": 0.8063942193984985,
      "learning_rate": 1.5453753584948328e-05,
      "loss": 0.2735,
      "step": 788
    },
    {
      "epoch": 0.8235908141962421,
      "grad_norm": 0.6718615293502808,
      "learning_rate": 1.527724347392815e-05,
      "loss": 0.2278,
      "step": 789
    },
    {
      "epoch": 0.824634655532359,
      "grad_norm": 0.6082893013954163,
      "learning_rate": 1.5101663790863596e-05,
      "loss": 0.1969,
      "step": 790
    },
    {
      "epoch": 0.8256784968684759,
      "grad_norm": 0.7025957703590393,
      "learning_rate": 1.4927016463976263e-05,
      "loss": 0.2071,
      "step": 791
    },
    {
      "epoch": 0.826722338204593,
      "grad_norm": 0.5405511260032654,
      "learning_rate": 1.4753303411248475e-05,
      "loss": 0.191,
      "step": 792
    },
    {
      "epoch": 0.8277661795407099,
      "grad_norm": 0.6413715481758118,
      "learning_rate": 1.4580526540402461e-05,
      "loss": 0.185,
      "step": 793
    },
    {
      "epoch": 0.8288100208768268,
      "grad_norm": 0.6367815136909485,
      "learning_rate": 1.4408687748879156e-05,
      "loss": 0.221,
      "step": 794
    },
    {
      "epoch": 0.8298538622129437,
      "grad_norm": 0.4684351086616516,
      "learning_rate": 1.4237788923817553e-05,
      "loss": 0.1415,
      "step": 795
    },
    {
      "epoch": 0.8308977035490606,
      "grad_norm": 0.5200543403625488,
      "learning_rate": 1.4067831942033904e-05,
      "loss": 0.1648,
      "step": 796
    },
    {
      "epoch": 0.8319415448851775,
      "grad_norm": 0.44216129183769226,
      "learning_rate": 1.3898818670001034e-05,
      "loss": 0.1608,
      "step": 797
    },
    {
      "epoch": 0.8329853862212944,
      "grad_norm": 0.5650377869606018,
      "learning_rate": 1.3730750963828032e-05,
      "loss": 0.1585,
      "step": 798
    },
    {
      "epoch": 0.8340292275574113,
      "grad_norm": 0.5171220898628235,
      "learning_rate": 1.3563630669239624e-05,
      "loss": 0.1575,
      "step": 799
    },
    {
      "epoch": 0.8350730688935282,
      "grad_norm": 0.5441738367080688,
      "learning_rate": 1.339745962155613e-05,
      "loss": 0.0949,
      "step": 800
    },
    {
      "epoch": 0.8361169102296451,
      "grad_norm": 0.6055110096931458,
      "learning_rate": 1.3232239645673217e-05,
      "loss": 0.4181,
      "step": 801
    },
    {
      "epoch": 0.837160751565762,
      "grad_norm": 0.6455709338188171,
      "learning_rate": 1.3067972556041752e-05,
      "loss": 0.3383,
      "step": 802
    },
    {
      "epoch": 0.8382045929018789,
      "grad_norm": 0.5849418640136719,
      "learning_rate": 1.2904660156648074e-05,
      "loss": 0.2803,
      "step": 803
    },
    {
      "epoch": 0.8392484342379958,
      "grad_norm": 0.538429856300354,
      "learning_rate": 1.2742304240994053e-05,
      "loss": 0.2402,
      "step": 804
    },
    {
      "epoch": 0.8402922755741128,
      "grad_norm": 0.6266717314720154,
      "learning_rate": 1.2580906592077402e-05,
      "loss": 0.3009,
      "step": 805
    },
    {
      "epoch": 0.8413361169102297,
      "grad_norm": 0.5892521142959595,
      "learning_rate": 1.2420468982372158e-05,
      "loss": 0.249,
      "step": 806
    },
    {
      "epoch": 0.8423799582463466,
      "grad_norm": 0.5853463411331177,
      "learning_rate": 1.226099317380912e-05,
      "loss": 0.2465,
      "step": 807
    },
    {
      "epoch": 0.8434237995824635,
      "grad_norm": 0.640910267829895,
      "learning_rate": 1.210248091775663e-05,
      "loss": 0.1872,
      "step": 808
    },
    {
      "epoch": 0.8444676409185804,
      "grad_norm": 0.6560840606689453,
      "learning_rate": 1.1944933955001225e-05,
      "loss": 0.2542,
      "step": 809
    },
    {
      "epoch": 0.8455114822546973,
      "grad_norm": 0.5866312980651855,
      "learning_rate": 1.1788354015728543e-05,
      "loss": 0.1911,
      "step": 810
    },
    {
      "epoch": 0.8465553235908142,
      "grad_norm": 0.6191656589508057,
      "learning_rate": 1.1632742819504405e-05,
      "loss": 0.2215,
      "step": 811
    },
    {
      "epoch": 0.8475991649269311,
      "grad_norm": 0.5251643657684326,
      "learning_rate": 1.147810207525577e-05,
      "loss": 0.1583,
      "step": 812
    },
    {
      "epoch": 0.848643006263048,
      "grad_norm": 0.4927600622177124,
      "learning_rate": 1.132443348125214e-05,
      "loss": 0.1512,
      "step": 813
    },
    {
      "epoch": 0.8496868475991649,
      "grad_norm": 0.4139147698879242,
      "learning_rate": 1.1171738725086833e-05,
      "loss": 0.1172,
      "step": 814
    },
    {
      "epoch": 0.8507306889352818,
      "grad_norm": 0.5602164268493652,
      "learning_rate": 1.1020019483658384e-05,
      "loss": 0.1821,
      "step": 815
    },
    {
      "epoch": 0.8517745302713987,
      "grad_norm": 0.4220430850982666,
      "learning_rate": 1.0869277423152246e-05,
      "loss": 0.1263,
      "step": 816
    },
    {
      "epoch": 0.8528183716075156,
      "grad_norm": 0.38943833112716675,
      "learning_rate": 1.0719514199022473e-05,
      "loss": 0.1141,
      "step": 817
    },
    {
      "epoch": 0.8538622129436325,
      "grad_norm": 0.5523675084114075,
      "learning_rate": 1.0570731455973414e-05,
      "loss": 0.1578,
      "step": 818
    },
    {
      "epoch": 0.8549060542797495,
      "grad_norm": 0.6071298718452454,
      "learning_rate": 1.04229308279418e-05,
      "loss": 0.2323,
      "step": 819
    },
    {
      "epoch": 0.8559498956158664,
      "grad_norm": 0.5393807291984558,
      "learning_rate": 1.0276113938078769e-05,
      "loss": 0.1967,
      "step": 820
    },
    {
      "epoch": 0.8569937369519833,
      "grad_norm": 0.6537972688674927,
      "learning_rate": 1.0130282398731982e-05,
      "loss": 0.1953,
      "step": 821
    },
    {
      "epoch": 0.8580375782881002,
      "grad_norm": 0.4417877197265625,
      "learning_rate": 9.985437811427933e-06,
      "loss": 0.1453,
      "step": 822
    },
    {
      "epoch": 0.8590814196242171,
      "grad_norm": 0.6434723734855652,
      "learning_rate": 9.841581766854401e-06,
      "loss": 0.1683,
      "step": 823
    },
    {
      "epoch": 0.860125260960334,
      "grad_norm": 0.7254697680473328,
      "learning_rate": 9.698715844842988e-06,
      "loss": 0.3499,
      "step": 824
    },
    {
      "epoch": 0.8611691022964509,
      "grad_norm": 0.6909394264221191,
      "learning_rate": 9.556841614351664e-06,
      "loss": 0.2757,
      "step": 825
    },
    {
      "epoch": 0.8622129436325678,
      "grad_norm": 0.7137805223464966,
      "learning_rate": 9.415960633447674e-06,
      "loss": 0.3122,
      "step": 826
    },
    {
      "epoch": 0.8632567849686847,
      "grad_norm": 0.7450171709060669,
      "learning_rate": 9.276074449290361e-06,
      "loss": 0.2928,
      "step": 827
    },
    {
      "epoch": 0.8643006263048016,
      "grad_norm": 0.6502891778945923,
      "learning_rate": 9.137184598114134e-06,
      "loss": 0.2517,
      "step": 828
    },
    {
      "epoch": 0.8653444676409185,
      "grad_norm": 0.6984295845031738,
      "learning_rate": 8.999292605211695e-06,
      "loss": 0.3062,
      "step": 829
    },
    {
      "epoch": 0.8663883089770354,
      "grad_norm": 0.7169867753982544,
      "learning_rate": 8.862399984917213e-06,
      "loss": 0.2748,
      "step": 830
    },
    {
      "epoch": 0.8674321503131524,
      "grad_norm": 0.7259141802787781,
      "learning_rate": 8.726508240589692e-06,
      "loss": 0.3033,
      "step": 831
    },
    {
      "epoch": 0.8684759916492694,
      "grad_norm": 0.7949566841125488,
      "learning_rate": 8.59161886459654e-06,
      "loss": 0.3149,
      "step": 832
    },
    {
      "epoch": 0.8695198329853863,
      "grad_norm": 0.7562083005905151,
      "learning_rate": 8.457733338297069e-06,
      "loss": 0.3192,
      "step": 833
    },
    {
      "epoch": 0.8705636743215032,
      "grad_norm": 0.6112555265426636,
      "learning_rate": 8.3248531320263e-06,
      "loss": 0.2175,
      "step": 834
    },
    {
      "epoch": 0.8716075156576201,
      "grad_norm": 0.7050125598907471,
      "learning_rate": 8.192979705078852e-06,
      "loss": 0.243,
      "step": 835
    },
    {
      "epoch": 0.872651356993737,
      "grad_norm": 0.6470485925674438,
      "learning_rate": 8.062114505692742e-06,
      "loss": 0.2384,
      "step": 836
    },
    {
      "epoch": 0.8736951983298539,
      "grad_norm": 0.7082952260971069,
      "learning_rate": 7.932258971033746e-06,
      "loss": 0.2795,
      "step": 837
    },
    {
      "epoch": 0.8747390396659708,
      "grad_norm": 0.843268096446991,
      "learning_rate": 7.803414527179343e-06,
      "loss": 0.2991,
      "step": 838
    },
    {
      "epoch": 0.8757828810020877,
      "grad_norm": 0.6356431245803833,
      "learning_rate": 7.675582589103247e-06,
      "loss": 0.1963,
      "step": 839
    },
    {
      "epoch": 0.8768267223382046,
      "grad_norm": 0.569520890712738,
      "learning_rate": 7.548764560659816e-06,
      "loss": 0.1703,
      "step": 840
    },
    {
      "epoch": 0.8778705636743215,
      "grad_norm": 0.6984921097755432,
      "learning_rate": 7.422961834568565e-06,
      "loss": 0.2231,
      "step": 841
    },
    {
      "epoch": 0.8789144050104384,
      "grad_norm": 0.6111634969711304,
      "learning_rate": 7.2981757923989755e-06,
      "loss": 0.1825,
      "step": 842
    },
    {
      "epoch": 0.8799582463465553,
      "grad_norm": 0.666388213634491,
      "learning_rate": 7.174407804555261e-06,
      "loss": 0.1775,
      "step": 843
    },
    {
      "epoch": 0.8810020876826722,
      "grad_norm": 0.7088585495948792,
      "learning_rate": 7.051659230261298e-06,
      "loss": 0.1873,
      "step": 844
    },
    {
      "epoch": 0.8820459290187892,
      "grad_norm": 0.6127867102622986,
      "learning_rate": 6.929931417545788e-06,
      "loss": 0.1732,
      "step": 845
    },
    {
      "epoch": 0.8830897703549061,
      "grad_norm": 0.637973964214325,
      "learning_rate": 6.809225703227351e-06,
      "loss": 0.1856,
      "step": 846
    },
    {
      "epoch": 0.884133611691023,
      "grad_norm": 0.5888153910636902,
      "learning_rate": 6.689543412899913e-06,
      "loss": 0.1872,
      "step": 847
    },
    {
      "epoch": 0.8851774530271399,
      "grad_norm": 0.6536146402359009,
      "learning_rate": 6.57088586091813e-06,
      "loss": 0.2259,
      "step": 848
    },
    {
      "epoch": 0.8862212943632568,
      "grad_norm": 0.4231550693511963,
      "learning_rate": 6.45325435038292e-06,
      "loss": 0.1388,
      "step": 849
    },
    {
      "epoch": 0.8872651356993737,
      "grad_norm": 0.8168404698371887,
      "learning_rate": 6.336650173127223e-06,
      "loss": 0.1743,
      "step": 850
    },
    {
      "epoch": 0.8883089770354906,
      "grad_norm": 0.5078144073486328,
      "learning_rate": 6.221074609701738e-06,
      "loss": 0.2793,
      "step": 851
    },
    {
      "epoch": 0.8893528183716075,
      "grad_norm": 0.7190085053443909,
      "learning_rate": 6.106528929360911e-06,
      "loss": 0.4109,
      "step": 852
    },
    {
      "epoch": 0.8903966597077244,
      "grad_norm": 0.5939377546310425,
      "learning_rate": 5.99301439004899e-06,
      "loss": 0.2797,
      "step": 853
    },
    {
      "epoch": 0.8914405010438413,
      "grad_norm": 0.7328153848648071,
      "learning_rate": 5.880532238386161e-06,
      "loss": 0.3652,
      "step": 854
    },
    {
      "epoch": 0.8924843423799582,
      "grad_norm": 0.6011344194412231,
      "learning_rate": 5.769083709654932e-06,
      "loss": 0.2699,
      "step": 855
    },
    {
      "epoch": 0.8935281837160751,
      "grad_norm": 0.7004411220550537,
      "learning_rate": 5.658670027786561e-06,
      "loss": 0.3191,
      "step": 856
    },
    {
      "epoch": 0.894572025052192,
      "grad_norm": 0.5985621809959412,
      "learning_rate": 5.549292405347495e-06,
      "loss": 0.2593,
      "step": 857
    },
    {
      "epoch": 0.8956158663883089,
      "grad_norm": 0.6498935222625732,
      "learning_rate": 5.440952043526215e-06,
      "loss": 0.3027,
      "step": 858
    },
    {
      "epoch": 0.8966597077244259,
      "grad_norm": 0.6431671380996704,
      "learning_rate": 5.3336501321199714e-06,
      "loss": 0.237,
      "step": 859
    },
    {
      "epoch": 0.8977035490605428,
      "grad_norm": 0.6603933572769165,
      "learning_rate": 5.22738784952167e-06,
      "loss": 0.2533,
      "step": 860
    },
    {
      "epoch": 0.8987473903966597,
      "grad_norm": 0.5379349589347839,
      "learning_rate": 5.1221663627070485e-06,
      "loss": 0.1883,
      "step": 861
    },
    {
      "epoch": 0.8997912317327766,
      "grad_norm": 0.4264977276325226,
      "learning_rate": 5.017986827221733e-06,
      "loss": 0.1174,
      "step": 862
    },
    {
      "epoch": 0.9008350730688935,
      "grad_norm": 0.5029094815254211,
      "learning_rate": 4.914850387168657e-06,
      "loss": 0.1564,
      "step": 863
    },
    {
      "epoch": 0.9018789144050104,
      "grad_norm": 0.5162425637245178,
      "learning_rate": 4.812758175195397e-06,
      "loss": 0.1547,
      "step": 864
    },
    {
      "epoch": 0.9029227557411273,
      "grad_norm": 0.6308012008666992,
      "learning_rate": 4.711711312481815e-06,
      "loss": 0.1844,
      "step": 865
    },
    {
      "epoch": 0.9039665970772442,
      "grad_norm": 0.46069368720054626,
      "learning_rate": 4.61171090872774e-06,
      "loss": 0.1363,
      "step": 866
    },
    {
      "epoch": 0.9050104384133612,
      "grad_norm": 0.40711909532546997,
      "learning_rate": 4.512758062140687e-06,
      "loss": 0.1252,
      "step": 867
    },
    {
      "epoch": 0.906054279749478,
      "grad_norm": 0.5450437664985657,
      "learning_rate": 4.4148538594239174e-06,
      "loss": 0.1885,
      "step": 868
    },
    {
      "epoch": 0.907098121085595,
      "grad_norm": 0.5693588852882385,
      "learning_rate": 4.317999375764459e-06,
      "loss": 0.2161,
      "step": 869
    },
    {
      "epoch": 0.9081419624217119,
      "grad_norm": 0.45915868878364563,
      "learning_rate": 4.2221956748212384e-06,
      "loss": 0.1642,
      "step": 870
    },
    {
      "epoch": 0.9091858037578288,
      "grad_norm": 0.3877635896205902,
      "learning_rate": 4.127443808713527e-06,
      "loss": 0.1424,
      "step": 871
    },
    {
      "epoch": 0.9102296450939458,
      "grad_norm": 0.5209342241287231,
      "learning_rate": 4.033744818009244e-06,
      "loss": 0.1703,
      "step": 872
    },
    {
      "epoch": 0.9112734864300627,
      "grad_norm": 0.4651091396808624,
      "learning_rate": 3.941099731713637e-06,
      "loss": 0.1584,
      "step": 873
    },
    {
      "epoch": 0.9123173277661796,
      "grad_norm": 0.8967810869216919,
      "learning_rate": 3.849509567257959e-06,
      "loss": 0.3558,
      "step": 874
    },
    {
      "epoch": 0.9133611691022965,
      "grad_norm": 0.8381048440933228,
      "learning_rate": 3.7589753304882124e-06,
      "loss": 0.3886,
      "step": 875
    },
    {
      "epoch": 0.9144050104384134,
      "grad_norm": 0.6149895787239075,
      "learning_rate": 3.669498015654249e-06,
      "loss": 0.31,
      "step": 876
    },
    {
      "epoch": 0.9154488517745303,
      "grad_norm": 0.7612007856369019,
      "learning_rate": 3.5810786053987023e-06,
      "loss": 0.359,
      "step": 877
    },
    {
      "epoch": 0.9164926931106472,
      "grad_norm": 0.6727755069732666,
      "learning_rate": 3.493718070746299e-06,
      "loss": 0.2748,
      "step": 878
    },
    {
      "epoch": 0.9175365344467641,
      "grad_norm": 0.734786868095398,
      "learning_rate": 3.40741737109318e-06,
      "loss": 0.3066,
      "step": 879
    },
    {
      "epoch": 0.918580375782881,
      "grad_norm": 0.6576768159866333,
      "learning_rate": 3.3221774541962845e-06,
      "loss": 0.2677,
      "step": 880
    },
    {
      "epoch": 0.9196242171189979,
      "grad_norm": 0.6196028590202332,
      "learning_rate": 3.2379992561630712e-06,
      "loss": 0.2484,
      "step": 881
    },
    {
      "epoch": 0.9206680584551148,
      "grad_norm": 0.7527311444282532,
      "learning_rate": 3.1548837014411357e-06,
      "loss": 0.2787,
      "step": 882
    },
    {
      "epoch": 0.9217118997912317,
      "grad_norm": 0.7063425779342651,
      "learning_rate": 3.0728317028080657e-06,
      "loss": 0.303,
      "step": 883
    },
    {
      "epoch": 0.9227557411273486,
      "grad_norm": 0.6437200307846069,
      "learning_rate": 2.9918441613615123e-06,
      "loss": 0.2501,
      "step": 884
    },
    {
      "epoch": 0.9237995824634656,
      "grad_norm": 0.8236239552497864,
      "learning_rate": 2.9119219665091344e-06,
      "loss": 0.3234,
      "step": 885
    },
    {
      "epoch": 0.9248434237995825,
      "grad_norm": 0.686543881893158,
      "learning_rate": 2.8330659959589946e-06,
      "loss": 0.2407,
      "step": 886
    },
    {
      "epoch": 0.9258872651356994,
      "grad_norm": 0.7093439698219299,
      "learning_rate": 2.755277115709842e-06,
      "loss": 0.246,
      "step": 887
    },
    {
      "epoch": 0.9269311064718163,
      "grad_norm": 0.7115840315818787,
      "learning_rate": 2.678556180041547e-06,
      "loss": 0.2551,
      "step": 888
    },
    {
      "epoch": 0.9279749478079332,
      "grad_norm": 0.6106806993484497,
      "learning_rate": 2.6029040315058485e-06,
      "loss": 0.2137,
      "step": 889
    },
    {
      "epoch": 0.9290187891440501,
      "grad_norm": 0.5461225509643555,
      "learning_rate": 2.5283215009169857e-06,
      "loss": 0.1847,
      "step": 890
    },
    {
      "epoch": 0.930062630480167,
      "grad_norm": 0.6097748875617981,
      "learning_rate": 2.4548094073426398e-06,
      "loss": 0.209,
      "step": 891
    },
    {
      "epoch": 0.9311064718162839,
      "grad_norm": 0.6483787298202515,
      "learning_rate": 2.3823685580949273e-06,
      "loss": 0.2244,
      "step": 892
    },
    {
      "epoch": 0.9321503131524008,
      "grad_norm": 0.5993359088897705,
      "learning_rate": 2.3109997487214983e-06,
      "loss": 0.189,
      "step": 893
    },
    {
      "epoch": 0.9331941544885177,
      "grad_norm": 0.8005963563919067,
      "learning_rate": 2.240703762996843e-06,
      "loss": 0.2385,
      "step": 894
    },
    {
      "epoch": 0.9342379958246346,
      "grad_norm": 0.4756294786930084,
      "learning_rate": 2.1714813729136975e-06,
      "loss": 0.1531,
      "step": 895
    },
    {
      "epoch": 0.9352818371607515,
      "grad_norm": 0.577684760093689,
      "learning_rate": 2.1033333386744846e-06,
      "loss": 0.1722,
      "step": 896
    },
    {
      "epoch": 0.9363256784968684,
      "grad_norm": 0.5431109666824341,
      "learning_rate": 2.036260408683033e-06,
      "loss": 0.1796,
      "step": 897
    },
    {
      "epoch": 0.9373695198329853,
      "grad_norm": 0.5910576581954956,
      "learning_rate": 1.9702633195363917e-06,
      "loss": 0.1689,
      "step": 898
    },
    {
      "epoch": 0.9384133611691023,
      "grad_norm": 0.6282055377960205,
      "learning_rate": 1.9053427960166182e-06,
      "loss": 0.1681,
      "step": 899
    },
    {
      "epoch": 0.9394572025052192,
      "grad_norm": 0.46189793944358826,
      "learning_rate": 1.8414995510829368e-06,
      "loss": 0.0832,
      "step": 900
    },
    {
      "epoch": 0.9405010438413361,
      "grad_norm": 0.7074732780456543,
      "learning_rate": 1.778734285863859e-06,
      "loss": 0.3342,
      "step": 901
    },
    {
      "epoch": 0.941544885177453,
      "grad_norm": 0.6299365162849426,
      "learning_rate": 1.717047689649487e-06,
      "loss": 0.3618,
      "step": 902
    },
    {
      "epoch": 0.94258872651357,
      "grad_norm": 0.6098471879959106,
      "learning_rate": 1.6564404398839439e-06,
      "loss": 0.3235,
      "step": 903
    },
    {
      "epoch": 0.9436325678496869,
      "grad_norm": 0.5733586549758911,
      "learning_rate": 1.5969132021579347e-06,
      "loss": 0.2646,
      "step": 904
    },
    {
      "epoch": 0.9446764091858038,
      "grad_norm": 0.6624009609222412,
      "learning_rate": 1.5384666302014406e-06,
      "loss": 0.3138,
      "step": 905
    },
    {
      "epoch": 0.9457202505219207,
      "grad_norm": 0.7231829762458801,
      "learning_rate": 1.481101365876547e-06,
      "loss": 0.3016,
      "step": 906
    },
    {
      "epoch": 0.9467640918580376,
      "grad_norm": 0.634404718875885,
      "learning_rate": 1.4248180391703614e-06,
      "loss": 0.2247,
      "step": 907
    },
    {
      "epoch": 0.9478079331941545,
      "grad_norm": 0.664415180683136,
      "learning_rate": 1.3696172681881503e-06,
      "loss": 0.3176,
      "step": 908
    },
    {
      "epoch": 0.9488517745302714,
      "grad_norm": 0.5892297625541687,
      "learning_rate": 1.3154996591464908e-06,
      "loss": 0.2327,
      "step": 909
    },
    {
      "epoch": 0.9498956158663883,
      "grad_norm": 0.5772663354873657,
      "learning_rate": 1.2624658063666639e-06,
      "loss": 0.2104,
      "step": 910
    },
    {
      "epoch": 0.9509394572025052,
      "grad_norm": 0.5238860845565796,
      "learning_rate": 1.2105162922680824e-06,
      "loss": 0.1807,
      "step": 911
    },
    {
      "epoch": 0.9519832985386222,
      "grad_norm": 0.5960121750831604,
      "learning_rate": 1.15965168736194e-06,
      "loss": 0.2065,
      "step": 912
    },
    {
      "epoch": 0.9530271398747391,
      "grad_norm": 0.47030001878738403,
      "learning_rate": 1.109872550244917e-06,
      "loss": 0.1519,
      "step": 913
    },
    {
      "epoch": 0.954070981210856,
      "grad_norm": 0.5340988636016846,
      "learning_rate": 1.0611794275930399e-06,
      "loss": 0.1603,
      "step": 914
    },
    {
      "epoch": 0.9551148225469729,
      "grad_norm": 0.3915995955467224,
      "learning_rate": 1.01357285415572e-06,
      "loss": 0.1179,
      "step": 915
    },
    {
      "epoch": 0.9561586638830898,
      "grad_norm": 0.3927573561668396,
      "learning_rate": 9.670533527498137e-07,
      "loss": 0.1151,
      "step": 916
    },
    {
      "epoch": 0.9572025052192067,
      "grad_norm": 0.3486252725124359,
      "learning_rate": 9.216214342539386e-07,
      "loss": 0.0942,
      "step": 917
    },
    {
      "epoch": 0.9582463465553236,
      "grad_norm": 0.3841801583766937,
      "learning_rate": 8.772775976028546e-07,
      "loss": 0.1185,
      "step": 918
    },
    {
      "epoch": 0.9592901878914405,
      "grad_norm": 0.49043968319892883,
      "learning_rate": 8.340223297819471e-07,
      "loss": 0.1571,
      "step": 919
    },
    {
      "epoch": 0.9603340292275574,
      "grad_norm": 0.5569294691085815,
      "learning_rate": 7.918561058219198e-07,
      "loss": 0.1835,
      "step": 920
    },
    {
      "epoch": 0.9613778705636743,
      "grad_norm": 0.39632856845855713,
      "learning_rate": 7.507793887935654e-07,
      "loss": 0.1649,
      "step": 921
    },
    {
      "epoch": 0.9624217118997912,
      "grad_norm": 0.46687746047973633,
      "learning_rate": 7.10792629802659e-07,
      "loss": 0.1609,
      "step": 922
    },
    {
      "epoch": 0.9634655532359081,
      "grad_norm": 0.3641255795955658,
      "learning_rate": 6.718962679850505e-07,
      "loss": 0.1387,
      "step": 923
    },
    {
      "epoch": 0.964509394572025,
      "grad_norm": 0.5133196115493774,
      "learning_rate": 6.340907305017907e-07,
      "loss": 0.1752,
      "step": 924
    },
    {
      "epoch": 0.965553235908142,
      "grad_norm": 0.7964520454406738,
      "learning_rate": 5.973764325344688e-07,
      "loss": 0.3131,
      "step": 925
    },
    {
      "epoch": 0.9665970772442589,
      "grad_norm": 0.7928893566131592,
      "learning_rate": 5.617537772806602e-07,
      "loss": 0.3533,
      "step": 926
    },
    {
      "epoch": 0.9676409185803758,
      "grad_norm": 0.7127247452735901,
      "learning_rate": 5.272231559494634e-07,
      "loss": 0.3272,
      "step": 927
    },
    {
      "epoch": 0.9686847599164927,
      "grad_norm": 0.6839384436607361,
      "learning_rate": 4.937849477572587e-07,
      "loss": 0.3335,
      "step": 928
    },
    {
      "epoch": 0.9697286012526096,
      "grad_norm": 0.6431897878646851,
      "learning_rate": 4.614395199234678e-07,
      "loss": 0.279,
      "step": 929
    },
    {
      "epoch": 0.9707724425887265,
      "grad_norm": 0.6917023658752441,
      "learning_rate": 4.3018722766661193e-07,
      "loss": 0.3009,
      "step": 930
    },
    {
      "epoch": 0.9718162839248434,
      "grad_norm": 0.6915965676307678,
      "learning_rate": 4.0002841420032634e-07,
      "loss": 0.2842,
      "step": 931
    },
    {
      "epoch": 0.9728601252609603,
      "grad_norm": 0.6639567017555237,
      "learning_rate": 3.7096341072964113e-07,
      "loss": 0.2613,
      "step": 932
    },
    {
      "epoch": 0.9739039665970772,
      "grad_norm": 0.5585394501686096,
      "learning_rate": 3.4299253644732855e-07,
      "loss": 0.2097,
      "step": 933
    },
    {
      "epoch": 0.9749478079331941,
      "grad_norm": 0.7092443704605103,
      "learning_rate": 3.161160985304168e-07,
      "loss": 0.2731,
      "step": 934
    },
    {
      "epoch": 0.975991649269311,
      "grad_norm": 0.8628230094909668,
      "learning_rate": 2.903343921367707e-07,
      "loss": 0.3337,
      "step": 935
    },
    {
      "epoch": 0.9770354906054279,
      "grad_norm": 0.7503064870834351,
      "learning_rate": 2.6564770040190535e-07,
      "loss": 0.2528,
      "step": 936
    },
    {
      "epoch": 0.9780793319415448,
      "grad_norm": 0.6571477651596069,
      "learning_rate": 2.420562944358329e-07,
      "loss": 0.2097,
      "step": 937
    },
    {
      "epoch": 0.9791231732776617,
      "grad_norm": 0.7250248193740845,
      "learning_rate": 2.1956043332010955e-07,
      "loss": 0.2418,
      "step": 938
    },
    {
      "epoch": 0.9801670146137788,
      "grad_norm": 0.5199112892150879,
      "learning_rate": 1.9816036410499338e-07,
      "loss": 0.1988,
      "step": 939
    },
    {
      "epoch": 0.9812108559498957,
      "grad_norm": 0.6716915369033813,
      "learning_rate": 1.7785632180670198e-07,
      "loss": 0.2049,
      "step": 940
    },
    {
      "epoch": 0.9822546972860126,
      "grad_norm": 0.6581385731697083,
      "learning_rate": 1.5864852940485898e-07,
      "loss": 0.2065,
      "step": 941
    },
    {
      "epoch": 0.9832985386221295,
      "grad_norm": 0.5942964553833008,
      "learning_rate": 1.405371978400516e-07,
      "loss": 0.188,
      "step": 942
    },
    {
      "epoch": 0.9843423799582464,
      "grad_norm": 0.6144523620605469,
      "learning_rate": 1.2352252601147697e-07,
      "loss": 0.1848,
      "step": 943
    },
    {
      "epoch": 0.9853862212943633,
      "grad_norm": 0.6265504956245422,
      "learning_rate": 1.0760470077479934e-07,
      "loss": 0.2032,
      "step": 944
    },
    {
      "epoch": 0.9864300626304802,
      "grad_norm": 0.5609498023986816,
      "learning_rate": 9.278389694006296e-08,
      "loss": 0.1657,
      "step": 945
    },
    {
      "epoch": 0.9874739039665971,
      "grad_norm": 0.5039533972740173,
      "learning_rate": 7.906027726981568e-08,
      "loss": 0.158,
      "step": 946
    },
    {
      "epoch": 0.988517745302714,
      "grad_norm": 0.5719618201255798,
      "learning_rate": 6.643399247725502e-08,
      "loss": 0.151,
      "step": 947
    },
    {
      "epoch": 0.9895615866388309,
      "grad_norm": 0.5313311815261841,
      "learning_rate": 5.490518122465149e-08,
      "loss": 0.1563,
      "step": 948
    },
    {
      "epoch": 0.9906054279749478,
      "grad_norm": 0.48120567202568054,
      "learning_rate": 4.447397012177224e-08,
      "loss": 0.1376,
      "step": 949
    },
    {
      "epoch": 0.9916492693110647,
      "grad_norm": 0.44713348150253296,
      "learning_rate": 3.5140473724482034e-08,
      "loss": 0.1198,
      "step": 950
    },
    {
      "epoch": 0.9926931106471816,
      "grad_norm": 0.6056554913520813,
      "learning_rate": 2.6904794533544332e-08,
      "loss": 0.2989,
      "step": 951
    },
    {
      "epoch": 0.9937369519832986,
      "grad_norm": 0.6082270741462708,
      "learning_rate": 1.976702299344435e-08,
      "loss": 0.2356,
      "step": 952
    },
    {
      "epoch": 0.9947807933194155,
      "grad_norm": 0.46945539116859436,
      "learning_rate": 1.3727237491412137e-08,
      "loss": 0.1697,
      "step": 953
    },
    {
      "epoch": 0.9958246346555324,
      "grad_norm": 0.5799762010574341,
      "learning_rate": 8.785504356556563e-09,
      "loss": 0.2196,
      "step": 954
    },
    {
      "epoch": 0.9968684759916493,
      "grad_norm": 0.6873183846473694,
      "learning_rate": 4.941877859143684e-09,
      "loss": 0.2819,
      "step": 955
    },
    {
      "epoch": 0.9979123173277662,
      "grad_norm": 0.6708718538284302,
      "learning_rate": 2.1964002100083312e-09,
      "loss": 0.2342,
      "step": 956
    },
    {
      "epoch": 0.9989561586638831,
      "grad_norm": 0.6889147758483887,
      "learning_rate": 5.491015600656013e-10,
      "loss": 0.2048,
      "step": 957
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.7045766115188599,
      "learning_rate": 0.0,
      "loss": 0.1947,
      "step": 958
    }
  ],
  "logging_steps": 1,
  "max_steps": 958,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 240,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.355154213385011e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}