|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 48870, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.003069367710251688, |
|
"grad_norm": 6.029524326324463, |
|
"learning_rate": 2.9969920196439535e-05, |
|
"loss": 2.626, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.006138735420503376, |
|
"grad_norm": 4.651051044464111, |
|
"learning_rate": 2.993922651933702e-05, |
|
"loss": 2.5542, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.009208103130755065, |
|
"grad_norm": 4.611798286437988, |
|
"learning_rate": 2.99085328422345e-05, |
|
"loss": 2.4749, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.012277470841006752, |
|
"grad_norm": 5.144523620605469, |
|
"learning_rate": 2.9877839165131984e-05, |
|
"loss": 2.4706, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.015346838551258441, |
|
"grad_norm": 5.4743242263793945, |
|
"learning_rate": 2.9847145488029468e-05, |
|
"loss": 2.5232, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01841620626151013, |
|
"grad_norm": 4.009519100189209, |
|
"learning_rate": 2.981645181092695e-05, |
|
"loss": 2.4524, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.021485573971761818, |
|
"grad_norm": 4.539732456207275, |
|
"learning_rate": 2.9785758133824433e-05, |
|
"loss": 2.4725, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.024554941682013505, |
|
"grad_norm": 3.896458148956299, |
|
"learning_rate": 2.9755064456721917e-05, |
|
"loss": 2.4763, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.027624309392265192, |
|
"grad_norm": 4.5362982749938965, |
|
"learning_rate": 2.9724370779619398e-05, |
|
"loss": 2.4475, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.030693677102516883, |
|
"grad_norm": 3.903672456741333, |
|
"learning_rate": 2.9693677102516883e-05, |
|
"loss": 2.4884, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03376304481276857, |
|
"grad_norm": 3.7690269947052, |
|
"learning_rate": 2.9662983425414363e-05, |
|
"loss": 2.4525, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.03683241252302026, |
|
"grad_norm": 3.7247776985168457, |
|
"learning_rate": 2.9632289748311848e-05, |
|
"loss": 2.4778, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.03990178023327195, |
|
"grad_norm": 5.1837310791015625, |
|
"learning_rate": 2.9601596071209332e-05, |
|
"loss": 2.4917, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.042971147943523635, |
|
"grad_norm": 4.13453483581543, |
|
"learning_rate": 2.9570902394106813e-05, |
|
"loss": 2.4869, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.04604051565377532, |
|
"grad_norm": 3.7461624145507812, |
|
"learning_rate": 2.9540208717004297e-05, |
|
"loss": 2.4408, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04910988336402701, |
|
"grad_norm": 3.429506778717041, |
|
"learning_rate": 2.950951503990178e-05, |
|
"loss": 2.442, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.0521792510742787, |
|
"grad_norm": 4.001476287841797, |
|
"learning_rate": 2.9478821362799262e-05, |
|
"loss": 2.3645, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.055248618784530384, |
|
"grad_norm": 4.0728960037231445, |
|
"learning_rate": 2.9448127685696746e-05, |
|
"loss": 2.5016, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.05831798649478207, |
|
"grad_norm": 4.214133262634277, |
|
"learning_rate": 2.941743400859423e-05, |
|
"loss": 2.4142, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.061387354205033766, |
|
"grad_norm": 4.556704521179199, |
|
"learning_rate": 2.938674033149171e-05, |
|
"loss": 2.5025, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.06445672191528545, |
|
"grad_norm": 4.376175403594971, |
|
"learning_rate": 2.9356046654389195e-05, |
|
"loss": 2.3906, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.06752608962553713, |
|
"grad_norm": 4.843928813934326, |
|
"learning_rate": 2.932535297728668e-05, |
|
"loss": 2.4598, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.07059545733578883, |
|
"grad_norm": 4.809575080871582, |
|
"learning_rate": 2.9294659300184164e-05, |
|
"loss": 2.4699, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.07366482504604052, |
|
"grad_norm": 4.783190727233887, |
|
"learning_rate": 2.9263965623081648e-05, |
|
"loss": 2.4256, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.0767341927562922, |
|
"grad_norm": 3.410017728805542, |
|
"learning_rate": 2.923327194597913e-05, |
|
"loss": 2.469, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.0798035604665439, |
|
"grad_norm": 4.03816556930542, |
|
"learning_rate": 2.9202578268876613e-05, |
|
"loss": 2.3954, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.08287292817679558, |
|
"grad_norm": 4.535765171051025, |
|
"learning_rate": 2.9171884591774097e-05, |
|
"loss": 2.3857, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.08594229588704727, |
|
"grad_norm": 3.941899061203003, |
|
"learning_rate": 2.9141190914671578e-05, |
|
"loss": 2.4259, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.08901166359729895, |
|
"grad_norm": 3.957204818725586, |
|
"learning_rate": 2.9110497237569062e-05, |
|
"loss": 2.4487, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.09208103130755065, |
|
"grad_norm": 4.369974136352539, |
|
"learning_rate": 2.9079803560466547e-05, |
|
"loss": 2.4166, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09515039901780234, |
|
"grad_norm": 3.611785650253296, |
|
"learning_rate": 2.9049109883364027e-05, |
|
"loss": 2.4958, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.09821976672805402, |
|
"grad_norm": 5.152332305908203, |
|
"learning_rate": 2.901841620626151e-05, |
|
"loss": 2.4116, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.10128913443830571, |
|
"grad_norm": 3.69728684425354, |
|
"learning_rate": 2.8987722529158996e-05, |
|
"loss": 2.3373, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.1043585021485574, |
|
"grad_norm": 4.104907512664795, |
|
"learning_rate": 2.8957028852056477e-05, |
|
"loss": 2.3961, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.10742786985880909, |
|
"grad_norm": 4.160801887512207, |
|
"learning_rate": 2.892633517495396e-05, |
|
"loss": 2.3749, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.11049723756906077, |
|
"grad_norm": 4.3206329345703125, |
|
"learning_rate": 2.8895641497851445e-05, |
|
"loss": 2.3582, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.11356660527931246, |
|
"grad_norm": 5.133695125579834, |
|
"learning_rate": 2.8864947820748926e-05, |
|
"loss": 2.4267, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.11663597298956414, |
|
"grad_norm": 3.392789125442505, |
|
"learning_rate": 2.883425414364641e-05, |
|
"loss": 2.4299, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.11970534069981584, |
|
"grad_norm": 4.9408135414123535, |
|
"learning_rate": 2.880356046654389e-05, |
|
"loss": 2.3965, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.12277470841006753, |
|
"grad_norm": 4.085551738739014, |
|
"learning_rate": 2.8772866789441375e-05, |
|
"loss": 2.4148, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12584407612031923, |
|
"grad_norm": 4.733358860015869, |
|
"learning_rate": 2.874217311233886e-05, |
|
"loss": 2.4431, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.1289134438305709, |
|
"grad_norm": 3.3507819175720215, |
|
"learning_rate": 2.871147943523634e-05, |
|
"loss": 2.3127, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.1319828115408226, |
|
"grad_norm": 5.074892520904541, |
|
"learning_rate": 2.8680785758133825e-05, |
|
"loss": 2.2961, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.13505217925107427, |
|
"grad_norm": 4.0248332023620605, |
|
"learning_rate": 2.865009208103131e-05, |
|
"loss": 2.4545, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.13812154696132597, |
|
"grad_norm": 3.959451675415039, |
|
"learning_rate": 2.861939840392879e-05, |
|
"loss": 2.4582, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.14119091467157766, |
|
"grad_norm": 5.6295294761657715, |
|
"learning_rate": 2.8588704726826274e-05, |
|
"loss": 2.4184, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.14426028238182934, |
|
"grad_norm": 4.008995056152344, |
|
"learning_rate": 2.8558011049723758e-05, |
|
"loss": 2.436, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.14732965009208104, |
|
"grad_norm": 4.01780891418457, |
|
"learning_rate": 2.852731737262124e-05, |
|
"loss": 2.4243, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.15039901780233272, |
|
"grad_norm": 3.839801549911499, |
|
"learning_rate": 2.8496623695518723e-05, |
|
"loss": 2.4003, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.1534683855125844, |
|
"grad_norm": 3.6963717937469482, |
|
"learning_rate": 2.8465930018416207e-05, |
|
"loss": 2.4496, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.15653775322283608, |
|
"grad_norm": 4.555826187133789, |
|
"learning_rate": 2.8435236341313688e-05, |
|
"loss": 2.4101, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.1596071209330878, |
|
"grad_norm": 3.507671356201172, |
|
"learning_rate": 2.8404542664211172e-05, |
|
"loss": 2.4258, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.16267648864333947, |
|
"grad_norm": 4.644598007202148, |
|
"learning_rate": 2.8373848987108653e-05, |
|
"loss": 2.3937, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.16574585635359115, |
|
"grad_norm": 3.737030506134033, |
|
"learning_rate": 2.834315531000614e-05, |
|
"loss": 2.3961, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.16881522406384286, |
|
"grad_norm": 3.4786527156829834, |
|
"learning_rate": 2.8312461632903625e-05, |
|
"loss": 2.4414, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.17188459177409454, |
|
"grad_norm": 3.9619481563568115, |
|
"learning_rate": 2.8281767955801106e-05, |
|
"loss": 2.4952, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.17495395948434622, |
|
"grad_norm": 4.628708362579346, |
|
"learning_rate": 2.825107427869859e-05, |
|
"loss": 2.4284, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.1780233271945979, |
|
"grad_norm": 3.561638593673706, |
|
"learning_rate": 2.8220380601596074e-05, |
|
"loss": 2.3983, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.1810926949048496, |
|
"grad_norm": 4.126139163970947, |
|
"learning_rate": 2.8189686924493555e-05, |
|
"loss": 2.3642, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.1841620626151013, |
|
"grad_norm": 3.888535737991333, |
|
"learning_rate": 2.815899324739104e-05, |
|
"loss": 2.2939, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.18723143032535297, |
|
"grad_norm": 4.639522552490234, |
|
"learning_rate": 2.8128299570288524e-05, |
|
"loss": 2.5008, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.19030079803560468, |
|
"grad_norm": 4.336818695068359, |
|
"learning_rate": 2.8097605893186004e-05, |
|
"loss": 2.3622, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.19337016574585636, |
|
"grad_norm": 3.351541519165039, |
|
"learning_rate": 2.806691221608349e-05, |
|
"loss": 2.3638, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.19643953345610804, |
|
"grad_norm": 7.246761798858643, |
|
"learning_rate": 2.8036218538980973e-05, |
|
"loss": 2.4054, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.19950890116635972, |
|
"grad_norm": 3.76177978515625, |
|
"learning_rate": 2.8005524861878454e-05, |
|
"loss": 2.4991, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.20257826887661143, |
|
"grad_norm": 3.489014148712158, |
|
"learning_rate": 2.7974831184775938e-05, |
|
"loss": 2.3624, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.2056476365868631, |
|
"grad_norm": 3.3915023803710938, |
|
"learning_rate": 2.794413750767342e-05, |
|
"loss": 2.3862, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.2087170042971148, |
|
"grad_norm": 3.8997626304626465, |
|
"learning_rate": 2.7913443830570903e-05, |
|
"loss": 2.2968, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.21178637200736647, |
|
"grad_norm": 3.1932547092437744, |
|
"learning_rate": 2.7882750153468387e-05, |
|
"loss": 2.3592, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.21485573971761818, |
|
"grad_norm": 4.888554096221924, |
|
"learning_rate": 2.7852056476365868e-05, |
|
"loss": 2.4128, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.21792510742786986, |
|
"grad_norm": 4.414622783660889, |
|
"learning_rate": 2.7821362799263352e-05, |
|
"loss": 2.4198, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.22099447513812154, |
|
"grad_norm": 3.756305456161499, |
|
"learning_rate": 2.7790669122160836e-05, |
|
"loss": 2.3664, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.22406384284837325, |
|
"grad_norm": 4.073141098022461, |
|
"learning_rate": 2.7759975445058317e-05, |
|
"loss": 2.3876, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.22713321055862493, |
|
"grad_norm": 2.9590511322021484, |
|
"learning_rate": 2.77292817679558e-05, |
|
"loss": 2.3702, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.2302025782688766, |
|
"grad_norm": 3.9760727882385254, |
|
"learning_rate": 2.7698588090853286e-05, |
|
"loss": 2.3944, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.2332719459791283, |
|
"grad_norm": 3.8550660610198975, |
|
"learning_rate": 2.7667894413750767e-05, |
|
"loss": 2.3543, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.23634131368938, |
|
"grad_norm": 5.260376930236816, |
|
"learning_rate": 2.763720073664825e-05, |
|
"loss": 2.3058, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.23941068139963168, |
|
"grad_norm": 3.408364772796631, |
|
"learning_rate": 2.7606507059545735e-05, |
|
"loss": 2.4173, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.24248004910988336, |
|
"grad_norm": 3.482743263244629, |
|
"learning_rate": 2.7575813382443216e-05, |
|
"loss": 2.3997, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.24554941682013506, |
|
"grad_norm": 3.7070858478546143, |
|
"learning_rate": 2.75451197053407e-05, |
|
"loss": 2.4266, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.24861878453038674, |
|
"grad_norm": 4.024835109710693, |
|
"learning_rate": 2.751442602823818e-05, |
|
"loss": 2.3201, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.25168815224063845, |
|
"grad_norm": 4.50393533706665, |
|
"learning_rate": 2.7483732351135665e-05, |
|
"loss": 2.3541, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.25475751995089013, |
|
"grad_norm": 4.806800365447998, |
|
"learning_rate": 2.745303867403315e-05, |
|
"loss": 2.4102, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.2578268876611418, |
|
"grad_norm": 3.5676867961883545, |
|
"learning_rate": 2.742234499693063e-05, |
|
"loss": 2.2263, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.2608962553713935, |
|
"grad_norm": 3.5269808769226074, |
|
"learning_rate": 2.7391651319828114e-05, |
|
"loss": 2.2747, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.2639656230816452, |
|
"grad_norm": 4.45748233795166, |
|
"learning_rate": 2.73609576427256e-05, |
|
"loss": 2.3761, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.26703499079189685, |
|
"grad_norm": 4.212084770202637, |
|
"learning_rate": 2.7330263965623083e-05, |
|
"loss": 2.4948, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.27010435850214853, |
|
"grad_norm": 4.344183444976807, |
|
"learning_rate": 2.7299570288520567e-05, |
|
"loss": 2.4308, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.27317372621240027, |
|
"grad_norm": 3.9603850841522217, |
|
"learning_rate": 2.726887661141805e-05, |
|
"loss": 2.4254, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.27624309392265195, |
|
"grad_norm": 4.453806400299072, |
|
"learning_rate": 2.7238182934315532e-05, |
|
"loss": 2.4717, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.27931246163290363, |
|
"grad_norm": 3.924269437789917, |
|
"learning_rate": 2.7207489257213016e-05, |
|
"loss": 2.3845, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.2823818293431553, |
|
"grad_norm": 4.238902568817139, |
|
"learning_rate": 2.71767955801105e-05, |
|
"loss": 2.4194, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.285451197053407, |
|
"grad_norm": 3.7095208168029785, |
|
"learning_rate": 2.714610190300798e-05, |
|
"loss": 2.3036, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.28852056476365867, |
|
"grad_norm": 3.765899419784546, |
|
"learning_rate": 2.7115408225905466e-05, |
|
"loss": 2.4129, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.29158993247391035, |
|
"grad_norm": 4.793846130371094, |
|
"learning_rate": 2.7084714548802946e-05, |
|
"loss": 2.485, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.2946593001841621, |
|
"grad_norm": 3.294372797012329, |
|
"learning_rate": 2.705402087170043e-05, |
|
"loss": 2.3896, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.29772866789441377, |
|
"grad_norm": 4.122358798980713, |
|
"learning_rate": 2.7023327194597915e-05, |
|
"loss": 2.3746, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.30079803560466545, |
|
"grad_norm": 3.1560487747192383, |
|
"learning_rate": 2.6992633517495396e-05, |
|
"loss": 2.346, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.30386740331491713, |
|
"grad_norm": 3.7786178588867188, |
|
"learning_rate": 2.696193984039288e-05, |
|
"loss": 2.3669, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.3069367710251688, |
|
"grad_norm": 3.4450085163116455, |
|
"learning_rate": 2.6931246163290364e-05, |
|
"loss": 2.3475, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.3100061387354205, |
|
"grad_norm": 4.380395889282227, |
|
"learning_rate": 2.6900552486187845e-05, |
|
"loss": 2.3755, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.31307550644567217, |
|
"grad_norm": 4.272715091705322, |
|
"learning_rate": 2.686985880908533e-05, |
|
"loss": 2.3517, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.3161448741559239, |
|
"grad_norm": 5.976320266723633, |
|
"learning_rate": 2.6839165131982813e-05, |
|
"loss": 2.3665, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.3192142418661756, |
|
"grad_norm": 4.1683759689331055, |
|
"learning_rate": 2.6808471454880294e-05, |
|
"loss": 2.2517, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.32228360957642727, |
|
"grad_norm": 3.623004674911499, |
|
"learning_rate": 2.677777777777778e-05, |
|
"loss": 2.4741, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.32535297728667895, |
|
"grad_norm": 4.282279968261719, |
|
"learning_rate": 2.6747084100675263e-05, |
|
"loss": 2.3191, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.3284223449969306, |
|
"grad_norm": 3.761108636856079, |
|
"learning_rate": 2.6716390423572744e-05, |
|
"loss": 2.4249, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.3314917127071823, |
|
"grad_norm": 3.7316195964813232, |
|
"learning_rate": 2.6685696746470228e-05, |
|
"loss": 2.3679, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.334561080417434, |
|
"grad_norm": 3.8297908306121826, |
|
"learning_rate": 2.665500306936771e-05, |
|
"loss": 2.4157, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.3376304481276857, |
|
"grad_norm": 4.17257833480835, |
|
"learning_rate": 2.6624309392265193e-05, |
|
"loss": 2.2362, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.3406998158379374, |
|
"grad_norm": 4.0827484130859375, |
|
"learning_rate": 2.6593615715162677e-05, |
|
"loss": 2.3369, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.3437691835481891, |
|
"grad_norm": 3.3072879314422607, |
|
"learning_rate": 2.6562922038060158e-05, |
|
"loss": 2.3797, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.34683855125844076, |
|
"grad_norm": 3.316751480102539, |
|
"learning_rate": 2.6532228360957642e-05, |
|
"loss": 2.4562, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.34990791896869244, |
|
"grad_norm": 4.482247829437256, |
|
"learning_rate": 2.6501534683855126e-05, |
|
"loss": 2.373, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.3529772866789441, |
|
"grad_norm": 4.056297779083252, |
|
"learning_rate": 2.6470841006752607e-05, |
|
"loss": 2.4294, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.3560466543891958, |
|
"grad_norm": 3.596730947494507, |
|
"learning_rate": 2.644014732965009e-05, |
|
"loss": 2.3219, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.35911602209944754, |
|
"grad_norm": 4.089284896850586, |
|
"learning_rate": 2.6409453652547576e-05, |
|
"loss": 2.4088, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.3621853898096992, |
|
"grad_norm": 4.409155368804932, |
|
"learning_rate": 2.6378759975445056e-05, |
|
"loss": 2.3561, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.3652547575199509, |
|
"grad_norm": 5.397696018218994, |
|
"learning_rate": 2.6348066298342544e-05, |
|
"loss": 2.3602, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.3683241252302026, |
|
"grad_norm": 3.62880277633667, |
|
"learning_rate": 2.6317372621240028e-05, |
|
"loss": 2.4074, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.37139349294045426, |
|
"grad_norm": 3.9512007236480713, |
|
"learning_rate": 2.628667894413751e-05, |
|
"loss": 2.3754, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.37446286065070594, |
|
"grad_norm": 4.806766033172607, |
|
"learning_rate": 2.6255985267034993e-05, |
|
"loss": 2.362, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.3775322283609576, |
|
"grad_norm": 4.055029392242432, |
|
"learning_rate": 2.6225291589932474e-05, |
|
"loss": 2.3769, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.38060159607120936, |
|
"grad_norm": 4.231038570404053, |
|
"learning_rate": 2.619459791282996e-05, |
|
"loss": 2.4561, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.38367096378146104, |
|
"grad_norm": 3.5260846614837646, |
|
"learning_rate": 2.6163904235727443e-05, |
|
"loss": 2.4241, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.3867403314917127, |
|
"grad_norm": 4.557247161865234, |
|
"learning_rate": 2.6133210558624923e-05, |
|
"loss": 2.3357, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.3898096992019644, |
|
"grad_norm": 3.3867931365966797, |
|
"learning_rate": 2.6102516881522408e-05, |
|
"loss": 2.2559, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.3928790669122161, |
|
"grad_norm": 4.224850177764893, |
|
"learning_rate": 2.6071823204419892e-05, |
|
"loss": 2.3338, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.39594843462246776, |
|
"grad_norm": 4.095287322998047, |
|
"learning_rate": 2.6041129527317373e-05, |
|
"loss": 2.3384, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.39901780233271944, |
|
"grad_norm": 2.982825517654419, |
|
"learning_rate": 2.6010435850214857e-05, |
|
"loss": 2.2956, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.4020871700429711, |
|
"grad_norm": 4.3587164878845215, |
|
"learning_rate": 2.597974217311234e-05, |
|
"loss": 2.313, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.40515653775322286, |
|
"grad_norm": 4.225688457489014, |
|
"learning_rate": 2.5949048496009822e-05, |
|
"loss": 2.3302, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.40822590546347454, |
|
"grad_norm": 3.7242987155914307, |
|
"learning_rate": 2.5918354818907306e-05, |
|
"loss": 2.3699, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.4112952731737262, |
|
"grad_norm": 4.286329746246338, |
|
"learning_rate": 2.588766114180479e-05, |
|
"loss": 2.3835, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.4143646408839779, |
|
"grad_norm": 3.054641008377075, |
|
"learning_rate": 2.585696746470227e-05, |
|
"loss": 2.3717, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.4174340085942296, |
|
"grad_norm": 3.183530330657959, |
|
"learning_rate": 2.5826273787599756e-05, |
|
"loss": 2.4187, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.42050337630448126, |
|
"grad_norm": 3.300554037094116, |
|
"learning_rate": 2.5795580110497236e-05, |
|
"loss": 2.381, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.42357274401473294, |
|
"grad_norm": 3.742980718612671, |
|
"learning_rate": 2.576488643339472e-05, |
|
"loss": 2.3156, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.4266421117249847, |
|
"grad_norm": 4.5531182289123535, |
|
"learning_rate": 2.5734192756292205e-05, |
|
"loss": 2.2977, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.42971147943523635, |
|
"grad_norm": 4.084106922149658, |
|
"learning_rate": 2.5703499079189686e-05, |
|
"loss": 2.3399, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.43278084714548803, |
|
"grad_norm": 3.3674798011779785, |
|
"learning_rate": 2.567280540208717e-05, |
|
"loss": 2.4205, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.4358502148557397, |
|
"grad_norm": 3.401578664779663, |
|
"learning_rate": 2.5642111724984654e-05, |
|
"loss": 2.3318, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.4389195825659914, |
|
"grad_norm": 3.7515037059783936, |
|
"learning_rate": 2.5611418047882135e-05, |
|
"loss": 2.3514, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.4419889502762431, |
|
"grad_norm": 3.1685400009155273, |
|
"learning_rate": 2.558072437077962e-05, |
|
"loss": 2.3682, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.44505831798649476, |
|
"grad_norm": 3.990283489227295, |
|
"learning_rate": 2.5550030693677103e-05, |
|
"loss": 2.2508, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.4481276856967465, |
|
"grad_norm": 4.544641971588135, |
|
"learning_rate": 2.5519337016574584e-05, |
|
"loss": 2.3624, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.45119705340699817, |
|
"grad_norm": 3.986804962158203, |
|
"learning_rate": 2.548864333947207e-05, |
|
"loss": 2.384, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.45426642111724985, |
|
"grad_norm": 4.338369846343994, |
|
"learning_rate": 2.5457949662369553e-05, |
|
"loss": 2.3311, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.45733578882750153, |
|
"grad_norm": 3.8103485107421875, |
|
"learning_rate": 2.5427255985267033e-05, |
|
"loss": 2.2405, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.4604051565377532, |
|
"grad_norm": 3.461311101913452, |
|
"learning_rate": 2.5396562308164518e-05, |
|
"loss": 2.3843, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.4634745242480049, |
|
"grad_norm": 3.6048476696014404, |
|
"learning_rate": 2.5365868631062002e-05, |
|
"loss": 2.395, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.4665438919582566, |
|
"grad_norm": 3.4888088703155518, |
|
"learning_rate": 2.5335174953959486e-05, |
|
"loss": 2.3449, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.4696132596685083, |
|
"grad_norm": 3.9866726398468018, |
|
"learning_rate": 2.530448127685697e-05, |
|
"loss": 2.3806, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.47268262737876, |
|
"grad_norm": 3.5875093936920166, |
|
"learning_rate": 2.527378759975445e-05, |
|
"loss": 2.32, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.47575199508901167, |
|
"grad_norm": 3.5421364307403564, |
|
"learning_rate": 2.5243093922651935e-05, |
|
"loss": 2.4109, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.47882136279926335, |
|
"grad_norm": 3.4700534343719482, |
|
"learning_rate": 2.521240024554942e-05, |
|
"loss": 2.38, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.48189073050951503, |
|
"grad_norm": 3.879786729812622, |
|
"learning_rate": 2.51817065684469e-05, |
|
"loss": 2.389, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.4849600982197667, |
|
"grad_norm": 4.261646747589111, |
|
"learning_rate": 2.5151012891344385e-05, |
|
"loss": 2.3983, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.4880294659300184, |
|
"grad_norm": 3.6179709434509277, |
|
"learning_rate": 2.512031921424187e-05, |
|
"loss": 2.3239, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.4910988336402701, |
|
"grad_norm": 3.9921975135803223, |
|
"learning_rate": 2.508962553713935e-05, |
|
"loss": 2.3513, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.4941682013505218, |
|
"grad_norm": 3.7387430667877197, |
|
"learning_rate": 2.5058931860036834e-05, |
|
"loss": 2.3613, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.4972375690607735, |
|
"grad_norm": 5.033226013183594, |
|
"learning_rate": 2.5028238182934318e-05, |
|
"loss": 2.3512, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.5003069367710252, |
|
"grad_norm": 3.8006532192230225, |
|
"learning_rate": 2.49975445058318e-05, |
|
"loss": 2.4073, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.5033763044812769, |
|
"grad_norm": 3.866370916366577, |
|
"learning_rate": 2.4966850828729283e-05, |
|
"loss": 2.2788, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.5064456721915286, |
|
"grad_norm": 3.6110663414001465, |
|
"learning_rate": 2.4936157151626764e-05, |
|
"loss": 2.3755, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.5095150399017803, |
|
"grad_norm": 3.55604887008667, |
|
"learning_rate": 2.4905463474524248e-05, |
|
"loss": 2.3332, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.512584407612032, |
|
"grad_norm": 3.8975794315338135, |
|
"learning_rate": 2.4874769797421732e-05, |
|
"loss": 2.3691, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.5156537753222836, |
|
"grad_norm": 3.8415000438690186, |
|
"learning_rate": 2.4844076120319213e-05, |
|
"loss": 2.3481, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.5187231430325353, |
|
"grad_norm": 2.9400956630706787, |
|
"learning_rate": 2.4813382443216698e-05, |
|
"loss": 2.3491, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.521792510742787, |
|
"grad_norm": 3.419921875, |
|
"learning_rate": 2.4782688766114182e-05, |
|
"loss": 2.2612, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.5248618784530387, |
|
"grad_norm": 3.066887617111206, |
|
"learning_rate": 2.4751995089011663e-05, |
|
"loss": 2.3013, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.5279312461632903, |
|
"grad_norm": 3.7565064430236816, |
|
"learning_rate": 2.4721301411909147e-05, |
|
"loss": 2.3407, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.531000613873542, |
|
"grad_norm": 3.6040759086608887, |
|
"learning_rate": 2.469060773480663e-05, |
|
"loss": 2.3678, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.5340699815837937, |
|
"grad_norm": 3.4393301010131836, |
|
"learning_rate": 2.4659914057704112e-05, |
|
"loss": 2.321, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.5371393492940454, |
|
"grad_norm": 4.549371242523193, |
|
"learning_rate": 2.4629220380601596e-05, |
|
"loss": 2.235, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.5402087170042971, |
|
"grad_norm": 3.2161850929260254, |
|
"learning_rate": 2.459852670349908e-05, |
|
"loss": 2.4156, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.5432780847145487, |
|
"grad_norm": 4.914154052734375, |
|
"learning_rate": 2.456783302639656e-05, |
|
"loss": 2.3127, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.5463474524248005, |
|
"grad_norm": 2.948514699935913, |
|
"learning_rate": 2.4537139349294045e-05, |
|
"loss": 2.3924, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.5494168201350522, |
|
"grad_norm": 3.669295072555542, |
|
"learning_rate": 2.4506445672191526e-05, |
|
"loss": 2.3639, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.5524861878453039, |
|
"grad_norm": 4.5282793045043945, |
|
"learning_rate": 2.447575199508901e-05, |
|
"loss": 2.2916, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 3.6461524963378906, |
|
"learning_rate": 2.4445058317986495e-05, |
|
"loss": 2.4551, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.5586249232658073, |
|
"grad_norm": 3.558283567428589, |
|
"learning_rate": 2.4414364640883975e-05, |
|
"loss": 2.3189, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.5616942909760589, |
|
"grad_norm": 3.31160569190979, |
|
"learning_rate": 2.4383670963781463e-05, |
|
"loss": 2.2667, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.5647636586863106, |
|
"grad_norm": 4.318973541259766, |
|
"learning_rate": 2.4352977286678947e-05, |
|
"loss": 2.3747, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.5678330263965623, |
|
"grad_norm": 3.0922465324401855, |
|
"learning_rate": 2.4322283609576428e-05, |
|
"loss": 2.2472, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.570902394106814, |
|
"grad_norm": 3.615382432937622, |
|
"learning_rate": 2.4291589932473912e-05, |
|
"loss": 2.3784, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.5739717618170657, |
|
"grad_norm": 4.567912578582764, |
|
"learning_rate": 2.4260896255371397e-05, |
|
"loss": 2.3389, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.5770411295273173, |
|
"grad_norm": 6.014679431915283, |
|
"learning_rate": 2.4230202578268877e-05, |
|
"loss": 2.2728, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.580110497237569, |
|
"grad_norm": 3.3985538482666016, |
|
"learning_rate": 2.419950890116636e-05, |
|
"loss": 2.3619, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.5831798649478207, |
|
"grad_norm": 3.6719815731048584, |
|
"learning_rate": 2.4168815224063846e-05, |
|
"loss": 2.3639, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.5862492326580724, |
|
"grad_norm": 4.365039348602295, |
|
"learning_rate": 2.4138121546961327e-05, |
|
"loss": 2.3359, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.5893186003683242, |
|
"grad_norm": 4.179307460784912, |
|
"learning_rate": 2.410742786985881e-05, |
|
"loss": 2.3482, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.5923879680785759, |
|
"grad_norm": 3.4102518558502197, |
|
"learning_rate": 2.4076734192756292e-05, |
|
"loss": 2.3535, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.5954573357888275, |
|
"grad_norm": 4.294723033905029, |
|
"learning_rate": 2.4046040515653776e-05, |
|
"loss": 2.3686, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.5985267034990792, |
|
"grad_norm": 4.105884552001953, |
|
"learning_rate": 2.401534683855126e-05, |
|
"loss": 2.3453, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.6015960712093309, |
|
"grad_norm": 3.321399450302124, |
|
"learning_rate": 2.398465316144874e-05, |
|
"loss": 2.4262, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.6046654389195826, |
|
"grad_norm": 4.505974292755127, |
|
"learning_rate": 2.3953959484346225e-05, |
|
"loss": 2.3449, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.6077348066298343, |
|
"grad_norm": 3.7572021484375, |
|
"learning_rate": 2.392326580724371e-05, |
|
"loss": 2.3861, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.6108041743400859, |
|
"grad_norm": 3.6873481273651123, |
|
"learning_rate": 2.389257213014119e-05, |
|
"loss": 2.3733, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.6138735420503376, |
|
"grad_norm": 4.152860641479492, |
|
"learning_rate": 2.3861878453038675e-05, |
|
"loss": 2.3398, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.6169429097605893, |
|
"grad_norm": 3.437129497528076, |
|
"learning_rate": 2.383118477593616e-05, |
|
"loss": 2.3327, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 0.620012277470841, |
|
"grad_norm": 4.148296356201172, |
|
"learning_rate": 2.380049109883364e-05, |
|
"loss": 2.278, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.6230816451810927, |
|
"grad_norm": 3.596064805984497, |
|
"learning_rate": 2.3769797421731124e-05, |
|
"loss": 2.38, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 0.6261510128913443, |
|
"grad_norm": 3.819524049758911, |
|
"learning_rate": 2.3739103744628608e-05, |
|
"loss": 2.2876, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.629220380601596, |
|
"grad_norm": 4.420265197753906, |
|
"learning_rate": 2.370841006752609e-05, |
|
"loss": 2.3627, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.6322897483118478, |
|
"grad_norm": 4.018989562988281, |
|
"learning_rate": 2.3677716390423573e-05, |
|
"loss": 2.3528, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.6353591160220995, |
|
"grad_norm": 3.5963191986083984, |
|
"learning_rate": 2.3647022713321054e-05, |
|
"loss": 2.3546, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 0.6384284837323512, |
|
"grad_norm": 4.6964874267578125, |
|
"learning_rate": 2.3616329036218538e-05, |
|
"loss": 2.4427, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.6414978514426029, |
|
"grad_norm": 3.6673765182495117, |
|
"learning_rate": 2.3585635359116022e-05, |
|
"loss": 2.3476, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 0.6445672191528545, |
|
"grad_norm": 4.190234184265137, |
|
"learning_rate": 2.3554941682013503e-05, |
|
"loss": 2.3326, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.6476365868631062, |
|
"grad_norm": 3.8142518997192383, |
|
"learning_rate": 2.3524248004910987e-05, |
|
"loss": 2.2891, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 0.6507059545733579, |
|
"grad_norm": 3.8022055625915527, |
|
"learning_rate": 2.349355432780847e-05, |
|
"loss": 2.3652, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.6537753222836096, |
|
"grad_norm": 3.8915176391601562, |
|
"learning_rate": 2.3462860650705952e-05, |
|
"loss": 2.3169, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 0.6568446899938613, |
|
"grad_norm": 3.6545536518096924, |
|
"learning_rate": 2.3432166973603437e-05, |
|
"loss": 2.3371, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.6599140577041129, |
|
"grad_norm": 3.763852596282959, |
|
"learning_rate": 2.3401473296500924e-05, |
|
"loss": 2.3069, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.6629834254143646, |
|
"grad_norm": 3.8284034729003906, |
|
"learning_rate": 2.3370779619398405e-05, |
|
"loss": 2.3334, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.6660527931246163, |
|
"grad_norm": 7.501564979553223, |
|
"learning_rate": 2.334008594229589e-05, |
|
"loss": 2.4061, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.669122160834868, |
|
"grad_norm": 3.7189035415649414, |
|
"learning_rate": 2.3309392265193374e-05, |
|
"loss": 2.3226, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.6721915285451197, |
|
"grad_norm": 3.5018155574798584, |
|
"learning_rate": 2.3278698588090854e-05, |
|
"loss": 2.2974, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 0.6752608962553714, |
|
"grad_norm": 4.443968772888184, |
|
"learning_rate": 2.324800491098834e-05, |
|
"loss": 2.3936, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.6783302639656231, |
|
"grad_norm": 3.740535020828247, |
|
"learning_rate": 2.321731123388582e-05, |
|
"loss": 2.2829, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 0.6813996316758748, |
|
"grad_norm": 3.538335084915161, |
|
"learning_rate": 2.3186617556783304e-05, |
|
"loss": 2.2824, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.6844689993861265, |
|
"grad_norm": 3.7425730228424072, |
|
"learning_rate": 2.3155923879680788e-05, |
|
"loss": 2.2808, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 0.6875383670963782, |
|
"grad_norm": 3.8225579261779785, |
|
"learning_rate": 2.312523020257827e-05, |
|
"loss": 2.3258, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.6906077348066298, |
|
"grad_norm": 4.689228057861328, |
|
"learning_rate": 2.3094536525475753e-05, |
|
"loss": 2.2818, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.6936771025168815, |
|
"grad_norm": 3.968703031539917, |
|
"learning_rate": 2.3063842848373237e-05, |
|
"loss": 2.3767, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.6967464702271332, |
|
"grad_norm": 4.036931037902832, |
|
"learning_rate": 2.3033149171270718e-05, |
|
"loss": 2.3459, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 0.6998158379373849, |
|
"grad_norm": 4.426519870758057, |
|
"learning_rate": 2.3002455494168202e-05, |
|
"loss": 2.327, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.7028852056476366, |
|
"grad_norm": 3.6122524738311768, |
|
"learning_rate": 2.2971761817065686e-05, |
|
"loss": 2.3813, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 0.7059545733578882, |
|
"grad_norm": 5.523836612701416, |
|
"learning_rate": 2.2941068139963167e-05, |
|
"loss": 2.3577, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.7090239410681399, |
|
"grad_norm": 3.1946020126342773, |
|
"learning_rate": 2.291037446286065e-05, |
|
"loss": 2.3005, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 0.7120933087783916, |
|
"grad_norm": 4.517838001251221, |
|
"learning_rate": 2.2879680785758136e-05, |
|
"loss": 2.3537, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.7151626764886433, |
|
"grad_norm": 3.4100501537323, |
|
"learning_rate": 2.2848987108655617e-05, |
|
"loss": 2.3526, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 0.7182320441988951, |
|
"grad_norm": 4.370871067047119, |
|
"learning_rate": 2.28182934315531e-05, |
|
"loss": 2.2843, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.7213014119091468, |
|
"grad_norm": 3.3597848415374756, |
|
"learning_rate": 2.2787599754450585e-05, |
|
"loss": 2.4289, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 0.7243707796193984, |
|
"grad_norm": 4.361307144165039, |
|
"learning_rate": 2.2756906077348066e-05, |
|
"loss": 2.3334, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.7274401473296501, |
|
"grad_norm": 3.5192790031433105, |
|
"learning_rate": 2.272621240024555e-05, |
|
"loss": 2.3935, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 0.7305095150399018, |
|
"grad_norm": 3.058115005493164, |
|
"learning_rate": 2.269551872314303e-05, |
|
"loss": 2.2948, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.7335788827501535, |
|
"grad_norm": 3.7125728130340576, |
|
"learning_rate": 2.2664825046040515e-05, |
|
"loss": 2.2702, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 0.7366482504604052, |
|
"grad_norm": 3.5723328590393066, |
|
"learning_rate": 2.2634131368938e-05, |
|
"loss": 2.3361, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.7397176181706568, |
|
"grad_norm": 4.428549766540527, |
|
"learning_rate": 2.260343769183548e-05, |
|
"loss": 2.2938, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 0.7427869858809085, |
|
"grad_norm": 3.8374624252319336, |
|
"learning_rate": 2.2572744014732964e-05, |
|
"loss": 2.2387, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.7458563535911602, |
|
"grad_norm": 4.0965657234191895, |
|
"learning_rate": 2.254205033763045e-05, |
|
"loss": 2.2988, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 0.7489257213014119, |
|
"grad_norm": 3.138101816177368, |
|
"learning_rate": 2.251135666052793e-05, |
|
"loss": 2.3083, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.7519950890116636, |
|
"grad_norm": 3.7243382930755615, |
|
"learning_rate": 2.2480662983425414e-05, |
|
"loss": 2.3094, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 0.7550644567219152, |
|
"grad_norm": 4.791036605834961, |
|
"learning_rate": 2.2449969306322898e-05, |
|
"loss": 2.317, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.7581338244321669, |
|
"grad_norm": 4.1747236251831055, |
|
"learning_rate": 2.2419275629220382e-05, |
|
"loss": 2.462, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 0.7612031921424187, |
|
"grad_norm": 4.427381992340088, |
|
"learning_rate": 2.2388581952117866e-05, |
|
"loss": 2.3935, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.7642725598526704, |
|
"grad_norm": 4.517187118530273, |
|
"learning_rate": 2.235788827501535e-05, |
|
"loss": 2.3619, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 0.7673419275629221, |
|
"grad_norm": 3.2976391315460205, |
|
"learning_rate": 2.232719459791283e-05, |
|
"loss": 2.3363, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.7704112952731738, |
|
"grad_norm": 3.017157793045044, |
|
"learning_rate": 2.2296500920810316e-05, |
|
"loss": 2.254, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 0.7734806629834254, |
|
"grad_norm": 4.820321083068848, |
|
"learning_rate": 2.2265807243707796e-05, |
|
"loss": 2.2243, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.7765500306936771, |
|
"grad_norm": 4.536325454711914, |
|
"learning_rate": 2.223511356660528e-05, |
|
"loss": 2.3242, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 0.7796193984039288, |
|
"grad_norm": 4.465803623199463, |
|
"learning_rate": 2.2204419889502765e-05, |
|
"loss": 2.2615, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.7826887661141805, |
|
"grad_norm": 4.061604022979736, |
|
"learning_rate": 2.2173726212400246e-05, |
|
"loss": 2.2753, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 0.7857581338244322, |
|
"grad_norm": 3.7470462322235107, |
|
"learning_rate": 2.214303253529773e-05, |
|
"loss": 2.3521, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.7888275015346838, |
|
"grad_norm": 3.7258481979370117, |
|
"learning_rate": 2.2112338858195214e-05, |
|
"loss": 2.2881, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 0.7918968692449355, |
|
"grad_norm": 4.148687839508057, |
|
"learning_rate": 2.2081645181092695e-05, |
|
"loss": 2.2603, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.7949662369551872, |
|
"grad_norm": 3.804433584213257, |
|
"learning_rate": 2.205095150399018e-05, |
|
"loss": 2.2985, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 0.7980356046654389, |
|
"grad_norm": 4.394881248474121, |
|
"learning_rate": 2.2020257826887663e-05, |
|
"loss": 2.353, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.8011049723756906, |
|
"grad_norm": 5.619194030761719, |
|
"learning_rate": 2.1989564149785144e-05, |
|
"loss": 2.3212, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 0.8041743400859422, |
|
"grad_norm": 3.7602977752685547, |
|
"learning_rate": 2.195887047268263e-05, |
|
"loss": 2.2732, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.807243707796194, |
|
"grad_norm": 3.5865325927734375, |
|
"learning_rate": 2.1928176795580113e-05, |
|
"loss": 2.2786, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 0.8103130755064457, |
|
"grad_norm": 4.248644828796387, |
|
"learning_rate": 2.1897483118477594e-05, |
|
"loss": 2.2986, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.8133824432166974, |
|
"grad_norm": 3.960653781890869, |
|
"learning_rate": 2.1866789441375078e-05, |
|
"loss": 2.3929, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 0.8164518109269491, |
|
"grad_norm": 4.417232990264893, |
|
"learning_rate": 2.183609576427256e-05, |
|
"loss": 2.331, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.8195211786372008, |
|
"grad_norm": 4.520796298980713, |
|
"learning_rate": 2.1805402087170043e-05, |
|
"loss": 2.3343, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 0.8225905463474524, |
|
"grad_norm": 3.5469796657562256, |
|
"learning_rate": 2.1774708410067527e-05, |
|
"loss": 2.3623, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.8256599140577041, |
|
"grad_norm": 3.0526225566864014, |
|
"learning_rate": 2.1744014732965008e-05, |
|
"loss": 2.2649, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 0.8287292817679558, |
|
"grad_norm": 3.904680013656616, |
|
"learning_rate": 2.1713321055862492e-05, |
|
"loss": 2.3419, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.8317986494782075, |
|
"grad_norm": 3.709381580352783, |
|
"learning_rate": 2.1682627378759976e-05, |
|
"loss": 2.37, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 0.8348680171884592, |
|
"grad_norm": 3.5717175006866455, |
|
"learning_rate": 2.1651933701657457e-05, |
|
"loss": 2.3169, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.8379373848987108, |
|
"grad_norm": 4.073272228240967, |
|
"learning_rate": 2.162124002455494e-05, |
|
"loss": 2.3328, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 0.8410067526089625, |
|
"grad_norm": 3.8410749435424805, |
|
"learning_rate": 2.1590546347452426e-05, |
|
"loss": 2.3072, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.8440761203192142, |
|
"grad_norm": 4.8291144371032715, |
|
"learning_rate": 2.1559852670349906e-05, |
|
"loss": 2.3592, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 0.8471454880294659, |
|
"grad_norm": 4.293553352355957, |
|
"learning_rate": 2.152915899324739e-05, |
|
"loss": 2.3657, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.8502148557397177, |
|
"grad_norm": 4.011140823364258, |
|
"learning_rate": 2.1498465316144875e-05, |
|
"loss": 2.304, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 0.8532842234499693, |
|
"grad_norm": 3.8303871154785156, |
|
"learning_rate": 2.1467771639042356e-05, |
|
"loss": 2.2724, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.856353591160221, |
|
"grad_norm": 4.488582611083984, |
|
"learning_rate": 2.143707796193984e-05, |
|
"loss": 2.2965, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 0.8594229588704727, |
|
"grad_norm": 3.4766058921813965, |
|
"learning_rate": 2.1406384284837324e-05, |
|
"loss": 2.3577, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.8624923265807244, |
|
"grad_norm": 4.232321262359619, |
|
"learning_rate": 2.137569060773481e-05, |
|
"loss": 2.3609, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 0.8655616942909761, |
|
"grad_norm": 4.51991605758667, |
|
"learning_rate": 2.1344996930632293e-05, |
|
"loss": 2.3213, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.8686310620012277, |
|
"grad_norm": 3.356311321258545, |
|
"learning_rate": 2.1314303253529773e-05, |
|
"loss": 2.3987, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 0.8717004297114794, |
|
"grad_norm": 3.596140146255493, |
|
"learning_rate": 2.1283609576427258e-05, |
|
"loss": 2.362, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.8747697974217311, |
|
"grad_norm": 5.02532958984375, |
|
"learning_rate": 2.1252915899324742e-05, |
|
"loss": 2.4156, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 0.8778391651319828, |
|
"grad_norm": 3.531360626220703, |
|
"learning_rate": 2.1222222222222223e-05, |
|
"loss": 2.312, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.8809085328422345, |
|
"grad_norm": 4.262710094451904, |
|
"learning_rate": 2.1191528545119707e-05, |
|
"loss": 2.3432, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 0.8839779005524862, |
|
"grad_norm": 4.449579238891602, |
|
"learning_rate": 2.116083486801719e-05, |
|
"loss": 2.3759, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.8870472682627378, |
|
"grad_norm": 4.485136032104492, |
|
"learning_rate": 2.1130141190914672e-05, |
|
"loss": 2.2131, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 0.8901166359729895, |
|
"grad_norm": 3.402162551879883, |
|
"learning_rate": 2.1099447513812156e-05, |
|
"loss": 2.3088, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.8931860036832413, |
|
"grad_norm": 3.7395241260528564, |
|
"learning_rate": 2.106875383670964e-05, |
|
"loss": 2.3447, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 0.896255371393493, |
|
"grad_norm": 3.98075008392334, |
|
"learning_rate": 2.103806015960712e-05, |
|
"loss": 2.3214, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.8993247391037447, |
|
"grad_norm": 3.871152400970459, |
|
"learning_rate": 2.1007366482504605e-05, |
|
"loss": 2.3396, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 0.9023941068139963, |
|
"grad_norm": 3.5539169311523438, |
|
"learning_rate": 2.0976672805402086e-05, |
|
"loss": 2.269, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.905463474524248, |
|
"grad_norm": 4.932919502258301, |
|
"learning_rate": 2.094597912829957e-05, |
|
"loss": 2.3098, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 0.9085328422344997, |
|
"grad_norm": 3.794808864593506, |
|
"learning_rate": 2.0915285451197055e-05, |
|
"loss": 2.277, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.9116022099447514, |
|
"grad_norm": 3.729210138320923, |
|
"learning_rate": 2.0884591774094536e-05, |
|
"loss": 2.4038, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 0.9146715776550031, |
|
"grad_norm": 4.467114448547363, |
|
"learning_rate": 2.085389809699202e-05, |
|
"loss": 2.2834, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.9177409453652547, |
|
"grad_norm": 4.059779644012451, |
|
"learning_rate": 2.0823204419889504e-05, |
|
"loss": 2.3535, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 0.9208103130755064, |
|
"grad_norm": 4.626834392547607, |
|
"learning_rate": 2.0792510742786985e-05, |
|
"loss": 2.2687, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.9238796807857581, |
|
"grad_norm": 4.254091739654541, |
|
"learning_rate": 2.076181706568447e-05, |
|
"loss": 2.3406, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 0.9269490484960098, |
|
"grad_norm": 4.225036144256592, |
|
"learning_rate": 2.0731123388581953e-05, |
|
"loss": 2.2917, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.9300184162062615, |
|
"grad_norm": 3.2555954456329346, |
|
"learning_rate": 2.0700429711479434e-05, |
|
"loss": 2.2775, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 0.9330877839165131, |
|
"grad_norm": 4.839592933654785, |
|
"learning_rate": 2.066973603437692e-05, |
|
"loss": 2.2714, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.9361571516267649, |
|
"grad_norm": 4.091184616088867, |
|
"learning_rate": 2.0639042357274403e-05, |
|
"loss": 2.2871, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 0.9392265193370166, |
|
"grad_norm": 3.666154384613037, |
|
"learning_rate": 2.0608348680171883e-05, |
|
"loss": 2.3108, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.9422958870472683, |
|
"grad_norm": 4.287258625030518, |
|
"learning_rate": 2.0577655003069368e-05, |
|
"loss": 2.2785, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 0.94536525475752, |
|
"grad_norm": 3.8487017154693604, |
|
"learning_rate": 2.054696132596685e-05, |
|
"loss": 2.3108, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.9484346224677717, |
|
"grad_norm": 3.3819682598114014, |
|
"learning_rate": 2.0516267648864333e-05, |
|
"loss": 2.3336, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 0.9515039901780233, |
|
"grad_norm": 4.332981109619141, |
|
"learning_rate": 2.0485573971761817e-05, |
|
"loss": 2.2581, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.954573357888275, |
|
"grad_norm": 4.729110240936279, |
|
"learning_rate": 2.0454880294659298e-05, |
|
"loss": 2.3418, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 0.9576427255985267, |
|
"grad_norm": 3.3030595779418945, |
|
"learning_rate": 2.0424186617556785e-05, |
|
"loss": 2.2136, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.9607120933087784, |
|
"grad_norm": 4.455896377563477, |
|
"learning_rate": 2.039349294045427e-05, |
|
"loss": 2.2574, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 0.9637814610190301, |
|
"grad_norm": 4.112648010253906, |
|
"learning_rate": 2.036279926335175e-05, |
|
"loss": 2.3621, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.9668508287292817, |
|
"grad_norm": 3.577320098876953, |
|
"learning_rate": 2.0332105586249235e-05, |
|
"loss": 2.2561, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 0.9699201964395334, |
|
"grad_norm": 4.021890163421631, |
|
"learning_rate": 2.030141190914672e-05, |
|
"loss": 2.3077, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.9729895641497851, |
|
"grad_norm": 3.277940273284912, |
|
"learning_rate": 2.02707182320442e-05, |
|
"loss": 2.2639, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 0.9760589318600368, |
|
"grad_norm": 4.129881858825684, |
|
"learning_rate": 2.0240024554941684e-05, |
|
"loss": 2.3038, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.9791282995702886, |
|
"grad_norm": 3.3244733810424805, |
|
"learning_rate": 2.0209330877839168e-05, |
|
"loss": 2.346, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 0.9821976672805403, |
|
"grad_norm": 3.341198444366455, |
|
"learning_rate": 2.017863720073665e-05, |
|
"loss": 2.321, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.9852670349907919, |
|
"grad_norm": 3.724945306777954, |
|
"learning_rate": 2.0147943523634133e-05, |
|
"loss": 2.2743, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 0.9883364027010436, |
|
"grad_norm": 3.501654624938965, |
|
"learning_rate": 2.0117249846531614e-05, |
|
"loss": 2.2676, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.9914057704112953, |
|
"grad_norm": 4.758657932281494, |
|
"learning_rate": 2.0086556169429098e-05, |
|
"loss": 2.3508, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 0.994475138121547, |
|
"grad_norm": 3.5216405391693115, |
|
"learning_rate": 2.0055862492326582e-05, |
|
"loss": 2.2924, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.9975445058317987, |
|
"grad_norm": 12.397980690002441, |
|
"learning_rate": 2.0025168815224063e-05, |
|
"loss": 2.2766, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 1.0006138735420504, |
|
"grad_norm": 3.59272837638855, |
|
"learning_rate": 1.9994475138121548e-05, |
|
"loss": 2.271, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 1.003683241252302, |
|
"grad_norm": 2.9649710655212402, |
|
"learning_rate": 1.9963781461019032e-05, |
|
"loss": 2.2595, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 1.0067526089625538, |
|
"grad_norm": 3.3721020221710205, |
|
"learning_rate": 1.9933087783916513e-05, |
|
"loss": 2.2275, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 1.0098219766728054, |
|
"grad_norm": 3.2784862518310547, |
|
"learning_rate": 1.9902394106813997e-05, |
|
"loss": 2.3262, |
|
"step": 16450 |
|
}, |
|
{ |
|
"epoch": 1.0128913443830572, |
|
"grad_norm": 3.301400661468506, |
|
"learning_rate": 1.987170042971148e-05, |
|
"loss": 2.2383, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.0159607120933087, |
|
"grad_norm": 2.7174506187438965, |
|
"learning_rate": 1.9841006752608962e-05, |
|
"loss": 2.266, |
|
"step": 16550 |
|
}, |
|
{ |
|
"epoch": 1.0190300798035605, |
|
"grad_norm": 4.05548095703125, |
|
"learning_rate": 1.9810313075506446e-05, |
|
"loss": 2.2317, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 1.022099447513812, |
|
"grad_norm": 3.362386703491211, |
|
"learning_rate": 1.977961939840393e-05, |
|
"loss": 2.2317, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 1.025168815224064, |
|
"grad_norm": 3.4570345878601074, |
|
"learning_rate": 1.974892572130141e-05, |
|
"loss": 2.3333, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 1.0282381829343155, |
|
"grad_norm": 3.4020121097564697, |
|
"learning_rate": 1.9718232044198895e-05, |
|
"loss": 2.2965, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 1.0313075506445673, |
|
"grad_norm": 3.3160858154296875, |
|
"learning_rate": 1.9687538367096376e-05, |
|
"loss": 2.2659, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 1.0343769183548188, |
|
"grad_norm": 3.5787899494171143, |
|
"learning_rate": 1.965684468999386e-05, |
|
"loss": 2.3484, |
|
"step": 16850 |
|
}, |
|
{ |
|
"epoch": 1.0374462860650706, |
|
"grad_norm": 4.029461860656738, |
|
"learning_rate": 1.9626151012891345e-05, |
|
"loss": 2.3333, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 1.0405156537753222, |
|
"grad_norm": 3.743760824203491, |
|
"learning_rate": 1.9595457335788825e-05, |
|
"loss": 2.2458, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 1.043585021485574, |
|
"grad_norm": 3.3272945880889893, |
|
"learning_rate": 1.956476365868631e-05, |
|
"loss": 2.3081, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.0466543891958258, |
|
"grad_norm": 3.3701705932617188, |
|
"learning_rate": 1.9534069981583794e-05, |
|
"loss": 2.19, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 1.0497237569060773, |
|
"grad_norm": 4.10990571975708, |
|
"learning_rate": 1.9503376304481275e-05, |
|
"loss": 2.2962, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 1.0527931246163291, |
|
"grad_norm": 3.226930856704712, |
|
"learning_rate": 1.947268262737876e-05, |
|
"loss": 2.3523, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 1.0558624923265807, |
|
"grad_norm": 3.901716947555542, |
|
"learning_rate": 1.9441988950276247e-05, |
|
"loss": 2.244, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 1.0589318600368325, |
|
"grad_norm": 3.943704128265381, |
|
"learning_rate": 1.9411295273173727e-05, |
|
"loss": 2.3179, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 1.062001227747084, |
|
"grad_norm": 3.7991671562194824, |
|
"learning_rate": 1.938060159607121e-05, |
|
"loss": 2.1783, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 1.0650705954573358, |
|
"grad_norm": 3.63051438331604, |
|
"learning_rate": 1.9349907918968696e-05, |
|
"loss": 2.2307, |
|
"step": 17350 |
|
}, |
|
{ |
|
"epoch": 1.0681399631675874, |
|
"grad_norm": 3.9437952041625977, |
|
"learning_rate": 1.9319214241866177e-05, |
|
"loss": 2.2864, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 1.0712093308778392, |
|
"grad_norm": 3.544645309448242, |
|
"learning_rate": 1.928852056476366e-05, |
|
"loss": 2.3161, |
|
"step": 17450 |
|
}, |
|
{ |
|
"epoch": 1.0742786985880908, |
|
"grad_norm": 3.8155930042266846, |
|
"learning_rate": 1.9257826887661142e-05, |
|
"loss": 2.2036, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.0773480662983426, |
|
"grad_norm": 3.956928014755249, |
|
"learning_rate": 1.9227133210558626e-05, |
|
"loss": 2.3254, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 1.0804174340085941, |
|
"grad_norm": 2.997299909591675, |
|
"learning_rate": 1.919643953345611e-05, |
|
"loss": 2.2741, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 1.083486801718846, |
|
"grad_norm": 3.1786820888519287, |
|
"learning_rate": 1.916574585635359e-05, |
|
"loss": 2.2991, |
|
"step": 17650 |
|
}, |
|
{ |
|
"epoch": 1.0865561694290977, |
|
"grad_norm": 3.475252151489258, |
|
"learning_rate": 1.9135052179251075e-05, |
|
"loss": 2.2742, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 1.0896255371393493, |
|
"grad_norm": 3.8195457458496094, |
|
"learning_rate": 1.910435850214856e-05, |
|
"loss": 2.2437, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 1.092694904849601, |
|
"grad_norm": 3.205800771713257, |
|
"learning_rate": 1.907366482504604e-05, |
|
"loss": 2.3202, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 1.0957642725598526, |
|
"grad_norm": 4.425097465515137, |
|
"learning_rate": 1.9042971147943524e-05, |
|
"loss": 2.2275, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 1.0988336402701044, |
|
"grad_norm": 3.6546781063079834, |
|
"learning_rate": 1.901227747084101e-05, |
|
"loss": 2.2484, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 1.101903007980356, |
|
"grad_norm": 3.9607748985290527, |
|
"learning_rate": 1.898158379373849e-05, |
|
"loss": 2.3012, |
|
"step": 17950 |
|
}, |
|
{ |
|
"epoch": 1.1049723756906078, |
|
"grad_norm": 3.728654623031616, |
|
"learning_rate": 1.8950890116635974e-05, |
|
"loss": 2.2324, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.1080417434008594, |
|
"grad_norm": 4.3351149559021, |
|
"learning_rate": 1.8920196439533458e-05, |
|
"loss": 2.2714, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"grad_norm": 3.8495333194732666, |
|
"learning_rate": 1.888950276243094e-05, |
|
"loss": 2.2416, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 1.1141804788213627, |
|
"grad_norm": 3.4237616062164307, |
|
"learning_rate": 1.8858809085328423e-05, |
|
"loss": 2.2531, |
|
"step": 18150 |
|
}, |
|
{ |
|
"epoch": 1.1172498465316145, |
|
"grad_norm": 4.699343681335449, |
|
"learning_rate": 1.8828115408225904e-05, |
|
"loss": 2.299, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 1.120319214241866, |
|
"grad_norm": 3.130164861679077, |
|
"learning_rate": 1.8797421731123388e-05, |
|
"loss": 2.2208, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 1.1233885819521179, |
|
"grad_norm": 3.84944224357605, |
|
"learning_rate": 1.8766728054020872e-05, |
|
"loss": 2.2946, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 1.1264579496623695, |
|
"grad_norm": 3.48579478263855, |
|
"learning_rate": 1.8736034376918353e-05, |
|
"loss": 2.2766, |
|
"step": 18350 |
|
}, |
|
{ |
|
"epoch": 1.1295273173726212, |
|
"grad_norm": 3.44059157371521, |
|
"learning_rate": 1.8705340699815837e-05, |
|
"loss": 2.3324, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 1.132596685082873, |
|
"grad_norm": 4.056128978729248, |
|
"learning_rate": 1.867464702271332e-05, |
|
"loss": 2.2425, |
|
"step": 18450 |
|
}, |
|
{ |
|
"epoch": 1.1356660527931246, |
|
"grad_norm": 3.4333908557891846, |
|
"learning_rate": 1.8643953345610802e-05, |
|
"loss": 2.2956, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.1387354205033764, |
|
"grad_norm": 3.3986668586730957, |
|
"learning_rate": 1.8613259668508287e-05, |
|
"loss": 2.2745, |
|
"step": 18550 |
|
}, |
|
{ |
|
"epoch": 1.141804788213628, |
|
"grad_norm": 3.854893684387207, |
|
"learning_rate": 1.858256599140577e-05, |
|
"loss": 2.2604, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 1.1448741559238798, |
|
"grad_norm": 3.557697296142578, |
|
"learning_rate": 1.8551872314303252e-05, |
|
"loss": 2.2547, |
|
"step": 18650 |
|
}, |
|
{ |
|
"epoch": 1.1479435236341313, |
|
"grad_norm": 3.741943597793579, |
|
"learning_rate": 1.8521178637200736e-05, |
|
"loss": 2.3682, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 1.1510128913443831, |
|
"grad_norm": 4.78795862197876, |
|
"learning_rate": 1.849048496009822e-05, |
|
"loss": 2.2635, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 1.1540822590546347, |
|
"grad_norm": 2.904060125350952, |
|
"learning_rate": 1.8459791282995704e-05, |
|
"loss": 2.2896, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 1.1571516267648865, |
|
"grad_norm": 4.221961498260498, |
|
"learning_rate": 1.842909760589319e-05, |
|
"loss": 2.2288, |
|
"step": 18850 |
|
}, |
|
{ |
|
"epoch": 1.160220994475138, |
|
"grad_norm": 3.9817323684692383, |
|
"learning_rate": 1.839840392879067e-05, |
|
"loss": 2.314, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 1.1632903621853898, |
|
"grad_norm": 4.283735275268555, |
|
"learning_rate": 1.8367710251688154e-05, |
|
"loss": 2.3065, |
|
"step": 18950 |
|
}, |
|
{ |
|
"epoch": 1.1663597298956414, |
|
"grad_norm": 4.681687831878662, |
|
"learning_rate": 1.8337016574585638e-05, |
|
"loss": 2.2481, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.1694290976058932, |
|
"grad_norm": 5.139055252075195, |
|
"learning_rate": 1.830632289748312e-05, |
|
"loss": 2.2553, |
|
"step": 19050 |
|
}, |
|
{ |
|
"epoch": 1.1724984653161448, |
|
"grad_norm": 2.987617015838623, |
|
"learning_rate": 1.8275629220380603e-05, |
|
"loss": 2.2837, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 1.1755678330263966, |
|
"grad_norm": 3.6754627227783203, |
|
"learning_rate": 1.8244935543278087e-05, |
|
"loss": 2.3089, |
|
"step": 19150 |
|
}, |
|
{ |
|
"epoch": 1.1786372007366483, |
|
"grad_norm": 4.376922607421875, |
|
"learning_rate": 1.8214241866175568e-05, |
|
"loss": 2.3423, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 1.1817065684469, |
|
"grad_norm": 3.4154927730560303, |
|
"learning_rate": 1.8183548189073052e-05, |
|
"loss": 2.2326, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 1.1847759361571517, |
|
"grad_norm": 3.935561418533325, |
|
"learning_rate": 1.8152854511970536e-05, |
|
"loss": 2.2588, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 1.1878453038674033, |
|
"grad_norm": 3.814129114151001, |
|
"learning_rate": 1.8122160834868017e-05, |
|
"loss": 2.2798, |
|
"step": 19350 |
|
}, |
|
{ |
|
"epoch": 1.190914671577655, |
|
"grad_norm": 4.349081516265869, |
|
"learning_rate": 1.80914671577655e-05, |
|
"loss": 2.2618, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 1.1939840392879066, |
|
"grad_norm": 4.567361354827881, |
|
"learning_rate": 1.8060773480662986e-05, |
|
"loss": 2.326, |
|
"step": 19450 |
|
}, |
|
{ |
|
"epoch": 1.1970534069981584, |
|
"grad_norm": 4.0694427490234375, |
|
"learning_rate": 1.8030079803560467e-05, |
|
"loss": 2.3532, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.20012277470841, |
|
"grad_norm": 4.104779243469238, |
|
"learning_rate": 1.799938612645795e-05, |
|
"loss": 2.2995, |
|
"step": 19550 |
|
}, |
|
{ |
|
"epoch": 1.2031921424186618, |
|
"grad_norm": 3.412951707839966, |
|
"learning_rate": 1.796869244935543e-05, |
|
"loss": 2.3195, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 1.2062615101289134, |
|
"grad_norm": 3.1561272144317627, |
|
"learning_rate": 1.7937998772252916e-05, |
|
"loss": 2.2425, |
|
"step": 19650 |
|
}, |
|
{ |
|
"epoch": 1.2093308778391652, |
|
"grad_norm": 3.319150924682617, |
|
"learning_rate": 1.79073050951504e-05, |
|
"loss": 2.3061, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 1.212400245549417, |
|
"grad_norm": 3.917623281478882, |
|
"learning_rate": 1.787661141804788e-05, |
|
"loss": 2.2989, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 1.2154696132596685, |
|
"grad_norm": 3.800072193145752, |
|
"learning_rate": 1.7845917740945365e-05, |
|
"loss": 2.2609, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 1.21853898096992, |
|
"grad_norm": 3.723968505859375, |
|
"learning_rate": 1.781522406384285e-05, |
|
"loss": 2.3172, |
|
"step": 19850 |
|
}, |
|
{ |
|
"epoch": 1.2216083486801719, |
|
"grad_norm": 4.040971755981445, |
|
"learning_rate": 1.778453038674033e-05, |
|
"loss": 2.224, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 1.2246777163904237, |
|
"grad_norm": 3.918321132659912, |
|
"learning_rate": 1.7753836709637814e-05, |
|
"loss": 2.3556, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 1.2277470841006752, |
|
"grad_norm": 4.419713973999023, |
|
"learning_rate": 1.77231430325353e-05, |
|
"loss": 2.3278, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.230816451810927, |
|
"grad_norm": 4.213504791259766, |
|
"learning_rate": 1.769244935543278e-05, |
|
"loss": 2.2026, |
|
"step": 20050 |
|
}, |
|
{ |
|
"epoch": 1.2338858195211786, |
|
"grad_norm": 3.972687005996704, |
|
"learning_rate": 1.7661755678330264e-05, |
|
"loss": 2.2754, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 1.2369551872314304, |
|
"grad_norm": 4.094639301300049, |
|
"learning_rate": 1.7631062001227748e-05, |
|
"loss": 2.2452, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 1.240024554941682, |
|
"grad_norm": 2.83046817779541, |
|
"learning_rate": 1.760036832412523e-05, |
|
"loss": 2.3677, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 1.2430939226519337, |
|
"grad_norm": 3.1770524978637695, |
|
"learning_rate": 1.7569674647022713e-05, |
|
"loss": 2.2744, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 1.2461632903621853, |
|
"grad_norm": 3.803001880645752, |
|
"learning_rate": 1.7538980969920194e-05, |
|
"loss": 2.1896, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 1.249232658072437, |
|
"grad_norm": 2.9435923099517822, |
|
"learning_rate": 1.7508287292817678e-05, |
|
"loss": 2.2679, |
|
"step": 20350 |
|
}, |
|
{ |
|
"epoch": 1.2523020257826887, |
|
"grad_norm": 3.0736653804779053, |
|
"learning_rate": 1.7477593615715166e-05, |
|
"loss": 2.2378, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 1.2553713934929405, |
|
"grad_norm": 4.1547627449035645, |
|
"learning_rate": 1.7446899938612646e-05, |
|
"loss": 2.2726, |
|
"step": 20450 |
|
}, |
|
{ |
|
"epoch": 1.2584407612031923, |
|
"grad_norm": 4.235386848449707, |
|
"learning_rate": 1.741620626151013e-05, |
|
"loss": 2.291, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.2615101289134438, |
|
"grad_norm": 3.412493944168091, |
|
"learning_rate": 1.7385512584407615e-05, |
|
"loss": 2.3055, |
|
"step": 20550 |
|
}, |
|
{ |
|
"epoch": 1.2645794966236954, |
|
"grad_norm": 3.837425947189331, |
|
"learning_rate": 1.7354818907305096e-05, |
|
"loss": 2.3072, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 1.2676488643339472, |
|
"grad_norm": 3.7470505237579346, |
|
"learning_rate": 1.732412523020258e-05, |
|
"loss": 2.2997, |
|
"step": 20650 |
|
}, |
|
{ |
|
"epoch": 1.270718232044199, |
|
"grad_norm": 4.290903568267822, |
|
"learning_rate": 1.7293431553100064e-05, |
|
"loss": 2.3537, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 1.2737875997544506, |
|
"grad_norm": 4.011292457580566, |
|
"learning_rate": 1.7262737875997545e-05, |
|
"loss": 2.1966, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 1.2768569674647023, |
|
"grad_norm": 3.7366220951080322, |
|
"learning_rate": 1.723204419889503e-05, |
|
"loss": 2.2752, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 1.279926335174954, |
|
"grad_norm": 3.9714245796203613, |
|
"learning_rate": 1.7201350521792513e-05, |
|
"loss": 2.2664, |
|
"step": 20850 |
|
}, |
|
{ |
|
"epoch": 1.2829957028852057, |
|
"grad_norm": 3.8838491439819336, |
|
"learning_rate": 1.7170656844689994e-05, |
|
"loss": 2.2699, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 1.2860650705954573, |
|
"grad_norm": 4.699042320251465, |
|
"learning_rate": 1.713996316758748e-05, |
|
"loss": 2.314, |
|
"step": 20950 |
|
}, |
|
{ |
|
"epoch": 1.289134438305709, |
|
"grad_norm": 3.9477968215942383, |
|
"learning_rate": 1.710926949048496e-05, |
|
"loss": 2.1777, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.2922038060159606, |
|
"grad_norm": 3.690079927444458, |
|
"learning_rate": 1.7078575813382444e-05, |
|
"loss": 2.2705, |
|
"step": 21050 |
|
}, |
|
{ |
|
"epoch": 1.2952731737262124, |
|
"grad_norm": 3.5377986431121826, |
|
"learning_rate": 1.7047882136279928e-05, |
|
"loss": 2.2185, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 1.298342541436464, |
|
"grad_norm": 4.657019138336182, |
|
"learning_rate": 1.701718845917741e-05, |
|
"loss": 2.3273, |
|
"step": 21150 |
|
}, |
|
{ |
|
"epoch": 1.3014119091467158, |
|
"grad_norm": 4.9236040115356445, |
|
"learning_rate": 1.6986494782074893e-05, |
|
"loss": 2.2613, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 1.3044812768569676, |
|
"grad_norm": 3.1163363456726074, |
|
"learning_rate": 1.6955801104972377e-05, |
|
"loss": 2.2433, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 1.3075506445672191, |
|
"grad_norm": 3.7134690284729004, |
|
"learning_rate": 1.6925107427869858e-05, |
|
"loss": 2.3434, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 1.3106200122774707, |
|
"grad_norm": 3.383561372756958, |
|
"learning_rate": 1.6894413750767342e-05, |
|
"loss": 2.1809, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 1.3136893799877225, |
|
"grad_norm": 4.129547595977783, |
|
"learning_rate": 1.6863720073664826e-05, |
|
"loss": 2.3241, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 1.3167587476979743, |
|
"grad_norm": 3.206624984741211, |
|
"learning_rate": 1.6833026396562307e-05, |
|
"loss": 2.2917, |
|
"step": 21450 |
|
}, |
|
{ |
|
"epoch": 1.3198281154082259, |
|
"grad_norm": 3.993472099304199, |
|
"learning_rate": 1.680233271945979e-05, |
|
"loss": 2.2888, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.3228974831184777, |
|
"grad_norm": 3.8573262691497803, |
|
"learning_rate": 1.6771639042357276e-05, |
|
"loss": 2.3034, |
|
"step": 21550 |
|
}, |
|
{ |
|
"epoch": 1.3259668508287292, |
|
"grad_norm": 4.161017417907715, |
|
"learning_rate": 1.6740945365254756e-05, |
|
"loss": 2.3173, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 1.329036218538981, |
|
"grad_norm": 3.6462788581848145, |
|
"learning_rate": 1.671025168815224e-05, |
|
"loss": 2.2411, |
|
"step": 21650 |
|
}, |
|
{ |
|
"epoch": 1.3321055862492326, |
|
"grad_norm": 5.160103797912598, |
|
"learning_rate": 1.667955801104972e-05, |
|
"loss": 2.3113, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 1.3351749539594844, |
|
"grad_norm": 8.680712699890137, |
|
"learning_rate": 1.6648864333947206e-05, |
|
"loss": 2.2769, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 1.3382443216697362, |
|
"grad_norm": 4.962557315826416, |
|
"learning_rate": 1.661817065684469e-05, |
|
"loss": 2.2755, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 1.3413136893799877, |
|
"grad_norm": 4.264275074005127, |
|
"learning_rate": 1.658747697974217e-05, |
|
"loss": 2.2863, |
|
"step": 21850 |
|
}, |
|
{ |
|
"epoch": 1.3443830570902393, |
|
"grad_norm": 4.581940650939941, |
|
"learning_rate": 1.6556783302639655e-05, |
|
"loss": 2.4296, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 1.347452424800491, |
|
"grad_norm": 3.814467430114746, |
|
"learning_rate": 1.652608962553714e-05, |
|
"loss": 2.258, |
|
"step": 21950 |
|
}, |
|
{ |
|
"epoch": 1.350521792510743, |
|
"grad_norm": 3.2274892330169678, |
|
"learning_rate": 1.6495395948434623e-05, |
|
"loss": 2.2739, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.3535911602209945, |
|
"grad_norm": 3.3576676845550537, |
|
"learning_rate": 1.6464702271332108e-05, |
|
"loss": 2.3536, |
|
"step": 22050 |
|
}, |
|
{ |
|
"epoch": 1.356660527931246, |
|
"grad_norm": 3.561453104019165, |
|
"learning_rate": 1.6434008594229592e-05, |
|
"loss": 2.338, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 1.3597298956414978, |
|
"grad_norm": 3.8528378009796143, |
|
"learning_rate": 1.6403314917127073e-05, |
|
"loss": 2.2749, |
|
"step": 22150 |
|
}, |
|
{ |
|
"epoch": 1.3627992633517496, |
|
"grad_norm": 3.7933218479156494, |
|
"learning_rate": 1.6372621240024557e-05, |
|
"loss": 2.1698, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 1.3658686310620012, |
|
"grad_norm": 4.1472578048706055, |
|
"learning_rate": 1.634192756292204e-05, |
|
"loss": 2.2975, |
|
"step": 22250 |
|
}, |
|
{ |
|
"epoch": 1.368937998772253, |
|
"grad_norm": 4.129203796386719, |
|
"learning_rate": 1.6311233885819522e-05, |
|
"loss": 2.2525, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 1.3720073664825045, |
|
"grad_norm": 4.041978359222412, |
|
"learning_rate": 1.6280540208717006e-05, |
|
"loss": 2.3245, |
|
"step": 22350 |
|
}, |
|
{ |
|
"epoch": 1.3750767341927563, |
|
"grad_norm": 3.7860097885131836, |
|
"learning_rate": 1.6249846531614487e-05, |
|
"loss": 2.2678, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 1.378146101903008, |
|
"grad_norm": 3.5321691036224365, |
|
"learning_rate": 1.621915285451197e-05, |
|
"loss": 2.2928, |
|
"step": 22450 |
|
}, |
|
{ |
|
"epoch": 1.3812154696132597, |
|
"grad_norm": 3.8494341373443604, |
|
"learning_rate": 1.6188459177409455e-05, |
|
"loss": 2.3158, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.3842848373235115, |
|
"grad_norm": 3.3036093711853027, |
|
"learning_rate": 1.6157765500306936e-05, |
|
"loss": 2.1744, |
|
"step": 22550 |
|
}, |
|
{ |
|
"epoch": 1.387354205033763, |
|
"grad_norm": 3.412515163421631, |
|
"learning_rate": 1.612707182320442e-05, |
|
"loss": 2.2236, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 1.3904235727440146, |
|
"grad_norm": 3.514040470123291, |
|
"learning_rate": 1.6096378146101905e-05, |
|
"loss": 2.2406, |
|
"step": 22650 |
|
}, |
|
{ |
|
"epoch": 1.3934929404542664, |
|
"grad_norm": 3.6580166816711426, |
|
"learning_rate": 1.6065684468999386e-05, |
|
"loss": 2.2914, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 1.3965623081645182, |
|
"grad_norm": 4.870865821838379, |
|
"learning_rate": 1.603499079189687e-05, |
|
"loss": 2.2971, |
|
"step": 22750 |
|
}, |
|
{ |
|
"epoch": 1.3996316758747698, |
|
"grad_norm": 4.047878742218018, |
|
"learning_rate": 1.6004297114794354e-05, |
|
"loss": 2.3312, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 1.4027010435850216, |
|
"grad_norm": 3.317620038986206, |
|
"learning_rate": 1.5973603437691835e-05, |
|
"loss": 2.2121, |
|
"step": 22850 |
|
}, |
|
{ |
|
"epoch": 1.4057704112952731, |
|
"grad_norm": 3.2293405532836914, |
|
"learning_rate": 1.594290976058932e-05, |
|
"loss": 2.268, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 1.408839779005525, |
|
"grad_norm": 3.658886194229126, |
|
"learning_rate": 1.5912216083486803e-05, |
|
"loss": 2.2022, |
|
"step": 22950 |
|
}, |
|
{ |
|
"epoch": 1.4119091467157765, |
|
"grad_norm": 4.797260761260986, |
|
"learning_rate": 1.5881522406384284e-05, |
|
"loss": 2.2916, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.4149785144260283, |
|
"grad_norm": 4.262215614318848, |
|
"learning_rate": 1.585082872928177e-05, |
|
"loss": 2.2257, |
|
"step": 23050 |
|
}, |
|
{ |
|
"epoch": 1.4180478821362799, |
|
"grad_norm": 3.0167343616485596, |
|
"learning_rate": 1.582013505217925e-05, |
|
"loss": 2.2285, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 1.4211172498465316, |
|
"grad_norm": 3.6330764293670654, |
|
"learning_rate": 1.5789441375076733e-05, |
|
"loss": 2.3057, |
|
"step": 23150 |
|
}, |
|
{ |
|
"epoch": 1.4241866175567832, |
|
"grad_norm": 4.605088233947754, |
|
"learning_rate": 1.5758747697974218e-05, |
|
"loss": 2.2406, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 1.427255985267035, |
|
"grad_norm": 3.957474708557129, |
|
"learning_rate": 1.57280540208717e-05, |
|
"loss": 2.2519, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 1.4303253529772868, |
|
"grad_norm": 3.5939078330993652, |
|
"learning_rate": 1.5697360343769183e-05, |
|
"loss": 2.2892, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 1.4333947206875384, |
|
"grad_norm": 3.805011034011841, |
|
"learning_rate": 1.5666666666666667e-05, |
|
"loss": 2.2179, |
|
"step": 23350 |
|
}, |
|
{ |
|
"epoch": 1.43646408839779, |
|
"grad_norm": 3.5911526679992676, |
|
"learning_rate": 1.5635972989564148e-05, |
|
"loss": 2.313, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 1.4395334561080417, |
|
"grad_norm": 3.6143059730529785, |
|
"learning_rate": 1.5605279312461632e-05, |
|
"loss": 2.2352, |
|
"step": 23450 |
|
}, |
|
{ |
|
"epoch": 1.4426028238182935, |
|
"grad_norm": 4.9773077964782715, |
|
"learning_rate": 1.5574585635359116e-05, |
|
"loss": 2.266, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.445672191528545, |
|
"grad_norm": 3.49001407623291, |
|
"learning_rate": 1.5543891958256597e-05, |
|
"loss": 2.4199, |
|
"step": 23550 |
|
}, |
|
{ |
|
"epoch": 1.4487415592387969, |
|
"grad_norm": 4.041284084320068, |
|
"learning_rate": 1.551319828115408e-05, |
|
"loss": 2.2682, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 1.4518109269490485, |
|
"grad_norm": 4.0507121086120605, |
|
"learning_rate": 1.548250460405157e-05, |
|
"loss": 2.3086, |
|
"step": 23650 |
|
}, |
|
{ |
|
"epoch": 1.4548802946593002, |
|
"grad_norm": 4.48442268371582, |
|
"learning_rate": 1.545181092694905e-05, |
|
"loss": 2.2863, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 1.4579496623695518, |
|
"grad_norm": 4.268632888793945, |
|
"learning_rate": 1.5421117249846534e-05, |
|
"loss": 2.2778, |
|
"step": 23750 |
|
}, |
|
{ |
|
"epoch": 1.4610190300798036, |
|
"grad_norm": 3.334290027618408, |
|
"learning_rate": 1.5390423572744015e-05, |
|
"loss": 2.2268, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 1.4640883977900552, |
|
"grad_norm": 4.395374774932861, |
|
"learning_rate": 1.53597298956415e-05, |
|
"loss": 2.3163, |
|
"step": 23850 |
|
}, |
|
{ |
|
"epoch": 1.467157765500307, |
|
"grad_norm": 4.427293300628662, |
|
"learning_rate": 1.5329036218538983e-05, |
|
"loss": 2.3159, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 1.4702271332105585, |
|
"grad_norm": 3.552321195602417, |
|
"learning_rate": 1.5298342541436464e-05, |
|
"loss": 2.3377, |
|
"step": 23950 |
|
}, |
|
{ |
|
"epoch": 1.4732965009208103, |
|
"grad_norm": 3.2035748958587646, |
|
"learning_rate": 1.5267648864333948e-05, |
|
"loss": 2.2654, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.4763658686310621, |
|
"grad_norm": 3.877993106842041, |
|
"learning_rate": 1.5236955187231432e-05, |
|
"loss": 2.3279, |
|
"step": 24050 |
|
}, |
|
{ |
|
"epoch": 1.4794352363413137, |
|
"grad_norm": 4.105770111083984, |
|
"learning_rate": 1.5206261510128913e-05, |
|
"loss": 2.2001, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 1.4825046040515653, |
|
"grad_norm": 5.055785655975342, |
|
"learning_rate": 1.5175567833026397e-05, |
|
"loss": 2.2383, |
|
"step": 24150 |
|
}, |
|
{ |
|
"epoch": 1.485573971761817, |
|
"grad_norm": 3.5279541015625, |
|
"learning_rate": 1.5144874155923882e-05, |
|
"loss": 2.2956, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 1.4886433394720688, |
|
"grad_norm": 3.2130086421966553, |
|
"learning_rate": 1.5114180478821363e-05, |
|
"loss": 2.2689, |
|
"step": 24250 |
|
}, |
|
{ |
|
"epoch": 1.4917127071823204, |
|
"grad_norm": 3.5005886554718018, |
|
"learning_rate": 1.5083486801718847e-05, |
|
"loss": 2.226, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 1.4947820748925722, |
|
"grad_norm": 3.9424734115600586, |
|
"learning_rate": 1.5052793124616331e-05, |
|
"loss": 2.207, |
|
"step": 24350 |
|
}, |
|
{ |
|
"epoch": 1.4978514426028238, |
|
"grad_norm": 3.7467117309570312, |
|
"learning_rate": 1.5022099447513812e-05, |
|
"loss": 2.2265, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 1.5009208103130756, |
|
"grad_norm": 3.600050926208496, |
|
"learning_rate": 1.4991405770411296e-05, |
|
"loss": 2.3054, |
|
"step": 24450 |
|
}, |
|
{ |
|
"epoch": 1.5039901780233271, |
|
"grad_norm": 3.9778027534484863, |
|
"learning_rate": 1.4960712093308779e-05, |
|
"loss": 2.2486, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.507059545733579, |
|
"grad_norm": 5.874206066131592, |
|
"learning_rate": 1.4930018416206261e-05, |
|
"loss": 2.2713, |
|
"step": 24550 |
|
}, |
|
{ |
|
"epoch": 1.5101289134438307, |
|
"grad_norm": 3.219372034072876, |
|
"learning_rate": 1.4899324739103745e-05, |
|
"loss": 2.2801, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 1.5131982811540823, |
|
"grad_norm": 4.86896276473999, |
|
"learning_rate": 1.4868631062001228e-05, |
|
"loss": 2.3152, |
|
"step": 24650 |
|
}, |
|
{ |
|
"epoch": 1.5162676488643339, |
|
"grad_norm": 3.7367022037506104, |
|
"learning_rate": 1.483793738489871e-05, |
|
"loss": 2.2584, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 1.5193370165745856, |
|
"grad_norm": 6.774600028991699, |
|
"learning_rate": 1.4807243707796193e-05, |
|
"loss": 2.2155, |
|
"step": 24750 |
|
}, |
|
{ |
|
"epoch": 1.5224063842848374, |
|
"grad_norm": 3.1714091300964355, |
|
"learning_rate": 1.4776550030693677e-05, |
|
"loss": 2.317, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 1.525475751995089, |
|
"grad_norm": 3.4561657905578613, |
|
"learning_rate": 1.4745856353591161e-05, |
|
"loss": 2.2814, |
|
"step": 24850 |
|
}, |
|
{ |
|
"epoch": 1.5285451197053406, |
|
"grad_norm": 3.321249485015869, |
|
"learning_rate": 1.4715162676488644e-05, |
|
"loss": 2.2776, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 1.5316144874155924, |
|
"grad_norm": 2.9775593280792236, |
|
"learning_rate": 1.4684468999386128e-05, |
|
"loss": 2.2629, |
|
"step": 24950 |
|
}, |
|
{ |
|
"epoch": 1.5346838551258442, |
|
"grad_norm": 2.6327016353607178, |
|
"learning_rate": 1.465377532228361e-05, |
|
"loss": 2.2041, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.5377532228360957, |
|
"grad_norm": 4.254408359527588, |
|
"learning_rate": 1.4623081645181093e-05, |
|
"loss": 2.2525, |
|
"step": 25050 |
|
}, |
|
{ |
|
"epoch": 1.5408225905463473, |
|
"grad_norm": 4.363503932952881, |
|
"learning_rate": 1.4592387968078576e-05, |
|
"loss": 2.211, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 1.5438919582565993, |
|
"grad_norm": 4.6273579597473145, |
|
"learning_rate": 1.456169429097606e-05, |
|
"loss": 2.308, |
|
"step": 25150 |
|
}, |
|
{ |
|
"epoch": 1.5469613259668509, |
|
"grad_norm": 3.543792247772217, |
|
"learning_rate": 1.4531000613873542e-05, |
|
"loss": 2.2355, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 1.5500306936771024, |
|
"grad_norm": 3.429605007171631, |
|
"learning_rate": 1.4500306936771025e-05, |
|
"loss": 2.2436, |
|
"step": 25250 |
|
}, |
|
{ |
|
"epoch": 1.5531000613873542, |
|
"grad_norm": 4.589274883270264, |
|
"learning_rate": 1.446961325966851e-05, |
|
"loss": 2.3328, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 1.556169429097606, |
|
"grad_norm": 3.7569265365600586, |
|
"learning_rate": 1.4438919582565992e-05, |
|
"loss": 2.318, |
|
"step": 25350 |
|
}, |
|
{ |
|
"epoch": 1.5592387968078576, |
|
"grad_norm": 4.732515811920166, |
|
"learning_rate": 1.4408225905463474e-05, |
|
"loss": 2.2883, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 1.5623081645181092, |
|
"grad_norm": 3.1093533039093018, |
|
"learning_rate": 1.4377532228360957e-05, |
|
"loss": 2.3087, |
|
"step": 25450 |
|
}, |
|
{ |
|
"epoch": 1.565377532228361, |
|
"grad_norm": 5.486563682556152, |
|
"learning_rate": 1.4346838551258441e-05, |
|
"loss": 2.3276, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.5684468999386127, |
|
"grad_norm": 4.043442249298096, |
|
"learning_rate": 1.4316144874155923e-05, |
|
"loss": 2.1952, |
|
"step": 25550 |
|
}, |
|
{ |
|
"epoch": 1.5715162676488643, |
|
"grad_norm": 3.298995018005371, |
|
"learning_rate": 1.4285451197053406e-05, |
|
"loss": 2.2533, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 1.5745856353591159, |
|
"grad_norm": 3.928128719329834, |
|
"learning_rate": 1.4254757519950892e-05, |
|
"loss": 2.2624, |
|
"step": 25650 |
|
}, |
|
{ |
|
"epoch": 1.5776550030693677, |
|
"grad_norm": 4.050337791442871, |
|
"learning_rate": 1.4224063842848374e-05, |
|
"loss": 2.1662, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 1.5807243707796195, |
|
"grad_norm": 4.037144660949707, |
|
"learning_rate": 1.4193370165745857e-05, |
|
"loss": 2.3193, |
|
"step": 25750 |
|
}, |
|
{ |
|
"epoch": 1.583793738489871, |
|
"grad_norm": 5.330986976623535, |
|
"learning_rate": 1.416267648864334e-05, |
|
"loss": 2.2778, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 1.5868631062001226, |
|
"grad_norm": 4.488786697387695, |
|
"learning_rate": 1.4131982811540824e-05, |
|
"loss": 2.2893, |
|
"step": 25850 |
|
}, |
|
{ |
|
"epoch": 1.5899324739103746, |
|
"grad_norm": 3.4088134765625, |
|
"learning_rate": 1.4101289134438306e-05, |
|
"loss": 2.3159, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 1.5930018416206262, |
|
"grad_norm": 4.404228687286377, |
|
"learning_rate": 1.4070595457335789e-05, |
|
"loss": 2.2411, |
|
"step": 25950 |
|
}, |
|
{ |
|
"epoch": 1.5960712093308778, |
|
"grad_norm": 3.9491429328918457, |
|
"learning_rate": 1.4039901780233273e-05, |
|
"loss": 2.2032, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.5991405770411296, |
|
"grad_norm": 3.8530337810516357, |
|
"learning_rate": 1.4009208103130756e-05, |
|
"loss": 2.1635, |
|
"step": 26050 |
|
}, |
|
{ |
|
"epoch": 1.6022099447513813, |
|
"grad_norm": 6.127511978149414, |
|
"learning_rate": 1.3978514426028238e-05, |
|
"loss": 2.287, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 1.605279312461633, |
|
"grad_norm": 3.831045389175415, |
|
"learning_rate": 1.394782074892572e-05, |
|
"loss": 2.2275, |
|
"step": 26150 |
|
}, |
|
{ |
|
"epoch": 1.6083486801718845, |
|
"grad_norm": 3.487755060195923, |
|
"learning_rate": 1.3917127071823205e-05, |
|
"loss": 2.302, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 1.6114180478821363, |
|
"grad_norm": 3.54748272895813, |
|
"learning_rate": 1.3886433394720687e-05, |
|
"loss": 2.2559, |
|
"step": 26250 |
|
}, |
|
{ |
|
"epoch": 1.614487415592388, |
|
"grad_norm": 3.1733102798461914, |
|
"learning_rate": 1.385573971761817e-05, |
|
"loss": 2.2246, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 1.6175567833026396, |
|
"grad_norm": 3.280029773712158, |
|
"learning_rate": 1.3825046040515654e-05, |
|
"loss": 2.223, |
|
"step": 26350 |
|
}, |
|
{ |
|
"epoch": 1.6206261510128912, |
|
"grad_norm": 4.188273906707764, |
|
"learning_rate": 1.3794352363413137e-05, |
|
"loss": 2.2478, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 1.623695518723143, |
|
"grad_norm": 4.134437084197998, |
|
"learning_rate": 1.376365868631062e-05, |
|
"loss": 2.1995, |
|
"step": 26450 |
|
}, |
|
{ |
|
"epoch": 1.6267648864333948, |
|
"grad_norm": 3.6614558696746826, |
|
"learning_rate": 1.3732965009208103e-05, |
|
"loss": 2.3007, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.6298342541436464, |
|
"grad_norm": 3.6023659706115723, |
|
"learning_rate": 1.3702271332105588e-05, |
|
"loss": 2.2745, |
|
"step": 26550 |
|
}, |
|
{ |
|
"epoch": 1.6329036218538981, |
|
"grad_norm": 4.1788201332092285, |
|
"learning_rate": 1.367157765500307e-05, |
|
"loss": 2.2729, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 1.63597298956415, |
|
"grad_norm": 3.9169983863830566, |
|
"learning_rate": 1.3640883977900553e-05, |
|
"loss": 2.2836, |
|
"step": 26650 |
|
}, |
|
{ |
|
"epoch": 1.6390423572744015, |
|
"grad_norm": 3.853062152862549, |
|
"learning_rate": 1.3610190300798037e-05, |
|
"loss": 2.271, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 1.642111724984653, |
|
"grad_norm": 4.5239667892456055, |
|
"learning_rate": 1.357949662369552e-05, |
|
"loss": 2.2514, |
|
"step": 26750 |
|
}, |
|
{ |
|
"epoch": 1.6451810926949049, |
|
"grad_norm": 4.2847065925598145, |
|
"learning_rate": 1.3548802946593002e-05, |
|
"loss": 2.2787, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 1.6482504604051567, |
|
"grad_norm": 4.32819128036499, |
|
"learning_rate": 1.3518109269490484e-05, |
|
"loss": 2.1954, |
|
"step": 26850 |
|
}, |
|
{ |
|
"epoch": 1.6513198281154082, |
|
"grad_norm": 4.206119537353516, |
|
"learning_rate": 1.3487415592387969e-05, |
|
"loss": 2.2867, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 1.6543891958256598, |
|
"grad_norm": 3.985600709915161, |
|
"learning_rate": 1.3456721915285451e-05, |
|
"loss": 2.2126, |
|
"step": 26950 |
|
}, |
|
{ |
|
"epoch": 1.6574585635359116, |
|
"grad_norm": 3.822664976119995, |
|
"learning_rate": 1.3426028238182934e-05, |
|
"loss": 2.2767, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.6605279312461634, |
|
"grad_norm": 4.091802597045898, |
|
"learning_rate": 1.3395334561080418e-05, |
|
"loss": 2.2247, |
|
"step": 27050 |
|
}, |
|
{ |
|
"epoch": 1.663597298956415, |
|
"grad_norm": 4.74222993850708, |
|
"learning_rate": 1.33646408839779e-05, |
|
"loss": 2.2001, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 3.1740357875823975, |
|
"learning_rate": 1.3333947206875383e-05, |
|
"loss": 2.2442, |
|
"step": 27150 |
|
}, |
|
{ |
|
"epoch": 1.6697360343769183, |
|
"grad_norm": 5.706885814666748, |
|
"learning_rate": 1.3303253529772866e-05, |
|
"loss": 2.2529, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 1.67280540208717, |
|
"grad_norm": 4.168138027191162, |
|
"learning_rate": 1.3272559852670351e-05, |
|
"loss": 2.1694, |
|
"step": 27250 |
|
}, |
|
{ |
|
"epoch": 1.6758747697974217, |
|
"grad_norm": 3.907432794570923, |
|
"learning_rate": 1.3241866175567834e-05, |
|
"loss": 2.2338, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 1.6789441375076735, |
|
"grad_norm": 3.9594688415527344, |
|
"learning_rate": 1.3211172498465316e-05, |
|
"loss": 2.2013, |
|
"step": 27350 |
|
}, |
|
{ |
|
"epoch": 1.6820135052179253, |
|
"grad_norm": 3.2740478515625, |
|
"learning_rate": 1.31804788213628e-05, |
|
"loss": 2.2376, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 1.6850828729281768, |
|
"grad_norm": 5.300954341888428, |
|
"learning_rate": 1.3149785144260283e-05, |
|
"loss": 2.2501, |
|
"step": 27450 |
|
}, |
|
{ |
|
"epoch": 1.6881522406384284, |
|
"grad_norm": 3.6815123558044434, |
|
"learning_rate": 1.3119091467157766e-05, |
|
"loss": 2.3304, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.6912216083486802, |
|
"grad_norm": 4.4728684425354, |
|
"learning_rate": 1.3088397790055248e-05, |
|
"loss": 2.2966, |
|
"step": 27550 |
|
}, |
|
{ |
|
"epoch": 1.694290976058932, |
|
"grad_norm": 3.806849241256714, |
|
"learning_rate": 1.3057704112952733e-05, |
|
"loss": 2.1784, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 1.6973603437691835, |
|
"grad_norm": 3.8693387508392334, |
|
"learning_rate": 1.3027010435850215e-05, |
|
"loss": 2.1768, |
|
"step": 27650 |
|
}, |
|
{ |
|
"epoch": 1.7004297114794351, |
|
"grad_norm": 3.4431064128875732, |
|
"learning_rate": 1.2996316758747698e-05, |
|
"loss": 2.2657, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 1.703499079189687, |
|
"grad_norm": 4.247345924377441, |
|
"learning_rate": 1.2965623081645182e-05, |
|
"loss": 2.2104, |
|
"step": 27750 |
|
}, |
|
{ |
|
"epoch": 1.7065684468999387, |
|
"grad_norm": 4.055105209350586, |
|
"learning_rate": 1.2934929404542664e-05, |
|
"loss": 2.274, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 1.7096378146101903, |
|
"grad_norm": 3.7587838172912598, |
|
"learning_rate": 1.2904235727440147e-05, |
|
"loss": 2.278, |
|
"step": 27850 |
|
}, |
|
{ |
|
"epoch": 1.7127071823204418, |
|
"grad_norm": 3.716425657272339, |
|
"learning_rate": 1.287354205033763e-05, |
|
"loss": 2.3438, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 1.7157765500306936, |
|
"grad_norm": 3.8528246879577637, |
|
"learning_rate": 1.2842848373235114e-05, |
|
"loss": 2.3489, |
|
"step": 27950 |
|
}, |
|
{ |
|
"epoch": 1.7188459177409454, |
|
"grad_norm": 3.5920658111572266, |
|
"learning_rate": 1.2812154696132596e-05, |
|
"loss": 2.3107, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.721915285451197, |
|
"grad_norm": 3.0533790588378906, |
|
"learning_rate": 1.278146101903008e-05, |
|
"loss": 2.3603, |
|
"step": 28050 |
|
}, |
|
{ |
|
"epoch": 1.7249846531614488, |
|
"grad_norm": 4.115893363952637, |
|
"learning_rate": 1.2750767341927565e-05, |
|
"loss": 2.2572, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 1.7280540208717006, |
|
"grad_norm": 3.350722074508667, |
|
"learning_rate": 1.2720073664825047e-05, |
|
"loss": 2.2774, |
|
"step": 28150 |
|
}, |
|
{ |
|
"epoch": 1.7311233885819521, |
|
"grad_norm": 4.05141544342041, |
|
"learning_rate": 1.268937998772253e-05, |
|
"loss": 2.1578, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 1.7341927562922037, |
|
"grad_norm": 3.764138698577881, |
|
"learning_rate": 1.2658686310620012e-05, |
|
"loss": 2.2309, |
|
"step": 28250 |
|
}, |
|
{ |
|
"epoch": 1.7372621240024555, |
|
"grad_norm": 3.2544310092926025, |
|
"learning_rate": 1.2627992633517496e-05, |
|
"loss": 2.2146, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 1.7403314917127073, |
|
"grad_norm": 4.030269622802734, |
|
"learning_rate": 1.2597298956414979e-05, |
|
"loss": 2.2981, |
|
"step": 28350 |
|
}, |
|
{ |
|
"epoch": 1.7434008594229589, |
|
"grad_norm": 3.6446919441223145, |
|
"learning_rate": 1.2566605279312461e-05, |
|
"loss": 2.1891, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 1.7464702271332104, |
|
"grad_norm": 3.7096481323242188, |
|
"learning_rate": 1.2535911602209946e-05, |
|
"loss": 2.272, |
|
"step": 28450 |
|
}, |
|
{ |
|
"epoch": 1.7495395948434622, |
|
"grad_norm": 3.4253058433532715, |
|
"learning_rate": 1.2505217925107428e-05, |
|
"loss": 2.2502, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.752608962553714, |
|
"grad_norm": 3.299448013305664, |
|
"learning_rate": 1.247452424800491e-05, |
|
"loss": 2.2742, |
|
"step": 28550 |
|
}, |
|
{ |
|
"epoch": 1.7556783302639656, |
|
"grad_norm": 4.302381992340088, |
|
"learning_rate": 1.2443830570902393e-05, |
|
"loss": 2.2315, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 1.7587476979742172, |
|
"grad_norm": 3.4078803062438965, |
|
"learning_rate": 1.2413136893799877e-05, |
|
"loss": 2.3013, |
|
"step": 28650 |
|
}, |
|
{ |
|
"epoch": 1.7618170656844692, |
|
"grad_norm": 2.8105528354644775, |
|
"learning_rate": 1.238244321669736e-05, |
|
"loss": 2.2035, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 1.7648864333947207, |
|
"grad_norm": 4.302020072937012, |
|
"learning_rate": 1.2351749539594843e-05, |
|
"loss": 2.306, |
|
"step": 28750 |
|
}, |
|
{ |
|
"epoch": 1.7679558011049723, |
|
"grad_norm": 5.1633219718933105, |
|
"learning_rate": 1.2321055862492327e-05, |
|
"loss": 2.2469, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 1.771025168815224, |
|
"grad_norm": 3.7127487659454346, |
|
"learning_rate": 1.2290362185389811e-05, |
|
"loss": 2.2803, |
|
"step": 28850 |
|
}, |
|
{ |
|
"epoch": 1.7740945365254759, |
|
"grad_norm": 3.1988329887390137, |
|
"learning_rate": 1.2259668508287293e-05, |
|
"loss": 2.2693, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 1.7771639042357275, |
|
"grad_norm": 4.184259414672852, |
|
"learning_rate": 1.2228974831184776e-05, |
|
"loss": 2.1331, |
|
"step": 28950 |
|
}, |
|
{ |
|
"epoch": 1.780233271945979, |
|
"grad_norm": 4.31723690032959, |
|
"learning_rate": 1.219828115408226e-05, |
|
"loss": 2.3265, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.7833026396562308, |
|
"grad_norm": 3.367295742034912, |
|
"learning_rate": 1.2167587476979743e-05, |
|
"loss": 2.2231, |
|
"step": 29050 |
|
}, |
|
{ |
|
"epoch": 1.7863720073664826, |
|
"grad_norm": 3.7550508975982666, |
|
"learning_rate": 1.2136893799877225e-05, |
|
"loss": 2.1928, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 1.7894413750767342, |
|
"grad_norm": 3.3911259174346924, |
|
"learning_rate": 1.210620012277471e-05, |
|
"loss": 2.2118, |
|
"step": 29150 |
|
}, |
|
{ |
|
"epoch": 1.7925107427869857, |
|
"grad_norm": 3.8555543422698975, |
|
"learning_rate": 1.2075506445672192e-05, |
|
"loss": 2.2831, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 1.7955801104972375, |
|
"grad_norm": 3.8747925758361816, |
|
"learning_rate": 1.2044812768569675e-05, |
|
"loss": 2.1612, |
|
"step": 29250 |
|
}, |
|
{ |
|
"epoch": 1.7986494782074893, |
|
"grad_norm": 4.418224334716797, |
|
"learning_rate": 1.2014119091467157e-05, |
|
"loss": 2.1782, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 1.801718845917741, |
|
"grad_norm": 3.63905668258667, |
|
"learning_rate": 1.1983425414364641e-05, |
|
"loss": 2.2919, |
|
"step": 29350 |
|
}, |
|
{ |
|
"epoch": 1.8047882136279927, |
|
"grad_norm": 3.302374839782715, |
|
"learning_rate": 1.1952731737262124e-05, |
|
"loss": 2.2046, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 1.8078575813382445, |
|
"grad_norm": 4.592925548553467, |
|
"learning_rate": 1.1922038060159606e-05, |
|
"loss": 2.238, |
|
"step": 29450 |
|
}, |
|
{ |
|
"epoch": 1.810926949048496, |
|
"grad_norm": 3.654604434967041, |
|
"learning_rate": 1.189134438305709e-05, |
|
"loss": 2.1764, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.8139963167587476, |
|
"grad_norm": 3.7106800079345703, |
|
"learning_rate": 1.1860650705954573e-05, |
|
"loss": 2.2601, |
|
"step": 29550 |
|
}, |
|
{ |
|
"epoch": 1.8170656844689994, |
|
"grad_norm": 3.459660291671753, |
|
"learning_rate": 1.1829957028852056e-05, |
|
"loss": 2.2503, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 1.8201350521792512, |
|
"grad_norm": 3.504185676574707, |
|
"learning_rate": 1.179926335174954e-05, |
|
"loss": 2.258, |
|
"step": 29650 |
|
}, |
|
{ |
|
"epoch": 1.8232044198895028, |
|
"grad_norm": 4.167102336883545, |
|
"learning_rate": 1.1768569674647024e-05, |
|
"loss": 2.1789, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 1.8262737875997543, |
|
"grad_norm": 4.083024978637695, |
|
"learning_rate": 1.1737875997544507e-05, |
|
"loss": 2.2965, |
|
"step": 29750 |
|
}, |
|
{ |
|
"epoch": 1.8293431553100061, |
|
"grad_norm": 3.6207692623138428, |
|
"learning_rate": 1.1707182320441989e-05, |
|
"loss": 2.3554, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 1.832412523020258, |
|
"grad_norm": 3.8433992862701416, |
|
"learning_rate": 1.1676488643339473e-05, |
|
"loss": 2.2148, |
|
"step": 29850 |
|
}, |
|
{ |
|
"epoch": 1.8354818907305095, |
|
"grad_norm": 4.200483798980713, |
|
"learning_rate": 1.1645794966236956e-05, |
|
"loss": 2.2467, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 1.838551258440761, |
|
"grad_norm": 4.590367794036865, |
|
"learning_rate": 1.1615101289134438e-05, |
|
"loss": 2.313, |
|
"step": 29950 |
|
}, |
|
{ |
|
"epoch": 1.8416206261510129, |
|
"grad_norm": 4.230051040649414, |
|
"learning_rate": 1.1584407612031921e-05, |
|
"loss": 2.2457, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.8446899938612646, |
|
"grad_norm": 3.817789077758789, |
|
"learning_rate": 1.1553713934929405e-05, |
|
"loss": 2.2483, |
|
"step": 30050 |
|
}, |
|
{ |
|
"epoch": 1.8477593615715162, |
|
"grad_norm": 3.726513147354126, |
|
"learning_rate": 1.1523020257826888e-05, |
|
"loss": 2.2662, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 1.850828729281768, |
|
"grad_norm": 3.6397483348846436, |
|
"learning_rate": 1.149232658072437e-05, |
|
"loss": 2.1713, |
|
"step": 30150 |
|
}, |
|
{ |
|
"epoch": 1.8538980969920198, |
|
"grad_norm": 3.6421852111816406, |
|
"learning_rate": 1.1461632903621854e-05, |
|
"loss": 2.1855, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 1.8569674647022714, |
|
"grad_norm": 3.6123268604278564, |
|
"learning_rate": 1.1430939226519337e-05, |
|
"loss": 2.3794, |
|
"step": 30250 |
|
}, |
|
{ |
|
"epoch": 1.860036832412523, |
|
"grad_norm": 3.842371940612793, |
|
"learning_rate": 1.140024554941682e-05, |
|
"loss": 2.3057, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 1.8631062001227747, |
|
"grad_norm": 5.15551233291626, |
|
"learning_rate": 1.1369551872314302e-05, |
|
"loss": 2.1755, |
|
"step": 30350 |
|
}, |
|
{ |
|
"epoch": 1.8661755678330265, |
|
"grad_norm": 3.2684996128082275, |
|
"learning_rate": 1.1338858195211786e-05, |
|
"loss": 2.2181, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 1.869244935543278, |
|
"grad_norm": 3.720906972885132, |
|
"learning_rate": 1.130816451810927e-05, |
|
"loss": 2.2057, |
|
"step": 30450 |
|
}, |
|
{ |
|
"epoch": 1.8723143032535297, |
|
"grad_norm": 3.2957749366760254, |
|
"learning_rate": 1.1277470841006753e-05, |
|
"loss": 2.2404, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.8753836709637814, |
|
"grad_norm": 3.4108922481536865, |
|
"learning_rate": 1.1246777163904237e-05, |
|
"loss": 2.3055, |
|
"step": 30550 |
|
}, |
|
{ |
|
"epoch": 1.8784530386740332, |
|
"grad_norm": 2.9891228675842285, |
|
"learning_rate": 1.121608348680172e-05, |
|
"loss": 2.2714, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 1.8815224063842848, |
|
"grad_norm": 5.469006538391113, |
|
"learning_rate": 1.1185389809699202e-05, |
|
"loss": 2.2108, |
|
"step": 30650 |
|
}, |
|
{ |
|
"epoch": 1.8845917740945364, |
|
"grad_norm": 3.9105262756347656, |
|
"learning_rate": 1.1154696132596686e-05, |
|
"loss": 2.2818, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 1.8876611418047882, |
|
"grad_norm": 3.2086987495422363, |
|
"learning_rate": 1.1124002455494169e-05, |
|
"loss": 2.2281, |
|
"step": 30750 |
|
}, |
|
{ |
|
"epoch": 1.89073050951504, |
|
"grad_norm": 4.461240291595459, |
|
"learning_rate": 1.1093308778391652e-05, |
|
"loss": 2.2431, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 1.8937998772252915, |
|
"grad_norm": 4.049542427062988, |
|
"learning_rate": 1.1062615101289134e-05, |
|
"loss": 2.2089, |
|
"step": 30850 |
|
}, |
|
{ |
|
"epoch": 1.8968692449355433, |
|
"grad_norm": 3.2396111488342285, |
|
"learning_rate": 1.1031921424186618e-05, |
|
"loss": 2.3097, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 1.899938612645795, |
|
"grad_norm": 3.4000086784362793, |
|
"learning_rate": 1.10012277470841e-05, |
|
"loss": 2.2472, |
|
"step": 30950 |
|
}, |
|
{ |
|
"epoch": 1.9030079803560467, |
|
"grad_norm": 3.818934917449951, |
|
"learning_rate": 1.0970534069981583e-05, |
|
"loss": 2.2014, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.9060773480662982, |
|
"grad_norm": 3.150446891784668, |
|
"learning_rate": 1.0939840392879068e-05, |
|
"loss": 2.2697, |
|
"step": 31050 |
|
}, |
|
{ |
|
"epoch": 1.90914671577655, |
|
"grad_norm": 3.2145376205444336, |
|
"learning_rate": 1.090914671577655e-05, |
|
"loss": 2.2698, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 1.9122160834868018, |
|
"grad_norm": 3.603330135345459, |
|
"learning_rate": 1.0878453038674033e-05, |
|
"loss": 2.2337, |
|
"step": 31150 |
|
}, |
|
{ |
|
"epoch": 1.9152854511970534, |
|
"grad_norm": 3.6672143936157227, |
|
"learning_rate": 1.0847759361571515e-05, |
|
"loss": 2.2064, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 1.918354818907305, |
|
"grad_norm": 3.2886476516723633, |
|
"learning_rate": 1.0817065684469001e-05, |
|
"loss": 2.2159, |
|
"step": 31250 |
|
}, |
|
{ |
|
"epoch": 1.9214241866175568, |
|
"grad_norm": 3.3169350624084473, |
|
"learning_rate": 1.0786372007366484e-05, |
|
"loss": 2.2242, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 1.9244935543278086, |
|
"grad_norm": 3.8866281509399414, |
|
"learning_rate": 1.0755678330263966e-05, |
|
"loss": 2.212, |
|
"step": 31350 |
|
}, |
|
{ |
|
"epoch": 1.9275629220380601, |
|
"grad_norm": 3.3577752113342285, |
|
"learning_rate": 1.072498465316145e-05, |
|
"loss": 2.3055, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 1.9306322897483117, |
|
"grad_norm": 3.503736972808838, |
|
"learning_rate": 1.0694290976058933e-05, |
|
"loss": 2.1698, |
|
"step": 31450 |
|
}, |
|
{ |
|
"epoch": 1.9337016574585635, |
|
"grad_norm": 5.08292818069458, |
|
"learning_rate": 1.0663597298956415e-05, |
|
"loss": 2.2281, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.9367710251688153, |
|
"grad_norm": 6.739192485809326, |
|
"learning_rate": 1.0632903621853898e-05, |
|
"loss": 2.2375, |
|
"step": 31550 |
|
}, |
|
{ |
|
"epoch": 1.9398403928790668, |
|
"grad_norm": 5.141798496246338, |
|
"learning_rate": 1.0602209944751382e-05, |
|
"loss": 2.2684, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 1.9429097605893186, |
|
"grad_norm": 3.4031152725219727, |
|
"learning_rate": 1.0571516267648865e-05, |
|
"loss": 2.2104, |
|
"step": 31650 |
|
}, |
|
{ |
|
"epoch": 1.9459791282995704, |
|
"grad_norm": 3.678633451461792, |
|
"learning_rate": 1.0540822590546347e-05, |
|
"loss": 2.2351, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 1.949048496009822, |
|
"grad_norm": 4.1313700675964355, |
|
"learning_rate": 1.0510128913443831e-05, |
|
"loss": 2.2951, |
|
"step": 31750 |
|
}, |
|
{ |
|
"epoch": 1.9521178637200736, |
|
"grad_norm": 3.0364913940429688, |
|
"learning_rate": 1.0479435236341314e-05, |
|
"loss": 2.2499, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 1.9551872314303254, |
|
"grad_norm": 3.7849690914154053, |
|
"learning_rate": 1.0448741559238796e-05, |
|
"loss": 2.2005, |
|
"step": 31850 |
|
}, |
|
{ |
|
"epoch": 1.9582565991405771, |
|
"grad_norm": 4.416446208953857, |
|
"learning_rate": 1.0418047882136279e-05, |
|
"loss": 2.3114, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 1.9613259668508287, |
|
"grad_norm": 3.4799766540527344, |
|
"learning_rate": 1.0387354205033763e-05, |
|
"loss": 2.2727, |
|
"step": 31950 |
|
}, |
|
{ |
|
"epoch": 1.9643953345610803, |
|
"grad_norm": 5.180732727050781, |
|
"learning_rate": 1.0356660527931246e-05, |
|
"loss": 2.2864, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.967464702271332, |
|
"grad_norm": 3.589080810546875, |
|
"learning_rate": 1.032596685082873e-05, |
|
"loss": 2.2939, |
|
"step": 32050 |
|
}, |
|
{ |
|
"epoch": 1.9705340699815839, |
|
"grad_norm": 4.802340984344482, |
|
"learning_rate": 1.0295273173726214e-05, |
|
"loss": 2.2003, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 1.9736034376918354, |
|
"grad_norm": 3.132723331451416, |
|
"learning_rate": 1.0264579496623697e-05, |
|
"loss": 2.2338, |
|
"step": 32150 |
|
}, |
|
{ |
|
"epoch": 1.976672805402087, |
|
"grad_norm": 5.015474796295166, |
|
"learning_rate": 1.023388581952118e-05, |
|
"loss": 2.2431, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 1.979742173112339, |
|
"grad_norm": 3.432023286819458, |
|
"learning_rate": 1.0203192142418662e-05, |
|
"loss": 2.2862, |
|
"step": 32250 |
|
}, |
|
{ |
|
"epoch": 1.9828115408225906, |
|
"grad_norm": 3.8772900104522705, |
|
"learning_rate": 1.0172498465316146e-05, |
|
"loss": 2.2758, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 1.9858809085328422, |
|
"grad_norm": 3.640902042388916, |
|
"learning_rate": 1.0141804788213629e-05, |
|
"loss": 2.2174, |
|
"step": 32350 |
|
}, |
|
{ |
|
"epoch": 1.988950276243094, |
|
"grad_norm": 3.8185462951660156, |
|
"learning_rate": 1.0111111111111111e-05, |
|
"loss": 2.2102, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 1.9920196439533457, |
|
"grad_norm": 4.0993499755859375, |
|
"learning_rate": 1.0080417434008595e-05, |
|
"loss": 2.2474, |
|
"step": 32450 |
|
}, |
|
{ |
|
"epoch": 1.9950890116635973, |
|
"grad_norm": 3.5613911151885986, |
|
"learning_rate": 1.0049723756906078e-05, |
|
"loss": 2.2452, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.9981583793738489, |
|
"grad_norm": 3.9277961254119873, |
|
"learning_rate": 1.001903007980356e-05, |
|
"loss": 2.2498, |
|
"step": 32550 |
|
}, |
|
{ |
|
"epoch": 2.001227747084101, |
|
"grad_norm": 3.8902101516723633, |
|
"learning_rate": 9.988336402701043e-06, |
|
"loss": 2.3092, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 2.0042971147943525, |
|
"grad_norm": 3.29555606842041, |
|
"learning_rate": 9.957642725598527e-06, |
|
"loss": 2.2777, |
|
"step": 32650 |
|
}, |
|
{ |
|
"epoch": 2.007366482504604, |
|
"grad_norm": 3.297602653503418, |
|
"learning_rate": 9.92694904849601e-06, |
|
"loss": 2.2509, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 2.0104358502148556, |
|
"grad_norm": 3.616257905960083, |
|
"learning_rate": 9.896255371393492e-06, |
|
"loss": 2.2828, |
|
"step": 32750 |
|
}, |
|
{ |
|
"epoch": 2.0135052179251076, |
|
"grad_norm": 3.872678518295288, |
|
"learning_rate": 9.865561694290976e-06, |
|
"loss": 2.2431, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 2.016574585635359, |
|
"grad_norm": 4.2430338859558105, |
|
"learning_rate": 9.83486801718846e-06, |
|
"loss": 2.143, |
|
"step": 32850 |
|
}, |
|
{ |
|
"epoch": 2.0196439533456108, |
|
"grad_norm": 4.328212738037109, |
|
"learning_rate": 9.804174340085943e-06, |
|
"loss": 2.3134, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 2.0227133210558623, |
|
"grad_norm": 3.488384246826172, |
|
"learning_rate": 9.773480662983426e-06, |
|
"loss": 2.2116, |
|
"step": 32950 |
|
}, |
|
{ |
|
"epoch": 2.0257826887661143, |
|
"grad_norm": 4.3153910636901855, |
|
"learning_rate": 9.74278698588091e-06, |
|
"loss": 2.1732, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.028852056476366, |
|
"grad_norm": 3.968754768371582, |
|
"learning_rate": 9.712093308778392e-06, |
|
"loss": 2.2158, |
|
"step": 33050 |
|
}, |
|
{ |
|
"epoch": 2.0319214241866175, |
|
"grad_norm": 3.1098225116729736, |
|
"learning_rate": 9.681399631675875e-06, |
|
"loss": 2.2657, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 2.034990791896869, |
|
"grad_norm": 3.7003393173217773, |
|
"learning_rate": 9.650705954573359e-06, |
|
"loss": 2.2534, |
|
"step": 33150 |
|
}, |
|
{ |
|
"epoch": 2.038060159607121, |
|
"grad_norm": 3.1514766216278076, |
|
"learning_rate": 9.620012277470842e-06, |
|
"loss": 2.1827, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 2.0411295273173726, |
|
"grad_norm": 3.5508854389190674, |
|
"learning_rate": 9.589318600368324e-06, |
|
"loss": 2.2009, |
|
"step": 33250 |
|
}, |
|
{ |
|
"epoch": 2.044198895027624, |
|
"grad_norm": 4.060067653656006, |
|
"learning_rate": 9.558624923265807e-06, |
|
"loss": 2.2533, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 2.047268262737876, |
|
"grad_norm": 3.6063380241394043, |
|
"learning_rate": 9.527931246163291e-06, |
|
"loss": 2.254, |
|
"step": 33350 |
|
}, |
|
{ |
|
"epoch": 2.050337630448128, |
|
"grad_norm": 5.065506458282471, |
|
"learning_rate": 9.497237569060773e-06, |
|
"loss": 2.2285, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 2.0534069981583793, |
|
"grad_norm": 3.942070245742798, |
|
"learning_rate": 9.466543891958256e-06, |
|
"loss": 2.2693, |
|
"step": 33450 |
|
}, |
|
{ |
|
"epoch": 2.056476365868631, |
|
"grad_norm": 4.165147304534912, |
|
"learning_rate": 9.43585021485574e-06, |
|
"loss": 2.1426, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 2.059545733578883, |
|
"grad_norm": 9.669456481933594, |
|
"learning_rate": 9.405156537753223e-06, |
|
"loss": 2.2249, |
|
"step": 33550 |
|
}, |
|
{ |
|
"epoch": 2.0626151012891345, |
|
"grad_norm": 3.426900625228882, |
|
"learning_rate": 9.374462860650705e-06, |
|
"loss": 2.2908, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 2.065684468999386, |
|
"grad_norm": 4.799295902252197, |
|
"learning_rate": 9.34376918354819e-06, |
|
"loss": 2.226, |
|
"step": 33650 |
|
}, |
|
{ |
|
"epoch": 2.0687538367096376, |
|
"grad_norm": 3.066361427307129, |
|
"learning_rate": 9.313075506445674e-06, |
|
"loss": 2.2653, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 2.0718232044198897, |
|
"grad_norm": 4.229564666748047, |
|
"learning_rate": 9.282381829343156e-06, |
|
"loss": 2.2778, |
|
"step": 33750 |
|
}, |
|
{ |
|
"epoch": 2.074892572130141, |
|
"grad_norm": 3.7543585300445557, |
|
"learning_rate": 9.251688152240639e-06, |
|
"loss": 2.1851, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 2.077961939840393, |
|
"grad_norm": 4.075713634490967, |
|
"learning_rate": 9.220994475138123e-06, |
|
"loss": 2.189, |
|
"step": 33850 |
|
}, |
|
{ |
|
"epoch": 2.0810313075506444, |
|
"grad_norm": 4.204864978790283, |
|
"learning_rate": 9.190300798035606e-06, |
|
"loss": 2.2357, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 2.0841006752608964, |
|
"grad_norm": 4.006982326507568, |
|
"learning_rate": 9.159607120933088e-06, |
|
"loss": 2.231, |
|
"step": 33950 |
|
}, |
|
{ |
|
"epoch": 2.087170042971148, |
|
"grad_norm": 3.0241997241973877, |
|
"learning_rate": 9.12891344383057e-06, |
|
"loss": 2.1866, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 2.0902394106813995, |
|
"grad_norm": 3.5990588665008545, |
|
"learning_rate": 9.098219766728055e-06, |
|
"loss": 2.145, |
|
"step": 34050 |
|
}, |
|
{ |
|
"epoch": 2.0933087783916515, |
|
"grad_norm": 3.6155498027801514, |
|
"learning_rate": 9.067526089625537e-06, |
|
"loss": 2.1855, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 2.096378146101903, |
|
"grad_norm": 3.9599666595458984, |
|
"learning_rate": 9.03683241252302e-06, |
|
"loss": 2.2783, |
|
"step": 34150 |
|
}, |
|
{ |
|
"epoch": 2.0994475138121547, |
|
"grad_norm": 3.8327977657318115, |
|
"learning_rate": 9.006138735420504e-06, |
|
"loss": 2.1919, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 2.1025168815224062, |
|
"grad_norm": 3.3617892265319824, |
|
"learning_rate": 8.975445058317987e-06, |
|
"loss": 2.1101, |
|
"step": 34250 |
|
}, |
|
{ |
|
"epoch": 2.1055862492326582, |
|
"grad_norm": 3.5898163318634033, |
|
"learning_rate": 8.944751381215469e-06, |
|
"loss": 2.1866, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 2.10865561694291, |
|
"grad_norm": 4.3782525062561035, |
|
"learning_rate": 8.914057704112952e-06, |
|
"loss": 2.233, |
|
"step": 34350 |
|
}, |
|
{ |
|
"epoch": 2.1117249846531614, |
|
"grad_norm": 3.649711847305298, |
|
"learning_rate": 8.883364027010436e-06, |
|
"loss": 2.2422, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 2.114794352363413, |
|
"grad_norm": 3.99489164352417, |
|
"learning_rate": 8.852670349907918e-06, |
|
"loss": 2.1979, |
|
"step": 34450 |
|
}, |
|
{ |
|
"epoch": 2.117863720073665, |
|
"grad_norm": 4.443358421325684, |
|
"learning_rate": 8.821976672805403e-06, |
|
"loss": 2.1984, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 2.1209330877839165, |
|
"grad_norm": 2.918077230453491, |
|
"learning_rate": 8.791282995702887e-06, |
|
"loss": 2.1995, |
|
"step": 34550 |
|
}, |
|
{ |
|
"epoch": 2.124002455494168, |
|
"grad_norm": 3.6200385093688965, |
|
"learning_rate": 8.76058931860037e-06, |
|
"loss": 2.2225, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 2.12707182320442, |
|
"grad_norm": 3.616900682449341, |
|
"learning_rate": 8.729895641497852e-06, |
|
"loss": 2.1904, |
|
"step": 34650 |
|
}, |
|
{ |
|
"epoch": 2.1301411909146717, |
|
"grad_norm": 3.1443259716033936, |
|
"learning_rate": 8.699201964395334e-06, |
|
"loss": 2.1765, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 2.1332105586249233, |
|
"grad_norm": 3.3852028846740723, |
|
"learning_rate": 8.668508287292819e-06, |
|
"loss": 2.2505, |
|
"step": 34750 |
|
}, |
|
{ |
|
"epoch": 2.136279926335175, |
|
"grad_norm": 3.266024112701416, |
|
"learning_rate": 8.637814610190301e-06, |
|
"loss": 2.1815, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 2.139349294045427, |
|
"grad_norm": 4.952578067779541, |
|
"learning_rate": 8.607120933087784e-06, |
|
"loss": 2.2401, |
|
"step": 34850 |
|
}, |
|
{ |
|
"epoch": 2.1424186617556784, |
|
"grad_norm": 4.235185623168945, |
|
"learning_rate": 8.576427255985268e-06, |
|
"loss": 2.2374, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 2.14548802946593, |
|
"grad_norm": 3.80965256690979, |
|
"learning_rate": 8.54573357888275e-06, |
|
"loss": 2.2094, |
|
"step": 34950 |
|
}, |
|
{ |
|
"epoch": 2.1485573971761815, |
|
"grad_norm": 5.098249435424805, |
|
"learning_rate": 8.515039901780233e-06, |
|
"loss": 2.1882, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 2.1516267648864336, |
|
"grad_norm": 3.5961649417877197, |
|
"learning_rate": 8.484346224677715e-06, |
|
"loss": 2.2372, |
|
"step": 35050 |
|
}, |
|
{ |
|
"epoch": 2.154696132596685, |
|
"grad_norm": 3.425548791885376, |
|
"learning_rate": 8.4536525475752e-06, |
|
"loss": 2.2271, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 2.1577655003069367, |
|
"grad_norm": 3.765516996383667, |
|
"learning_rate": 8.422958870472682e-06, |
|
"loss": 2.2176, |
|
"step": 35150 |
|
}, |
|
{ |
|
"epoch": 2.1608348680171883, |
|
"grad_norm": 4.038573265075684, |
|
"learning_rate": 8.392265193370165e-06, |
|
"loss": 2.214, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 2.1639042357274403, |
|
"grad_norm": 4.2313385009765625, |
|
"learning_rate": 8.361571516267649e-06, |
|
"loss": 2.2596, |
|
"step": 35250 |
|
}, |
|
{ |
|
"epoch": 2.166973603437692, |
|
"grad_norm": 3.4800400733947754, |
|
"learning_rate": 8.330877839165133e-06, |
|
"loss": 2.2576, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 2.1700429711479434, |
|
"grad_norm": 3.5811681747436523, |
|
"learning_rate": 8.300184162062616e-06, |
|
"loss": 2.1885, |
|
"step": 35350 |
|
}, |
|
{ |
|
"epoch": 2.1731123388581954, |
|
"grad_norm": 3.505411148071289, |
|
"learning_rate": 8.269490484960098e-06, |
|
"loss": 2.1773, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 2.176181706568447, |
|
"grad_norm": 3.3958797454833984, |
|
"learning_rate": 8.238796807857582e-06, |
|
"loss": 2.2335, |
|
"step": 35450 |
|
}, |
|
{ |
|
"epoch": 2.1792510742786986, |
|
"grad_norm": 4.680118083953857, |
|
"learning_rate": 8.208103130755065e-06, |
|
"loss": 2.2351, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 2.18232044198895, |
|
"grad_norm": 3.314845323562622, |
|
"learning_rate": 8.177409453652548e-06, |
|
"loss": 2.2563, |
|
"step": 35550 |
|
}, |
|
{ |
|
"epoch": 2.185389809699202, |
|
"grad_norm": 3.014174461364746, |
|
"learning_rate": 8.146715776550032e-06, |
|
"loss": 2.2946, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 2.1884591774094537, |
|
"grad_norm": 6.031067848205566, |
|
"learning_rate": 8.116022099447514e-06, |
|
"loss": 2.2273, |
|
"step": 35650 |
|
}, |
|
{ |
|
"epoch": 2.1915285451197053, |
|
"grad_norm": 4.454038143157959, |
|
"learning_rate": 8.085328422344997e-06, |
|
"loss": 2.226, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 2.194597912829957, |
|
"grad_norm": 4.110731601715088, |
|
"learning_rate": 8.05463474524248e-06, |
|
"loss": 2.3176, |
|
"step": 35750 |
|
}, |
|
{ |
|
"epoch": 2.197667280540209, |
|
"grad_norm": 3.230386972427368, |
|
"learning_rate": 8.023941068139964e-06, |
|
"loss": 2.2589, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 2.2007366482504604, |
|
"grad_norm": 4.837220668792725, |
|
"learning_rate": 7.993247391037446e-06, |
|
"loss": 2.2341, |
|
"step": 35850 |
|
}, |
|
{ |
|
"epoch": 2.203806015960712, |
|
"grad_norm": 4.532881259918213, |
|
"learning_rate": 7.962553713934929e-06, |
|
"loss": 2.2125, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 2.2068753836709636, |
|
"grad_norm": 3.323784828186035, |
|
"learning_rate": 7.931860036832413e-06, |
|
"loss": 2.19, |
|
"step": 35950 |
|
}, |
|
{ |
|
"epoch": 2.2099447513812156, |
|
"grad_norm": 4.003852844238281, |
|
"learning_rate": 7.901166359729895e-06, |
|
"loss": 2.2215, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 2.213014119091467, |
|
"grad_norm": 3.0279271602630615, |
|
"learning_rate": 7.870472682627378e-06, |
|
"loss": 2.2177, |
|
"step": 36050 |
|
}, |
|
{ |
|
"epoch": 2.2160834868017187, |
|
"grad_norm": 4.593332290649414, |
|
"learning_rate": 7.839779005524862e-06, |
|
"loss": 2.1596, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 2.2191528545119708, |
|
"grad_norm": 3.9358561038970947, |
|
"learning_rate": 7.809085328422346e-06, |
|
"loss": 2.3147, |
|
"step": 36150 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"grad_norm": 4.021229267120361, |
|
"learning_rate": 7.778391651319829e-06, |
|
"loss": 2.1922, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 2.225291589932474, |
|
"grad_norm": 3.740377426147461, |
|
"learning_rate": 7.747697974217311e-06, |
|
"loss": 2.2208, |
|
"step": 36250 |
|
}, |
|
{ |
|
"epoch": 2.2283609576427255, |
|
"grad_norm": 3.133218765258789, |
|
"learning_rate": 7.717004297114796e-06, |
|
"loss": 2.181, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 2.2314303253529775, |
|
"grad_norm": 4.224998950958252, |
|
"learning_rate": 7.686310620012278e-06, |
|
"loss": 2.1571, |
|
"step": 36350 |
|
}, |
|
{ |
|
"epoch": 2.234499693063229, |
|
"grad_norm": 3.908095359802246, |
|
"learning_rate": 7.65561694290976e-06, |
|
"loss": 2.2142, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 2.2375690607734806, |
|
"grad_norm": 4.671231746673584, |
|
"learning_rate": 7.624923265807243e-06, |
|
"loss": 2.2114, |
|
"step": 36450 |
|
}, |
|
{ |
|
"epoch": 2.240638428483732, |
|
"grad_norm": 3.268892765045166, |
|
"learning_rate": 7.5942295887047274e-06, |
|
"loss": 2.2469, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 2.243707796193984, |
|
"grad_norm": 2.9523401260375977, |
|
"learning_rate": 7.56353591160221e-06, |
|
"loss": 2.171, |
|
"step": 36550 |
|
}, |
|
{ |
|
"epoch": 2.2467771639042358, |
|
"grad_norm": 3.850844383239746, |
|
"learning_rate": 7.5328422344996925e-06, |
|
"loss": 2.1936, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 2.2498465316144873, |
|
"grad_norm": 3.288367509841919, |
|
"learning_rate": 7.502148557397177e-06, |
|
"loss": 2.1882, |
|
"step": 36650 |
|
}, |
|
{ |
|
"epoch": 2.252915899324739, |
|
"grad_norm": 3.225170612335205, |
|
"learning_rate": 7.47145488029466e-06, |
|
"loss": 2.2253, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 2.255985267034991, |
|
"grad_norm": 3.7475740909576416, |
|
"learning_rate": 7.440761203192143e-06, |
|
"loss": 2.2224, |
|
"step": 36750 |
|
}, |
|
{ |
|
"epoch": 2.2590546347452425, |
|
"grad_norm": 4.108501434326172, |
|
"learning_rate": 7.410067526089626e-06, |
|
"loss": 2.2538, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 2.262124002455494, |
|
"grad_norm": 4.399234771728516, |
|
"learning_rate": 7.3793738489871085e-06, |
|
"loss": 2.2093, |
|
"step": 36850 |
|
}, |
|
{ |
|
"epoch": 2.265193370165746, |
|
"grad_norm": 4.0335235595703125, |
|
"learning_rate": 7.348680171884592e-06, |
|
"loss": 2.2265, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 2.2682627378759976, |
|
"grad_norm": 3.3310387134552, |
|
"learning_rate": 7.317986494782075e-06, |
|
"loss": 2.2195, |
|
"step": 36950 |
|
}, |
|
{ |
|
"epoch": 2.271332105586249, |
|
"grad_norm": 14.169954299926758, |
|
"learning_rate": 7.287292817679558e-06, |
|
"loss": 2.3107, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 2.2744014732965008, |
|
"grad_norm": 4.349920272827148, |
|
"learning_rate": 7.256599140577041e-06, |
|
"loss": 2.2233, |
|
"step": 37050 |
|
}, |
|
{ |
|
"epoch": 2.277470841006753, |
|
"grad_norm": 3.546018362045288, |
|
"learning_rate": 7.2259054634745245e-06, |
|
"loss": 2.1851, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 2.2805402087170044, |
|
"grad_norm": 3.578289270401001, |
|
"learning_rate": 7.195211786372008e-06, |
|
"loss": 2.1984, |
|
"step": 37150 |
|
}, |
|
{ |
|
"epoch": 2.283609576427256, |
|
"grad_norm": 3.5594937801361084, |
|
"learning_rate": 7.1645181092694904e-06, |
|
"loss": 2.2544, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 2.2866789441375075, |
|
"grad_norm": 3.502493143081665, |
|
"learning_rate": 7.133824432166974e-06, |
|
"loss": 2.2898, |
|
"step": 37250 |
|
}, |
|
{ |
|
"epoch": 2.2897483118477595, |
|
"grad_norm": 3.839489459991455, |
|
"learning_rate": 7.103130755064457e-06, |
|
"loss": 2.2333, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 2.292817679558011, |
|
"grad_norm": 3.7720537185668945, |
|
"learning_rate": 7.07243707796194e-06, |
|
"loss": 2.2484, |
|
"step": 37350 |
|
}, |
|
{ |
|
"epoch": 2.2958870472682626, |
|
"grad_norm": 3.5186944007873535, |
|
"learning_rate": 7.041743400859423e-06, |
|
"loss": 2.2974, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 2.298956414978514, |
|
"grad_norm": 3.9113717079162598, |
|
"learning_rate": 7.011049723756906e-06, |
|
"loss": 2.2212, |
|
"step": 37450 |
|
}, |
|
{ |
|
"epoch": 2.3020257826887662, |
|
"grad_norm": 3.704716920852661, |
|
"learning_rate": 6.98035604665439e-06, |
|
"loss": 2.2158, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 2.305095150399018, |
|
"grad_norm": 3.8221049308776855, |
|
"learning_rate": 6.949662369551872e-06, |
|
"loss": 2.1804, |
|
"step": 37550 |
|
}, |
|
{ |
|
"epoch": 2.3081645181092694, |
|
"grad_norm": 3.8908891677856445, |
|
"learning_rate": 6.918968692449356e-06, |
|
"loss": 2.171, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 2.3112338858195214, |
|
"grad_norm": 3.604534149169922, |
|
"learning_rate": 6.888275015346839e-06, |
|
"loss": 2.2704, |
|
"step": 37650 |
|
}, |
|
{ |
|
"epoch": 2.314303253529773, |
|
"grad_norm": 3.2667436599731445, |
|
"learning_rate": 6.857581338244322e-06, |
|
"loss": 2.2706, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 2.3173726212400245, |
|
"grad_norm": 3.7572014331817627, |
|
"learning_rate": 6.826887661141805e-06, |
|
"loss": 2.3158, |
|
"step": 37750 |
|
}, |
|
{ |
|
"epoch": 2.320441988950276, |
|
"grad_norm": 3.4231903553009033, |
|
"learning_rate": 6.7961939840392875e-06, |
|
"loss": 2.2766, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 2.323511356660528, |
|
"grad_norm": 3.4527835845947266, |
|
"learning_rate": 6.765500306936771e-06, |
|
"loss": 2.1798, |
|
"step": 37850 |
|
}, |
|
{ |
|
"epoch": 2.3265807243707797, |
|
"grad_norm": 4.387216091156006, |
|
"learning_rate": 6.734806629834254e-06, |
|
"loss": 2.2615, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 2.3296500920810312, |
|
"grad_norm": 3.5280401706695557, |
|
"learning_rate": 6.704112952731738e-06, |
|
"loss": 2.2263, |
|
"step": 37950 |
|
}, |
|
{ |
|
"epoch": 2.332719459791283, |
|
"grad_norm": 3.647169351577759, |
|
"learning_rate": 6.673419275629221e-06, |
|
"loss": 2.2117, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 2.335788827501535, |
|
"grad_norm": 3.3504931926727295, |
|
"learning_rate": 6.6427255985267036e-06, |
|
"loss": 2.2202, |
|
"step": 38050 |
|
}, |
|
{ |
|
"epoch": 2.3388581952117864, |
|
"grad_norm": 3.1713030338287354, |
|
"learning_rate": 6.612031921424187e-06, |
|
"loss": 2.1531, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 2.341927562922038, |
|
"grad_norm": 4.14404821395874, |
|
"learning_rate": 6.5813382443216695e-06, |
|
"loss": 2.2549, |
|
"step": 38150 |
|
}, |
|
{ |
|
"epoch": 2.3449969306322895, |
|
"grad_norm": 4.7959065437316895, |
|
"learning_rate": 6.550644567219153e-06, |
|
"loss": 2.2079, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 2.3480662983425415, |
|
"grad_norm": 3.699985980987549, |
|
"learning_rate": 6.519950890116635e-06, |
|
"loss": 2.2061, |
|
"step": 38250 |
|
}, |
|
{ |
|
"epoch": 2.351135666052793, |
|
"grad_norm": 3.93282151222229, |
|
"learning_rate": 6.48925721301412e-06, |
|
"loss": 2.1875, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 2.3542050337630447, |
|
"grad_norm": 3.5464470386505127, |
|
"learning_rate": 6.458563535911603e-06, |
|
"loss": 2.3065, |
|
"step": 38350 |
|
}, |
|
{ |
|
"epoch": 2.3572744014732967, |
|
"grad_norm": 4.367957592010498, |
|
"learning_rate": 6.4278698588090855e-06, |
|
"loss": 2.2946, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 2.3603437691835483, |
|
"grad_norm": 4.520755767822266, |
|
"learning_rate": 6.397176181706569e-06, |
|
"loss": 2.2403, |
|
"step": 38450 |
|
}, |
|
{ |
|
"epoch": 2.3634131368938, |
|
"grad_norm": 3.270214557647705, |
|
"learning_rate": 6.366482504604051e-06, |
|
"loss": 2.276, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 2.3664825046040514, |
|
"grad_norm": 4.663724422454834, |
|
"learning_rate": 6.335788827501535e-06, |
|
"loss": 2.2394, |
|
"step": 38550 |
|
}, |
|
{ |
|
"epoch": 2.3695518723143034, |
|
"grad_norm": 3.735618829727173, |
|
"learning_rate": 6.305095150399017e-06, |
|
"loss": 2.1831, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 2.372621240024555, |
|
"grad_norm": 4.269412040710449, |
|
"learning_rate": 6.274401473296501e-06, |
|
"loss": 2.2155, |
|
"step": 38650 |
|
}, |
|
{ |
|
"epoch": 2.3756906077348066, |
|
"grad_norm": 4.040123462677002, |
|
"learning_rate": 6.243707796193985e-06, |
|
"loss": 2.2135, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 2.378759975445058, |
|
"grad_norm": 3.0279011726379395, |
|
"learning_rate": 6.213014119091467e-06, |
|
"loss": 2.2062, |
|
"step": 38750 |
|
}, |
|
{ |
|
"epoch": 2.38182934315531, |
|
"grad_norm": 4.656242370605469, |
|
"learning_rate": 6.182320441988951e-06, |
|
"loss": 2.2526, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 2.3848987108655617, |
|
"grad_norm": 4.1057233810424805, |
|
"learning_rate": 6.151626764886433e-06, |
|
"loss": 2.1878, |
|
"step": 38850 |
|
}, |
|
{ |
|
"epoch": 2.3879680785758133, |
|
"grad_norm": 4.058590888977051, |
|
"learning_rate": 6.120933087783917e-06, |
|
"loss": 2.2473, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 2.391037446286065, |
|
"grad_norm": 3.7655320167541504, |
|
"learning_rate": 6.090239410681399e-06, |
|
"loss": 2.1313, |
|
"step": 38950 |
|
}, |
|
{ |
|
"epoch": 2.394106813996317, |
|
"grad_norm": 3.7537214756011963, |
|
"learning_rate": 6.059545733578883e-06, |
|
"loss": 2.2779, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 2.3971761817065684, |
|
"grad_norm": 3.464635133743286, |
|
"learning_rate": 6.028852056476366e-06, |
|
"loss": 2.1035, |
|
"step": 39050 |
|
}, |
|
{ |
|
"epoch": 2.40024554941682, |
|
"grad_norm": 3.9705522060394287, |
|
"learning_rate": 5.998158379373849e-06, |
|
"loss": 2.2249, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 2.403314917127072, |
|
"grad_norm": 2.9240760803222656, |
|
"learning_rate": 5.967464702271333e-06, |
|
"loss": 2.1655, |
|
"step": 39150 |
|
}, |
|
{ |
|
"epoch": 2.4063842848373236, |
|
"grad_norm": 4.480701923370361, |
|
"learning_rate": 5.936771025168815e-06, |
|
"loss": 2.1998, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 2.409453652547575, |
|
"grad_norm": 3.32859468460083, |
|
"learning_rate": 5.906077348066299e-06, |
|
"loss": 2.2133, |
|
"step": 39250 |
|
}, |
|
{ |
|
"epoch": 2.4125230202578267, |
|
"grad_norm": 2.9386136531829834, |
|
"learning_rate": 5.875383670963781e-06, |
|
"loss": 2.1524, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 2.4155923879680787, |
|
"grad_norm": 3.8305766582489014, |
|
"learning_rate": 5.8446899938612645e-06, |
|
"loss": 2.2915, |
|
"step": 39350 |
|
}, |
|
{ |
|
"epoch": 2.4186617556783303, |
|
"grad_norm": 3.4347639083862305, |
|
"learning_rate": 5.813996316758748e-06, |
|
"loss": 2.1909, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 2.421731123388582, |
|
"grad_norm": 4.805240631103516, |
|
"learning_rate": 5.78330263965623e-06, |
|
"loss": 2.2795, |
|
"step": 39450 |
|
}, |
|
{ |
|
"epoch": 2.424800491098834, |
|
"grad_norm": 3.2844135761260986, |
|
"learning_rate": 5.752608962553715e-06, |
|
"loss": 2.1176, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 2.4278698588090855, |
|
"grad_norm": 3.550025701522827, |
|
"learning_rate": 5.721915285451197e-06, |
|
"loss": 2.2356, |
|
"step": 39550 |
|
}, |
|
{ |
|
"epoch": 2.430939226519337, |
|
"grad_norm": 3.8909902572631836, |
|
"learning_rate": 5.6912216083486805e-06, |
|
"loss": 2.2591, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 2.4340085942295886, |
|
"grad_norm": 3.584829330444336, |
|
"learning_rate": 5.660527931246163e-06, |
|
"loss": 2.2678, |
|
"step": 39650 |
|
}, |
|
{ |
|
"epoch": 2.43707796193984, |
|
"grad_norm": 3.7134439945220947, |
|
"learning_rate": 5.6298342541436464e-06, |
|
"loss": 2.1648, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 2.440147329650092, |
|
"grad_norm": 4.022806167602539, |
|
"learning_rate": 5.59914057704113e-06, |
|
"loss": 2.1818, |
|
"step": 39750 |
|
}, |
|
{ |
|
"epoch": 2.4432166973603437, |
|
"grad_norm": 3.5967869758605957, |
|
"learning_rate": 5.568446899938612e-06, |
|
"loss": 2.2368, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 2.4462860650705953, |
|
"grad_norm": 4.099997520446777, |
|
"learning_rate": 5.537753222836096e-06, |
|
"loss": 2.2196, |
|
"step": 39850 |
|
}, |
|
{ |
|
"epoch": 2.4493554327808473, |
|
"grad_norm": 4.131256103515625, |
|
"learning_rate": 5.507059545733579e-06, |
|
"loss": 2.3592, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 2.452424800491099, |
|
"grad_norm": 3.403428077697754, |
|
"learning_rate": 5.4763658686310625e-06, |
|
"loss": 2.2484, |
|
"step": 39950 |
|
}, |
|
{ |
|
"epoch": 2.4554941682013505, |
|
"grad_norm": 3.4898879528045654, |
|
"learning_rate": 5.445672191528546e-06, |
|
"loss": 2.2361, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 2.458563535911602, |
|
"grad_norm": 4.398887634277344, |
|
"learning_rate": 5.414978514426028e-06, |
|
"loss": 2.2462, |
|
"step": 40050 |
|
}, |
|
{ |
|
"epoch": 2.461632903621854, |
|
"grad_norm": 4.28602409362793, |
|
"learning_rate": 5.384284837323512e-06, |
|
"loss": 2.3204, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 2.4647022713321056, |
|
"grad_norm": 4.812078475952148, |
|
"learning_rate": 5.353591160220994e-06, |
|
"loss": 2.2206, |
|
"step": 40150 |
|
}, |
|
{ |
|
"epoch": 2.467771639042357, |
|
"grad_norm": 5.229348659515381, |
|
"learning_rate": 5.322897483118478e-06, |
|
"loss": 2.299, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 2.470841006752609, |
|
"grad_norm": 5.011894226074219, |
|
"learning_rate": 5.29220380601596e-06, |
|
"loss": 2.2308, |
|
"step": 40250 |
|
}, |
|
{ |
|
"epoch": 2.4739103744628608, |
|
"grad_norm": 3.229832410812378, |
|
"learning_rate": 5.261510128913444e-06, |
|
"loss": 2.1293, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 2.4769797421731123, |
|
"grad_norm": 4.192412376403809, |
|
"learning_rate": 5.230816451810928e-06, |
|
"loss": 2.2252, |
|
"step": 40350 |
|
}, |
|
{ |
|
"epoch": 2.480049109883364, |
|
"grad_norm": 4.124536037445068, |
|
"learning_rate": 5.20012277470841e-06, |
|
"loss": 2.1729, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 2.4831184775936155, |
|
"grad_norm": 3.670736789703369, |
|
"learning_rate": 5.169429097605894e-06, |
|
"loss": 2.2571, |
|
"step": 40450 |
|
}, |
|
{ |
|
"epoch": 2.4861878453038675, |
|
"grad_norm": 5.001986026763916, |
|
"learning_rate": 5.138735420503376e-06, |
|
"loss": 2.2075, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 2.489257213014119, |
|
"grad_norm": 3.8158419132232666, |
|
"learning_rate": 5.1080417434008596e-06, |
|
"loss": 2.2931, |
|
"step": 40550 |
|
}, |
|
{ |
|
"epoch": 2.4923265807243706, |
|
"grad_norm": 3.6598846912384033, |
|
"learning_rate": 5.077348066298342e-06, |
|
"loss": 2.2037, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 2.4953959484346226, |
|
"grad_norm": 4.0994110107421875, |
|
"learning_rate": 5.0466543891958255e-06, |
|
"loss": 2.2622, |
|
"step": 40650 |
|
}, |
|
{ |
|
"epoch": 2.498465316144874, |
|
"grad_norm": 3.9565281867980957, |
|
"learning_rate": 5.01596071209331e-06, |
|
"loss": 2.1952, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 2.501534683855126, |
|
"grad_norm": 3.9254519939422607, |
|
"learning_rate": 4.985267034990792e-06, |
|
"loss": 2.2506, |
|
"step": 40750 |
|
}, |
|
{ |
|
"epoch": 2.5046040515653774, |
|
"grad_norm": 4.242046356201172, |
|
"learning_rate": 4.954573357888276e-06, |
|
"loss": 2.2135, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 2.5076734192756294, |
|
"grad_norm": 3.1262447834014893, |
|
"learning_rate": 4.923879680785758e-06, |
|
"loss": 2.2056, |
|
"step": 40850 |
|
}, |
|
{ |
|
"epoch": 2.510742786985881, |
|
"grad_norm": 4.857666015625, |
|
"learning_rate": 4.8931860036832415e-06, |
|
"loss": 2.2238, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 2.5138121546961325, |
|
"grad_norm": 4.507630348205566, |
|
"learning_rate": 4.862492326580724e-06, |
|
"loss": 2.2332, |
|
"step": 40950 |
|
}, |
|
{ |
|
"epoch": 2.5168815224063845, |
|
"grad_norm": 4.321670055389404, |
|
"learning_rate": 4.831798649478207e-06, |
|
"loss": 2.2581, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 2.519950890116636, |
|
"grad_norm": 3.4853837490081787, |
|
"learning_rate": 4.801104972375691e-06, |
|
"loss": 2.3517, |
|
"step": 41050 |
|
}, |
|
{ |
|
"epoch": 2.5230202578268877, |
|
"grad_norm": 4.295222759246826, |
|
"learning_rate": 4.770411295273174e-06, |
|
"loss": 2.1357, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 2.5260896255371392, |
|
"grad_norm": 3.4203784465789795, |
|
"learning_rate": 4.7397176181706575e-06, |
|
"loss": 2.1721, |
|
"step": 41150 |
|
}, |
|
{ |
|
"epoch": 2.529158993247391, |
|
"grad_norm": 4.489879608154297, |
|
"learning_rate": 4.70902394106814e-06, |
|
"loss": 2.196, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 2.532228360957643, |
|
"grad_norm": 3.238175392150879, |
|
"learning_rate": 4.6783302639656234e-06, |
|
"loss": 2.2373, |
|
"step": 41250 |
|
}, |
|
{ |
|
"epoch": 2.5352977286678944, |
|
"grad_norm": 4.743640422821045, |
|
"learning_rate": 4.647636586863106e-06, |
|
"loss": 2.309, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 2.538367096378146, |
|
"grad_norm": 2.7739622592926025, |
|
"learning_rate": 4.616942909760589e-06, |
|
"loss": 2.1396, |
|
"step": 41350 |
|
}, |
|
{ |
|
"epoch": 2.541436464088398, |
|
"grad_norm": 3.4076218605041504, |
|
"learning_rate": 4.586249232658073e-06, |
|
"loss": 2.2281, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 2.5445058317986495, |
|
"grad_norm": 4.367641448974609, |
|
"learning_rate": 4.555555555555555e-06, |
|
"loss": 2.2136, |
|
"step": 41450 |
|
}, |
|
{ |
|
"epoch": 2.547575199508901, |
|
"grad_norm": 3.8523755073547363, |
|
"learning_rate": 4.5248618784530395e-06, |
|
"loss": 2.265, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 2.5506445672191527, |
|
"grad_norm": 3.5632312297821045, |
|
"learning_rate": 4.494168201350522e-06, |
|
"loss": 2.2266, |
|
"step": 41550 |
|
}, |
|
{ |
|
"epoch": 2.5537139349294047, |
|
"grad_norm": 4.128525733947754, |
|
"learning_rate": 4.463474524248005e-06, |
|
"loss": 2.209, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 2.5567833026396563, |
|
"grad_norm": 3.2727203369140625, |
|
"learning_rate": 4.432780847145488e-06, |
|
"loss": 2.167, |
|
"step": 41650 |
|
}, |
|
{ |
|
"epoch": 2.559852670349908, |
|
"grad_norm": 4.561786651611328, |
|
"learning_rate": 4.402087170042971e-06, |
|
"loss": 2.2009, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 2.56292203806016, |
|
"grad_norm": 3.624037742614746, |
|
"learning_rate": 4.371393492940455e-06, |
|
"loss": 2.2852, |
|
"step": 41750 |
|
}, |
|
{ |
|
"epoch": 2.5659914057704114, |
|
"grad_norm": 4.098727226257324, |
|
"learning_rate": 4.340699815837937e-06, |
|
"loss": 2.1527, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 2.569060773480663, |
|
"grad_norm": 3.3079962730407715, |
|
"learning_rate": 4.3100061387354205e-06, |
|
"loss": 2.2701, |
|
"step": 41850 |
|
}, |
|
{ |
|
"epoch": 2.5721301411909145, |
|
"grad_norm": 3.725670099258423, |
|
"learning_rate": 4.279312461632904e-06, |
|
"loss": 2.2145, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 2.575199508901166, |
|
"grad_norm": 4.11065673828125, |
|
"learning_rate": 4.248618784530387e-06, |
|
"loss": 2.2223, |
|
"step": 41950 |
|
}, |
|
{ |
|
"epoch": 2.578268876611418, |
|
"grad_norm": 3.768911123275757, |
|
"learning_rate": 4.21792510742787e-06, |
|
"loss": 2.2406, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 2.5813382443216697, |
|
"grad_norm": 3.27990984916687, |
|
"learning_rate": 4.187231430325353e-06, |
|
"loss": 2.2224, |
|
"step": 42050 |
|
}, |
|
{ |
|
"epoch": 2.5844076120319213, |
|
"grad_norm": 3.7315287590026855, |
|
"learning_rate": 4.1565377532228366e-06, |
|
"loss": 2.3168, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 2.5874769797421733, |
|
"grad_norm": 3.7368297576904297, |
|
"learning_rate": 4.125844076120319e-06, |
|
"loss": 2.3103, |
|
"step": 42150 |
|
}, |
|
{ |
|
"epoch": 2.590546347452425, |
|
"grad_norm": 3.973989725112915, |
|
"learning_rate": 4.0951503990178025e-06, |
|
"loss": 2.2719, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 2.5936157151626764, |
|
"grad_norm": 3.1219234466552734, |
|
"learning_rate": 4.064456721915285e-06, |
|
"loss": 2.2401, |
|
"step": 42250 |
|
}, |
|
{ |
|
"epoch": 2.596685082872928, |
|
"grad_norm": 3.8633742332458496, |
|
"learning_rate": 4.033763044812769e-06, |
|
"loss": 2.1432, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 2.59975445058318, |
|
"grad_norm": 3.6198391914367676, |
|
"learning_rate": 4.003069367710252e-06, |
|
"loss": 2.2469, |
|
"step": 42350 |
|
}, |
|
{ |
|
"epoch": 2.6028238182934316, |
|
"grad_norm": 4.8632707595825195, |
|
"learning_rate": 3.972375690607735e-06, |
|
"loss": 2.3461, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 2.605893186003683, |
|
"grad_norm": 3.7397594451904297, |
|
"learning_rate": 3.9416820135052185e-06, |
|
"loss": 2.2278, |
|
"step": 42450 |
|
}, |
|
{ |
|
"epoch": 2.608962553713935, |
|
"grad_norm": 3.7671289443969727, |
|
"learning_rate": 3.910988336402701e-06, |
|
"loss": 2.2099, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 2.6120319214241867, |
|
"grad_norm": 3.9413743019104004, |
|
"learning_rate": 3.880294659300184e-06, |
|
"loss": 2.2534, |
|
"step": 42550 |
|
}, |
|
{ |
|
"epoch": 2.6151012891344383, |
|
"grad_norm": 3.448629856109619, |
|
"learning_rate": 3.849600982197667e-06, |
|
"loss": 2.2862, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 2.61817065684469, |
|
"grad_norm": 5.043887138366699, |
|
"learning_rate": 3.81890730509515e-06, |
|
"loss": 2.1931, |
|
"step": 42650 |
|
}, |
|
{ |
|
"epoch": 2.6212400245549414, |
|
"grad_norm": 3.661371946334839, |
|
"learning_rate": 3.7882136279926332e-06, |
|
"loss": 2.2653, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 2.6243093922651934, |
|
"grad_norm": 4.375932216644287, |
|
"learning_rate": 3.7575199508901166e-06, |
|
"loss": 2.1996, |
|
"step": 42750 |
|
}, |
|
{ |
|
"epoch": 2.627378759975445, |
|
"grad_norm": 4.304765701293945, |
|
"learning_rate": 3.7268262737876e-06, |
|
"loss": 2.1938, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 2.630448127685697, |
|
"grad_norm": 3.3659396171569824, |
|
"learning_rate": 3.696132596685083e-06, |
|
"loss": 2.2412, |
|
"step": 42850 |
|
}, |
|
{ |
|
"epoch": 2.6335174953959486, |
|
"grad_norm": 3.610954999923706, |
|
"learning_rate": 3.665438919582566e-06, |
|
"loss": 2.238, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 2.6365868631062, |
|
"grad_norm": 3.3917031288146973, |
|
"learning_rate": 3.6347452424800493e-06, |
|
"loss": 2.2998, |
|
"step": 42950 |
|
}, |
|
{ |
|
"epoch": 2.6396562308164517, |
|
"grad_norm": 3.4687845706939697, |
|
"learning_rate": 3.6040515653775326e-06, |
|
"loss": 2.2232, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 2.6427255985267033, |
|
"grad_norm": 4.230668544769287, |
|
"learning_rate": 3.5733578882750156e-06, |
|
"loss": 2.1685, |
|
"step": 43050 |
|
}, |
|
{ |
|
"epoch": 2.6457949662369553, |
|
"grad_norm": 3.617204189300537, |
|
"learning_rate": 3.5426642111724985e-06, |
|
"loss": 2.218, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 2.648864333947207, |
|
"grad_norm": 3.763354778289795, |
|
"learning_rate": 3.5119705340699815e-06, |
|
"loss": 2.321, |
|
"step": 43150 |
|
}, |
|
{ |
|
"epoch": 2.6519337016574585, |
|
"grad_norm": 3.923051357269287, |
|
"learning_rate": 3.481276856967465e-06, |
|
"loss": 2.2174, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 2.6550030693677105, |
|
"grad_norm": 4.259540557861328, |
|
"learning_rate": 3.450583179864948e-06, |
|
"loss": 2.2633, |
|
"step": 43250 |
|
}, |
|
{ |
|
"epoch": 2.658072437077962, |
|
"grad_norm": 4.246336936950684, |
|
"learning_rate": 3.419889502762431e-06, |
|
"loss": 2.2235, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 2.6611418047882136, |
|
"grad_norm": 3.1326816082000732, |
|
"learning_rate": 3.389195825659914e-06, |
|
"loss": 2.1769, |
|
"step": 43350 |
|
}, |
|
{ |
|
"epoch": 2.664211172498465, |
|
"grad_norm": 5.116452217102051, |
|
"learning_rate": 3.3585021485573975e-06, |
|
"loss": 2.1907, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 2.6672805402087167, |
|
"grad_norm": 3.177436113357544, |
|
"learning_rate": 3.3278084714548805e-06, |
|
"loss": 2.2524, |
|
"step": 43450 |
|
}, |
|
{ |
|
"epoch": 2.6703499079189688, |
|
"grad_norm": 2.992366313934326, |
|
"learning_rate": 3.2971147943523634e-06, |
|
"loss": 2.1998, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 2.6734192756292203, |
|
"grad_norm": 3.997972249984741, |
|
"learning_rate": 3.2664211172498464e-06, |
|
"loss": 2.234, |
|
"step": 43550 |
|
}, |
|
{ |
|
"epoch": 2.6764886433394723, |
|
"grad_norm": 4.2181267738342285, |
|
"learning_rate": 3.2357274401473297e-06, |
|
"loss": 2.1645, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 2.679558011049724, |
|
"grad_norm": 3.3036773204803467, |
|
"learning_rate": 3.205033763044813e-06, |
|
"loss": 2.2462, |
|
"step": 43650 |
|
}, |
|
{ |
|
"epoch": 2.6826273787599755, |
|
"grad_norm": 4.222419738769531, |
|
"learning_rate": 3.174340085942296e-06, |
|
"loss": 2.283, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 2.685696746470227, |
|
"grad_norm": 3.980220079421997, |
|
"learning_rate": 3.143646408839779e-06, |
|
"loss": 2.2752, |
|
"step": 43750 |
|
}, |
|
{ |
|
"epoch": 2.6887661141804786, |
|
"grad_norm": 3.438683271408081, |
|
"learning_rate": 3.1129527317372624e-06, |
|
"loss": 2.1897, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 2.6918354818907306, |
|
"grad_norm": 3.9108569622039795, |
|
"learning_rate": 3.0822590546347453e-06, |
|
"loss": 2.2084, |
|
"step": 43850 |
|
}, |
|
{ |
|
"epoch": 2.694904849600982, |
|
"grad_norm": 3.4712257385253906, |
|
"learning_rate": 3.0515653775322283e-06, |
|
"loss": 2.0835, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 2.6979742173112338, |
|
"grad_norm": 3.4415714740753174, |
|
"learning_rate": 3.0208717004297112e-06, |
|
"loss": 2.3018, |
|
"step": 43950 |
|
}, |
|
{ |
|
"epoch": 2.701043585021486, |
|
"grad_norm": 4.478912353515625, |
|
"learning_rate": 2.990178023327195e-06, |
|
"loss": 2.1982, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 2.7041129527317374, |
|
"grad_norm": 4.173290729522705, |
|
"learning_rate": 2.959484346224678e-06, |
|
"loss": 2.1964, |
|
"step": 44050 |
|
}, |
|
{ |
|
"epoch": 2.707182320441989, |
|
"grad_norm": 3.7616212368011475, |
|
"learning_rate": 2.928790669122161e-06, |
|
"loss": 2.2183, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 2.7102516881522405, |
|
"grad_norm": 5.122647285461426, |
|
"learning_rate": 2.898096992019644e-06, |
|
"loss": 2.2466, |
|
"step": 44150 |
|
}, |
|
{ |
|
"epoch": 2.713321055862492, |
|
"grad_norm": 3.6268253326416016, |
|
"learning_rate": 2.8674033149171273e-06, |
|
"loss": 2.2719, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 2.716390423572744, |
|
"grad_norm": 4.107768535614014, |
|
"learning_rate": 2.8367096378146102e-06, |
|
"loss": 2.148, |
|
"step": 44250 |
|
}, |
|
{ |
|
"epoch": 2.7194597912829956, |
|
"grad_norm": 3.9949638843536377, |
|
"learning_rate": 2.806015960712093e-06, |
|
"loss": 2.2333, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 2.7225291589932477, |
|
"grad_norm": 3.9412174224853516, |
|
"learning_rate": 2.7753222836095765e-06, |
|
"loss": 2.1901, |
|
"step": 44350 |
|
}, |
|
{ |
|
"epoch": 2.7255985267034992, |
|
"grad_norm": 3.243807792663574, |
|
"learning_rate": 2.74462860650706e-06, |
|
"loss": 2.163, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 2.728667894413751, |
|
"grad_norm": 4.045169353485107, |
|
"learning_rate": 2.713934929404543e-06, |
|
"loss": 2.1712, |
|
"step": 44450 |
|
}, |
|
{ |
|
"epoch": 2.7317372621240024, |
|
"grad_norm": 3.781874418258667, |
|
"learning_rate": 2.683241252302026e-06, |
|
"loss": 2.2034, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 2.734806629834254, |
|
"grad_norm": 3.88508677482605, |
|
"learning_rate": 2.6525475751995088e-06, |
|
"loss": 2.1833, |
|
"step": 44550 |
|
}, |
|
{ |
|
"epoch": 2.737875997544506, |
|
"grad_norm": 4.135626792907715, |
|
"learning_rate": 2.621853898096992e-06, |
|
"loss": 2.2339, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 2.7409453652547575, |
|
"grad_norm": 3.489367723464966, |
|
"learning_rate": 2.591160220994475e-06, |
|
"loss": 2.2991, |
|
"step": 44650 |
|
}, |
|
{ |
|
"epoch": 2.744014732965009, |
|
"grad_norm": 3.8391823768615723, |
|
"learning_rate": 2.5604665438919585e-06, |
|
"loss": 2.1783, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 2.747084100675261, |
|
"grad_norm": 3.0692577362060547, |
|
"learning_rate": 2.5297728667894414e-06, |
|
"loss": 2.195, |
|
"step": 44750 |
|
}, |
|
{ |
|
"epoch": 2.7501534683855127, |
|
"grad_norm": 3.084923267364502, |
|
"learning_rate": 2.4990791896869244e-06, |
|
"loss": 2.2347, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 2.7532228360957642, |
|
"grad_norm": 3.383420705795288, |
|
"learning_rate": 2.4683855125844077e-06, |
|
"loss": 2.168, |
|
"step": 44850 |
|
}, |
|
{ |
|
"epoch": 2.756292203806016, |
|
"grad_norm": 3.1771624088287354, |
|
"learning_rate": 2.4376918354818907e-06, |
|
"loss": 2.217, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 2.7593615715162674, |
|
"grad_norm": 3.8641133308410645, |
|
"learning_rate": 2.4069981583793737e-06, |
|
"loss": 2.2976, |
|
"step": 44950 |
|
}, |
|
{ |
|
"epoch": 2.7624309392265194, |
|
"grad_norm": 3.6356940269470215, |
|
"learning_rate": 2.376304481276857e-06, |
|
"loss": 2.2688, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 2.765500306936771, |
|
"grad_norm": 3.3960859775543213, |
|
"learning_rate": 2.3456108041743404e-06, |
|
"loss": 2.2591, |
|
"step": 45050 |
|
}, |
|
{ |
|
"epoch": 2.768569674647023, |
|
"grad_norm": 4.219804286956787, |
|
"learning_rate": 2.3149171270718234e-06, |
|
"loss": 2.2724, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 2.7716390423572745, |
|
"grad_norm": 4.273275852203369, |
|
"learning_rate": 2.2842234499693063e-06, |
|
"loss": 2.2016, |
|
"step": 45150 |
|
}, |
|
{ |
|
"epoch": 2.774708410067526, |
|
"grad_norm": 4.720740795135498, |
|
"learning_rate": 2.2535297728667893e-06, |
|
"loss": 2.2711, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"grad_norm": 3.9274086952209473, |
|
"learning_rate": 2.2228360957642726e-06, |
|
"loss": 2.2219, |
|
"step": 45250 |
|
}, |
|
{ |
|
"epoch": 2.7808471454880292, |
|
"grad_norm": 3.7379603385925293, |
|
"learning_rate": 2.1921424186617556e-06, |
|
"loss": 2.2034, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 2.7839165131982813, |
|
"grad_norm": 3.719149112701416, |
|
"learning_rate": 2.161448741559239e-06, |
|
"loss": 2.2461, |
|
"step": 45350 |
|
}, |
|
{ |
|
"epoch": 2.786985880908533, |
|
"grad_norm": 3.402672529220581, |
|
"learning_rate": 2.130755064456722e-06, |
|
"loss": 2.2349, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 2.7900552486187844, |
|
"grad_norm": 3.5159754753112793, |
|
"learning_rate": 2.1000613873542053e-06, |
|
"loss": 2.1584, |
|
"step": 45450 |
|
}, |
|
{ |
|
"epoch": 2.7931246163290364, |
|
"grad_norm": 3.4366443157196045, |
|
"learning_rate": 2.0693677102516882e-06, |
|
"loss": 2.1867, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 2.796193984039288, |
|
"grad_norm": 3.5394604206085205, |
|
"learning_rate": 2.038674033149171e-06, |
|
"loss": 2.301, |
|
"step": 45550 |
|
}, |
|
{ |
|
"epoch": 2.7992633517495396, |
|
"grad_norm": 5.54389762878418, |
|
"learning_rate": 2.007980356046654e-06, |
|
"loss": 2.2846, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 2.802332719459791, |
|
"grad_norm": 12.670145988464355, |
|
"learning_rate": 1.9772866789441375e-06, |
|
"loss": 2.2808, |
|
"step": 45650 |
|
}, |
|
{ |
|
"epoch": 2.805402087170043, |
|
"grad_norm": 4.009146690368652, |
|
"learning_rate": 1.946593001841621e-06, |
|
"loss": 2.1663, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 2.8084714548802947, |
|
"grad_norm": 4.112977504730225, |
|
"learning_rate": 1.915899324739104e-06, |
|
"loss": 2.2551, |
|
"step": 45750 |
|
}, |
|
{ |
|
"epoch": 2.8115408225905463, |
|
"grad_norm": 5.213067054748535, |
|
"learning_rate": 1.885205647636587e-06, |
|
"loss": 2.2628, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 2.8146101903007983, |
|
"grad_norm": 3.289320230484009, |
|
"learning_rate": 1.85451197053407e-06, |
|
"loss": 2.2012, |
|
"step": 45850 |
|
}, |
|
{ |
|
"epoch": 2.81767955801105, |
|
"grad_norm": 3.8698418140411377, |
|
"learning_rate": 1.8238182934315531e-06, |
|
"loss": 2.1918, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 2.8207489257213014, |
|
"grad_norm": 3.230456829071045, |
|
"learning_rate": 1.7931246163290363e-06, |
|
"loss": 2.1566, |
|
"step": 45950 |
|
}, |
|
{ |
|
"epoch": 2.823818293431553, |
|
"grad_norm": 3.878119945526123, |
|
"learning_rate": 1.7624309392265194e-06, |
|
"loss": 2.1233, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 2.8268876611418046, |
|
"grad_norm": 3.892206907272339, |
|
"learning_rate": 1.7317372621240024e-06, |
|
"loss": 2.2653, |
|
"step": 46050 |
|
}, |
|
{ |
|
"epoch": 2.8299570288520566, |
|
"grad_norm": 5.064377784729004, |
|
"learning_rate": 1.7010435850214855e-06, |
|
"loss": 2.1615, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 2.833026396562308, |
|
"grad_norm": 3.4874629974365234, |
|
"learning_rate": 1.6703499079189687e-06, |
|
"loss": 2.1768, |
|
"step": 46150 |
|
}, |
|
{ |
|
"epoch": 2.8360957642725597, |
|
"grad_norm": 4.075310230255127, |
|
"learning_rate": 1.6396562308164519e-06, |
|
"loss": 2.1583, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 2.8391651319828117, |
|
"grad_norm": 6.029613018035889, |
|
"learning_rate": 1.6089625537139348e-06, |
|
"loss": 2.2742, |
|
"step": 46250 |
|
}, |
|
{ |
|
"epoch": 2.8422344996930633, |
|
"grad_norm": 3.3309133052825928, |
|
"learning_rate": 1.5782688766114182e-06, |
|
"loss": 2.2977, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 2.845303867403315, |
|
"grad_norm": 3.7084951400756836, |
|
"learning_rate": 1.5475751995089011e-06, |
|
"loss": 2.2776, |
|
"step": 46350 |
|
}, |
|
{ |
|
"epoch": 2.8483732351135664, |
|
"grad_norm": 3.8084752559661865, |
|
"learning_rate": 1.5168815224063843e-06, |
|
"loss": 2.2517, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 2.8514426028238185, |
|
"grad_norm": 3.2854843139648438, |
|
"learning_rate": 1.4861878453038673e-06, |
|
"loss": 2.2082, |
|
"step": 46450 |
|
}, |
|
{ |
|
"epoch": 2.85451197053407, |
|
"grad_norm": 3.1363027095794678, |
|
"learning_rate": 1.4554941682013506e-06, |
|
"loss": 2.1851, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 2.8575813382443216, |
|
"grad_norm": 2.982666492462158, |
|
"learning_rate": 1.4248004910988336e-06, |
|
"loss": 2.2236, |
|
"step": 46550 |
|
}, |
|
{ |
|
"epoch": 2.8606507059545736, |
|
"grad_norm": 3.61039662361145, |
|
"learning_rate": 1.3941068139963167e-06, |
|
"loss": 2.2663, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 2.863720073664825, |
|
"grad_norm": 3.5564205646514893, |
|
"learning_rate": 1.3634131368938e-06, |
|
"loss": 2.2026, |
|
"step": 46650 |
|
}, |
|
{ |
|
"epoch": 2.8667894413750767, |
|
"grad_norm": 3.3528811931610107, |
|
"learning_rate": 1.332719459791283e-06, |
|
"loss": 2.2235, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 2.8698588090853283, |
|
"grad_norm": 3.672039270401001, |
|
"learning_rate": 1.302025782688766e-06, |
|
"loss": 2.2037, |
|
"step": 46750 |
|
}, |
|
{ |
|
"epoch": 2.87292817679558, |
|
"grad_norm": 3.8955376148223877, |
|
"learning_rate": 1.2713321055862492e-06, |
|
"loss": 2.2178, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 2.875997544505832, |
|
"grad_norm": 4.099259376525879, |
|
"learning_rate": 1.2406384284837323e-06, |
|
"loss": 2.1855, |
|
"step": 46850 |
|
}, |
|
{ |
|
"epoch": 2.8790669122160835, |
|
"grad_norm": 3.968477964401245, |
|
"learning_rate": 1.2099447513812155e-06, |
|
"loss": 2.3346, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 2.882136279926335, |
|
"grad_norm": 4.449561595916748, |
|
"learning_rate": 1.1792510742786985e-06, |
|
"loss": 2.1736, |
|
"step": 46950 |
|
}, |
|
{ |
|
"epoch": 2.885205647636587, |
|
"grad_norm": 3.945478916168213, |
|
"learning_rate": 1.1485573971761818e-06, |
|
"loss": 2.2412, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 2.8882750153468386, |
|
"grad_norm": 3.1608164310455322, |
|
"learning_rate": 1.1178637200736648e-06, |
|
"loss": 2.2238, |
|
"step": 47050 |
|
}, |
|
{ |
|
"epoch": 2.89134438305709, |
|
"grad_norm": 4.12243127822876, |
|
"learning_rate": 1.087170042971148e-06, |
|
"loss": 2.1938, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 2.8944137507673418, |
|
"grad_norm": 3.392117977142334, |
|
"learning_rate": 1.056476365868631e-06, |
|
"loss": 2.1843, |
|
"step": 47150 |
|
}, |
|
{ |
|
"epoch": 2.8974831184775938, |
|
"grad_norm": 3.5791280269622803, |
|
"learning_rate": 1.0257826887661143e-06, |
|
"loss": 2.192, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 2.9005524861878453, |
|
"grad_norm": 3.195387363433838, |
|
"learning_rate": 9.950890116635972e-07, |
|
"loss": 2.221, |
|
"step": 47250 |
|
}, |
|
{ |
|
"epoch": 2.903621853898097, |
|
"grad_norm": 4.41968297958374, |
|
"learning_rate": 9.643953345610804e-07, |
|
"loss": 2.2192, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 2.906691221608349, |
|
"grad_norm": 3.8508644104003906, |
|
"learning_rate": 9.337016574585636e-07, |
|
"loss": 2.1182, |
|
"step": 47350 |
|
}, |
|
{ |
|
"epoch": 2.9097605893186005, |
|
"grad_norm": 3.493018865585327, |
|
"learning_rate": 9.030079803560467e-07, |
|
"loss": 2.1742, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 2.912829957028852, |
|
"grad_norm": 3.8892369270324707, |
|
"learning_rate": 8.723143032535298e-07, |
|
"loss": 2.1974, |
|
"step": 47450 |
|
}, |
|
{ |
|
"epoch": 2.9158993247391036, |
|
"grad_norm": 3.238802671432495, |
|
"learning_rate": 8.416206261510129e-07, |
|
"loss": 2.3495, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 2.918968692449355, |
|
"grad_norm": 3.5974628925323486, |
|
"learning_rate": 8.109269490484961e-07, |
|
"loss": 2.2413, |
|
"step": 47550 |
|
}, |
|
{ |
|
"epoch": 2.922038060159607, |
|
"grad_norm": 3.806520938873291, |
|
"learning_rate": 7.802332719459792e-07, |
|
"loss": 2.2885, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 2.925107427869859, |
|
"grad_norm": 4.2564496994018555, |
|
"learning_rate": 7.495395948434623e-07, |
|
"loss": 2.2423, |
|
"step": 47650 |
|
}, |
|
{ |
|
"epoch": 2.9281767955801103, |
|
"grad_norm": 3.9945321083068848, |
|
"learning_rate": 7.188459177409454e-07, |
|
"loss": 2.2334, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 2.9312461632903624, |
|
"grad_norm": 3.3918559551239014, |
|
"learning_rate": 6.881522406384285e-07, |
|
"loss": 2.1502, |
|
"step": 47750 |
|
}, |
|
{ |
|
"epoch": 2.934315531000614, |
|
"grad_norm": 4.716714859008789, |
|
"learning_rate": 6.574585635359117e-07, |
|
"loss": 2.2168, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 2.9373848987108655, |
|
"grad_norm": 4.033308506011963, |
|
"learning_rate": 6.267648864333948e-07, |
|
"loss": 2.2624, |
|
"step": 47850 |
|
}, |
|
{ |
|
"epoch": 2.940454266421117, |
|
"grad_norm": 5.724266052246094, |
|
"learning_rate": 5.960712093308779e-07, |
|
"loss": 2.1917, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 2.943523634131369, |
|
"grad_norm": 3.851032257080078, |
|
"learning_rate": 5.65377532228361e-07, |
|
"loss": 2.2129, |
|
"step": 47950 |
|
}, |
|
{ |
|
"epoch": 2.9465930018416207, |
|
"grad_norm": 3.436573028564453, |
|
"learning_rate": 5.346838551258441e-07, |
|
"loss": 2.3044, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 2.949662369551872, |
|
"grad_norm": 3.4812095165252686, |
|
"learning_rate": 5.039901780233272e-07, |
|
"loss": 2.2165, |
|
"step": 48050 |
|
}, |
|
{ |
|
"epoch": 2.9527317372621242, |
|
"grad_norm": 3.4163248538970947, |
|
"learning_rate": 4.732965009208103e-07, |
|
"loss": 2.1441, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 2.955801104972376, |
|
"grad_norm": 4.04727840423584, |
|
"learning_rate": 4.426028238182934e-07, |
|
"loss": 2.2139, |
|
"step": 48150 |
|
}, |
|
{ |
|
"epoch": 2.9588704726826274, |
|
"grad_norm": 3.314655065536499, |
|
"learning_rate": 4.119091467157765e-07, |
|
"loss": 2.2098, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 2.961939840392879, |
|
"grad_norm": 4.228841304779053, |
|
"learning_rate": 3.812154696132597e-07, |
|
"loss": 2.1626, |
|
"step": 48250 |
|
}, |
|
{ |
|
"epoch": 2.9650092081031305, |
|
"grad_norm": 4.127499580383301, |
|
"learning_rate": 3.505217925107428e-07, |
|
"loss": 2.3014, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 2.9680785758133825, |
|
"grad_norm": 3.7830405235290527, |
|
"learning_rate": 3.198281154082259e-07, |
|
"loss": 2.2795, |
|
"step": 48350 |
|
}, |
|
{ |
|
"epoch": 2.971147943523634, |
|
"grad_norm": 5.398400783538818, |
|
"learning_rate": 2.89134438305709e-07, |
|
"loss": 2.2314, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 2.9742173112338857, |
|
"grad_norm": 4.308445453643799, |
|
"learning_rate": 2.584407612031921e-07, |
|
"loss": 2.3097, |
|
"step": 48450 |
|
}, |
|
{ |
|
"epoch": 2.9772866789441377, |
|
"grad_norm": 3.5713133811950684, |
|
"learning_rate": 2.277470841006753e-07, |
|
"loss": 2.1968, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 2.9803560466543892, |
|
"grad_norm": 3.5215930938720703, |
|
"learning_rate": 1.970534069981584e-07, |
|
"loss": 2.2354, |
|
"step": 48550 |
|
}, |
|
{ |
|
"epoch": 2.983425414364641, |
|
"grad_norm": 3.120506763458252, |
|
"learning_rate": 1.663597298956415e-07, |
|
"loss": 2.2237, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 2.9864947820748924, |
|
"grad_norm": 6.137388706207275, |
|
"learning_rate": 1.3566605279312464e-07, |
|
"loss": 2.235, |
|
"step": 48650 |
|
}, |
|
{ |
|
"epoch": 2.9895641497851444, |
|
"grad_norm": 3.3079631328582764, |
|
"learning_rate": 1.0497237569060774e-07, |
|
"loss": 2.1549, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 2.992633517495396, |
|
"grad_norm": 4.21074914932251, |
|
"learning_rate": 7.427869858809085e-08, |
|
"loss": 2.1818, |
|
"step": 48750 |
|
}, |
|
{ |
|
"epoch": 2.9957028852056475, |
|
"grad_norm": 3.8667783737182617, |
|
"learning_rate": 4.358502148557397e-08, |
|
"loss": 2.3276, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 2.9987722529158995, |
|
"grad_norm": 4.376747131347656, |
|
"learning_rate": 1.2891344383057091e-08, |
|
"loss": 2.2735, |
|
"step": 48850 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 48870, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.816879851189043e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|