{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 29370,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.17024174327545114,
"grad_norm": 0.6650531888008118,
"learning_rate": 1.96595165134491e-05,
"loss": 0.2305,
"step": 500
},
{
"epoch": 0.34048348655090227,
"grad_norm": 2.3951523303985596,
"learning_rate": 1.9319033026898198e-05,
"loss": 0.1633,
"step": 1000
},
{
"epoch": 0.5107252298263534,
"grad_norm": 0.9463224411010742,
"learning_rate": 1.8978549540347296e-05,
"loss": 0.1548,
"step": 1500
},
{
"epoch": 0.6809669731018045,
"grad_norm": 1.2913334369659424,
"learning_rate": 1.8638066053796395e-05,
"loss": 0.1487,
"step": 2000
},
{
"epoch": 0.8512087163772557,
"grad_norm": 1.0490585565567017,
"learning_rate": 1.829758256724549e-05,
"loss": 0.1435,
"step": 2500
},
{
"epoch": 1.0214504596527068,
"grad_norm": 1.5652408599853516,
"learning_rate": 1.7957099080694588e-05,
"loss": 0.1397,
"step": 3000
},
{
"epoch": 1.191692202928158,
"grad_norm": 1.1205641031265259,
"learning_rate": 1.7616615594143686e-05,
"loss": 0.1283,
"step": 3500
},
{
"epoch": 1.361933946203609,
"grad_norm": 1.0744216442108154,
"learning_rate": 1.727613210759278e-05,
"loss": 0.1292,
"step": 4000
},
{
"epoch": 1.5321756894790601,
"grad_norm": 1.089113712310791,
"learning_rate": 1.693564862104188e-05,
"loss": 0.1273,
"step": 4500
},
{
"epoch": 1.7024174327545114,
"grad_norm": 2.334705114364624,
"learning_rate": 1.6595165134490977e-05,
"loss": 0.1275,
"step": 5000
},
{
"epoch": 1.8726591760299627,
"grad_norm": 1.1323754787445068,
"learning_rate": 1.6254681647940076e-05,
"loss": 0.1251,
"step": 5500
},
{
"epoch": 2.0429009193054135,
"grad_norm": 0.8757261633872986,
"learning_rate": 1.5914198161389174e-05,
"loss": 0.1213,
"step": 6000
},
{
"epoch": 2.213142662580865,
"grad_norm": 1.1232839822769165,
"learning_rate": 1.5573714674838272e-05,
"loss": 0.1104,
"step": 6500
},
{
"epoch": 2.383384405856316,
"grad_norm": 0.8715490698814392,
"learning_rate": 1.5233231188287369e-05,
"loss": 0.1099,
"step": 7000
},
{
"epoch": 2.553626149131767,
"grad_norm": 1.2656769752502441,
"learning_rate": 1.4892747701736467e-05,
"loss": 0.1102,
"step": 7500
},
{
"epoch": 2.723867892407218,
"grad_norm": 1.1669204235076904,
"learning_rate": 1.4552264215185565e-05,
"loss": 0.1101,
"step": 8000
},
{
"epoch": 2.8941096356826694,
"grad_norm": 1.0073705911636353,
"learning_rate": 1.4211780728634664e-05,
"loss": 0.1085,
"step": 8500
},
{
"epoch": 3.0643513789581207,
"grad_norm": 1.1393821239471436,
"learning_rate": 1.3871297242083762e-05,
"loss": 0.1027,
"step": 9000
},
{
"epoch": 3.2345931222335715,
"grad_norm": 1.4679887294769287,
"learning_rate": 1.3530813755532857e-05,
"loss": 0.0926,
"step": 9500
},
{
"epoch": 3.404834865509023,
"grad_norm": 0.8374710083007812,
"learning_rate": 1.3190330268981955e-05,
"loss": 0.0925,
"step": 10000
},
{
"epoch": 3.575076608784474,
"grad_norm": 1.2514032125473022,
"learning_rate": 1.2849846782431053e-05,
"loss": 0.0927,
"step": 10500
},
{
"epoch": 3.7453183520599254,
"grad_norm": 1.5251351594924927,
"learning_rate": 1.250936329588015e-05,
"loss": 0.0929,
"step": 11000
},
{
"epoch": 3.915560095335376,
"grad_norm": 1.0668872594833374,
"learning_rate": 1.2168879809329248e-05,
"loss": 0.0923,
"step": 11500
},
{
"epoch": 4.085801838610827,
"grad_norm": 1.0528796911239624,
"learning_rate": 1.1828396322778346e-05,
"loss": 0.0848,
"step": 12000
},
{
"epoch": 4.256043581886279,
"grad_norm": 1.316041111946106,
"learning_rate": 1.1487912836227445e-05,
"loss": 0.0767,
"step": 12500
},
{
"epoch": 4.42628532516173,
"grad_norm": 1.6180927753448486,
"learning_rate": 1.1147429349676541e-05,
"loss": 0.077,
"step": 13000
},
{
"epoch": 4.596527068437181,
"grad_norm": 1.2156362533569336,
"learning_rate": 1.080694586312564e-05,
"loss": 0.0773,
"step": 13500
},
{
"epoch": 4.766768811712632,
"grad_norm": 1.621887445449829,
"learning_rate": 1.0466462376574738e-05,
"loss": 0.0773,
"step": 14000
},
{
"epoch": 4.937010554988083,
"grad_norm": 1.5306437015533447,
"learning_rate": 1.0125978890023836e-05,
"loss": 0.0774,
"step": 14500
},
{
"epoch": 5.107252298263534,
"grad_norm": 22.37914276123047,
"learning_rate": 9.785495403472932e-06,
"loss": 0.0678,
"step": 15000
},
{
"epoch": 5.2774940415389855,
"grad_norm": 1.3330860137939453,
"learning_rate": 9.44501191692203e-06,
"loss": 0.0634,
"step": 15500
},
{
"epoch": 5.447735784814436,
"grad_norm": 1.9692567586898804,
"learning_rate": 9.104528430371127e-06,
"loss": 0.0634,
"step": 16000
},
{
"epoch": 5.617977528089888,
"grad_norm": 1.3089221715927124,
"learning_rate": 8.764044943820226e-06,
"loss": 0.0635,
"step": 16500
},
{
"epoch": 5.788219271365339,
"grad_norm": 1.5806821584701538,
"learning_rate": 8.423561457269324e-06,
"loss": 0.0637,
"step": 17000
},
{
"epoch": 5.95846101464079,
"grad_norm": 1.579941987991333,
"learning_rate": 8.08307797071842e-06,
"loss": 0.0633,
"step": 17500
},
{
"epoch": 6.128702757916241,
"grad_norm": 1.5726784467697144,
"learning_rate": 7.742594484167519e-06,
"loss": 0.054,
"step": 18000
},
{
"epoch": 6.298944501191692,
"grad_norm": 1.140791654586792,
"learning_rate": 7.402110997616616e-06,
"loss": 0.052,
"step": 18500
},
{
"epoch": 6.469186244467143,
"grad_norm": 1.6548409461975098,
"learning_rate": 7.061627511065714e-06,
"loss": 0.0516,
"step": 19000
},
{
"epoch": 6.639427987742595,
"grad_norm": 1.3514069318771362,
"learning_rate": 6.721144024514812e-06,
"loss": 0.0522,
"step": 19500
},
{
"epoch": 6.809669731018046,
"grad_norm": 1.5590009689331055,
"learning_rate": 6.38066053796391e-06,
"loss": 0.0518,
"step": 20000
},
{
"epoch": 6.9799114742934965,
"grad_norm": 1.2986799478530884,
"learning_rate": 6.0401770514130066e-06,
"loss": 0.0524,
"step": 20500
},
{
"epoch": 7.150153217568948,
"grad_norm": 1.5317639112472534,
"learning_rate": 5.699693564862104e-06,
"loss": 0.044,
"step": 21000
},
{
"epoch": 7.320394960844399,
"grad_norm": 2.344708204269409,
"learning_rate": 5.359210078311202e-06,
"loss": 0.0415,
"step": 21500
},
{
"epoch": 7.49063670411985,
"grad_norm": 3.3057548999786377,
"learning_rate": 5.0187265917603005e-06,
"loss": 0.0418,
"step": 22000
},
{
"epoch": 7.6608784473953015,
"grad_norm": 1.3382242918014526,
"learning_rate": 4.678243105209398e-06,
"loss": 0.0419,
"step": 22500
},
{
"epoch": 7.831120190670752,
"grad_norm": 1.7018738985061646,
"learning_rate": 4.337759618658495e-06,
"loss": 0.0421,
"step": 23000
},
{
"epoch": 8.001361933946203,
"grad_norm": 0.9316732883453369,
"learning_rate": 3.997276132107593e-06,
"loss": 0.0414,
"step": 23500
},
{
"epoch": 8.171603677221654,
"grad_norm": 1.4249956607818604,
"learning_rate": 3.656792645556691e-06,
"loss": 0.0346,
"step": 24000
},
{
"epoch": 8.341845420497107,
"grad_norm": 1.263279914855957,
"learning_rate": 3.3163091590057884e-06,
"loss": 0.0345,
"step": 24500
},
{
"epoch": 8.512087163772557,
"grad_norm": 2.6162939071655273,
"learning_rate": 2.9758256724548862e-06,
"loss": 0.0342,
"step": 25000
},
{
"epoch": 8.682328907048008,
"grad_norm": 1.2574002742767334,
"learning_rate": 2.635342185903984e-06,
"loss": 0.0345,
"step": 25500
},
{
"epoch": 8.85257065032346,
"grad_norm": 5.4230732917785645,
"learning_rate": 2.2948586993530815e-06,
"loss": 0.0344,
"step": 26000
},
{
"epoch": 9.02281239359891,
"grad_norm": 0.885810136795044,
"learning_rate": 1.9543752128021793e-06,
"loss": 0.0333,
"step": 26500
},
{
"epoch": 9.19305413687436,
"grad_norm": 1.7516717910766602,
"learning_rate": 1.6138917262512767e-06,
"loss": 0.0291,
"step": 27000
},
{
"epoch": 9.363295880149813,
"grad_norm": 1.1372159719467163,
"learning_rate": 1.2734082397003748e-06,
"loss": 0.0293,
"step": 27500
},
{
"epoch": 9.533537623425264,
"grad_norm": 0.9269993305206299,
"learning_rate": 9.329247531494723e-07,
"loss": 0.0294,
"step": 28000
},
{
"epoch": 9.703779366700715,
"grad_norm": 1.229074239730835,
"learning_rate": 5.9244126659857e-07,
"loss": 0.0291,
"step": 28500
},
{
"epoch": 9.874021109976166,
"grad_norm": 2.4099299907684326,
"learning_rate": 2.519577800476677e-07,
"loss": 0.0289,
"step": 29000
},
{
"epoch": 10.0,
"step": 29370,
"total_flos": 8.758967154215731e+17,
"train_loss": 0.07915824020562384,
"train_runtime": 29556.1772,
"train_samples_per_second": 31.797,
"train_steps_per_second": 0.994
}
],
"logging_steps": 500,
"max_steps": 29370,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.758967154215731e+17,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}