|
{ |
|
"best_metric": 0.6414651274681091, |
|
"best_model_checkpoint": "output/checkpoint-170000", |
|
"epoch": 15.0, |
|
"global_step": 171660, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.98e-06, |
|
"loss": 2.0401, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.980000000000001e-06, |
|
"loss": 1.397, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.4979999999999999e-05, |
|
"loss": 1.302, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.9980000000000002e-05, |
|
"loss": 1.242, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.4970000000000003e-05, |
|
"loss": 1.1985, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.0940483808517456, |
|
"eval_runtime": 3092.4183, |
|
"eval_samples_per_second": 24.931, |
|
"eval_steps_per_second": 1.558, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.9970000000000003e-05, |
|
"loss": 1.1712, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.4970000000000006e-05, |
|
"loss": 1.1419, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.9970000000000005e-05, |
|
"loss": 1.1259, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.497e-05, |
|
"loss": 1.1088, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.997e-05, |
|
"loss": 1.0937, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.0032908916473389, |
|
"eval_runtime": 3130.3678, |
|
"eval_samples_per_second": 24.628, |
|
"eval_steps_per_second": 1.539, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.497e-05, |
|
"loss": 1.0865, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5.9970000000000004e-05, |
|
"loss": 1.0763, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 6.497000000000001e-05, |
|
"loss": 1.0666, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 6.997e-05, |
|
"loss": 1.0704, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 7.497000000000001e-05, |
|
"loss": 1.0675, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 0.9753177165985107, |
|
"eval_runtime": 3110.5948, |
|
"eval_samples_per_second": 24.785, |
|
"eval_steps_per_second": 1.549, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 7.997e-05, |
|
"loss": 1.0647, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 8.495e-05, |
|
"loss": 1.0493, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 8.995e-05, |
|
"loss": 1.0583, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.495e-05, |
|
"loss": 1.0582, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 9.995e-05, |
|
"loss": 1.0565, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 0.9801222085952759, |
|
"eval_runtime": 3136.3647, |
|
"eval_samples_per_second": 24.581, |
|
"eval_steps_per_second": 1.536, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.895171537484117e-05, |
|
"loss": 1.0507, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9.789284201609487e-05, |
|
"loss": 1.0443, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.683396865734859e-05, |
|
"loss": 1.0318, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.577721304531978e-05, |
|
"loss": 1.0298, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.471833968657349e-05, |
|
"loss": 1.0244, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 0.9525517225265503, |
|
"eval_runtime": 3137.7232, |
|
"eval_samples_per_second": 24.571, |
|
"eval_steps_per_second": 1.536, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.365946632782719e-05, |
|
"loss": 1.024, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 9.26005929690809e-05, |
|
"loss": 1.0054, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 9.154383735705209e-05, |
|
"loss": 1.0105, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 9.04849639983058e-05, |
|
"loss": 1.0035, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 8.942609063955952e-05, |
|
"loss": 0.9943, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 0.9298429489135742, |
|
"eval_runtime": 3051.8254, |
|
"eval_samples_per_second": 25.262, |
|
"eval_steps_per_second": 1.579, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 8.836721728081322e-05, |
|
"loss": 0.9959, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 8.731046166878442e-05, |
|
"loss": 0.9874, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 8.625158831003812e-05, |
|
"loss": 0.9834, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 8.519483269800933e-05, |
|
"loss": 0.9764, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 8.413595933926302e-05, |
|
"loss": 0.9799, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 0.9034547805786133, |
|
"eval_runtime": 3114.4981, |
|
"eval_samples_per_second": 24.754, |
|
"eval_steps_per_second": 1.547, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 8.307708598051674e-05, |
|
"loss": 0.9642, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 8.201821262177044e-05, |
|
"loss": 0.9671, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.095933926302415e-05, |
|
"loss": 0.9637, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 7.990046590427785e-05, |
|
"loss": 0.9545, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 7.884159254553156e-05, |
|
"loss": 0.95, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 0.8834716081619263, |
|
"eval_runtime": 3125.2028, |
|
"eval_samples_per_second": 24.669, |
|
"eval_steps_per_second": 1.542, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 7.778483693350275e-05, |
|
"loss": 0.9556, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 7.672596357475647e-05, |
|
"loss": 0.9519, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 7.566709021601018e-05, |
|
"loss": 0.9448, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 7.460821685726388e-05, |
|
"loss": 0.9382, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 7.354934349851758e-05, |
|
"loss": 0.933, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 0.8636178970336914, |
|
"eval_runtime": 3087.2735, |
|
"eval_samples_per_second": 24.972, |
|
"eval_steps_per_second": 1.561, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 7.249258788648878e-05, |
|
"loss": 0.93, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 7.14337145277425e-05, |
|
"loss": 0.9193, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 7.037484116899618e-05, |
|
"loss": 0.9141, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 6.931596781024989e-05, |
|
"loss": 0.9092, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 6.82570944515036e-05, |
|
"loss": 0.9079, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_loss": 0.8507079482078552, |
|
"eval_runtime": 3106.3211, |
|
"eval_samples_per_second": 24.819, |
|
"eval_steps_per_second": 1.551, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 6.72003388394748e-05, |
|
"loss": 0.9041, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 6.614146548072851e-05, |
|
"loss": 0.9004, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.508259212198221e-05, |
|
"loss": 0.8995, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.402371876323592e-05, |
|
"loss": 0.8922, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 6.296484540448962e-05, |
|
"loss": 0.8938, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 0.8396568894386292, |
|
"eval_runtime": 3033.9082, |
|
"eval_samples_per_second": 25.411, |
|
"eval_steps_per_second": 1.588, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 6.190597204574333e-05, |
|
"loss": 0.8897, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 6.084709868699704e-05, |
|
"loss": 0.8855, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 5.979034307496824e-05, |
|
"loss": 0.8795, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 5.873146971622194e-05, |
|
"loss": 0.8841, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 5.767259635747565e-05, |
|
"loss": 0.8781, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_loss": 0.8194745182991028, |
|
"eval_runtime": 3075.2096, |
|
"eval_samples_per_second": 25.07, |
|
"eval_steps_per_second": 1.567, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5.6613722998729355e-05, |
|
"loss": 0.8776, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.555484963998306e-05, |
|
"loss": 0.8777, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 5.4495976281236774e-05, |
|
"loss": 0.865, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 5.3437102922490467e-05, |
|
"loss": 0.8752, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 5.237822956374417e-05, |
|
"loss": 0.8647, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_loss": 0.8087666630744934, |
|
"eval_runtime": 3169.435, |
|
"eval_samples_per_second": 24.325, |
|
"eval_steps_per_second": 1.52, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 5.1321473951715384e-05, |
|
"loss": 0.8599, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 5.0262600592969076e-05, |
|
"loss": 0.8568, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.920372723422279e-05, |
|
"loss": 0.8528, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 4.8144853875476495e-05, |
|
"loss": 0.8463, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 4.70859805167302e-05, |
|
"loss": 0.8422, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"eval_loss": 0.7954000234603882, |
|
"eval_runtime": 3127.0452, |
|
"eval_samples_per_second": 24.655, |
|
"eval_steps_per_second": 1.541, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 4.602710715798391e-05, |
|
"loss": 0.8373, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 4.4968233799237613e-05, |
|
"loss": 0.8268, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 4.390936044049132e-05, |
|
"loss": 0.8426, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 4.2850487081745026e-05, |
|
"loss": 0.837, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 4.179373146971622e-05, |
|
"loss": 0.831, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_loss": 0.7871229648590088, |
|
"eval_runtime": 3095.9065, |
|
"eval_samples_per_second": 24.903, |
|
"eval_steps_per_second": 1.557, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 4.073485811096993e-05, |
|
"loss": 0.8343, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 3.9675984752223635e-05, |
|
"loss": 0.825, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 3.861711139347734e-05, |
|
"loss": 0.8195, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 3.755823803473105e-05, |
|
"loss": 0.8219, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 3.650148242270225e-05, |
|
"loss": 0.8173, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_loss": 0.7720773220062256, |
|
"eval_runtime": 3173.0037, |
|
"eval_samples_per_second": 24.297, |
|
"eval_steps_per_second": 1.519, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 3.544260906395595e-05, |
|
"loss": 0.8148, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 3.4385853451927155e-05, |
|
"loss": 0.8102, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 3.3329097839898346e-05, |
|
"loss": 0.8103, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 3.227022448115206e-05, |
|
"loss": 0.8116, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 3.121346886912325e-05, |
|
"loss": 0.8072, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"eval_loss": 0.7610893845558167, |
|
"eval_runtime": 3130.5399, |
|
"eval_samples_per_second": 24.627, |
|
"eval_steps_per_second": 1.539, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 3.015459551037696e-05, |
|
"loss": 0.802, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 2.9095722151630668e-05, |
|
"loss": 0.7994, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 2.8036848792884374e-05, |
|
"loss": 0.8029, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 2.697797543413808e-05, |
|
"loss": 0.7975, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 2.5919102075391783e-05, |
|
"loss": 0.8011, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"eval_loss": 0.7531821131706238, |
|
"eval_runtime": 3062.0684, |
|
"eval_samples_per_second": 25.178, |
|
"eval_steps_per_second": 1.574, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 2.486022871664549e-05, |
|
"loss": 0.7935, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 2.3801355357899195e-05, |
|
"loss": 0.793, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 2.2742481999152905e-05, |
|
"loss": 0.788, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 2.1683608640406607e-05, |
|
"loss": 0.7853, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 2.0624735281660313e-05, |
|
"loss": 0.7828, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"eval_loss": 0.7431035041809082, |
|
"eval_runtime": 3053.0232, |
|
"eval_samples_per_second": 25.252, |
|
"eval_steps_per_second": 1.578, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 1.956586192291402e-05, |
|
"loss": 0.7715, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 1.8506988564167726e-05, |
|
"loss": 0.7752, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 1.7450232952138926e-05, |
|
"loss": 0.7714, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 1.639135959339263e-05, |
|
"loss": 0.7736, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 1.533248623464634e-05, |
|
"loss": 0.7691, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"eval_loss": 0.7367000579833984, |
|
"eval_runtime": 3035.0963, |
|
"eval_samples_per_second": 25.402, |
|
"eval_steps_per_second": 1.588, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 1.4273612875900045e-05, |
|
"loss": 0.7685, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 1.3214739517153749e-05, |
|
"loss": 0.7639, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 1.2157983905124948e-05, |
|
"loss": 0.7654, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 1.1099110546378654e-05, |
|
"loss": 0.7677, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 1.0040237187632359e-05, |
|
"loss": 0.7659, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"eval_loss": 0.7291901111602783, |
|
"eval_runtime": 3110.3355, |
|
"eval_samples_per_second": 24.787, |
|
"eval_steps_per_second": 1.549, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 8.981363828886067e-06, |
|
"loss": 0.763, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 7.922490470139773e-06, |
|
"loss": 0.759, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 6.863617111393478e-06, |
|
"loss": 0.7559, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 5.804743752647183e-06, |
|
"loss": 0.7547, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 4.74587039390089e-06, |
|
"loss": 0.7606, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"eval_loss": 0.7244983911514282, |
|
"eval_runtime": 3076.3307, |
|
"eval_samples_per_second": 25.061, |
|
"eval_steps_per_second": 1.566, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 5.6455381080045965e-05, |
|
"loss": 0.7842, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 5.597663730371505e-05, |
|
"loss": 0.7983, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 5.549789352738415e-05, |
|
"loss": 0.8032, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 5.5019149751053236e-05, |
|
"loss": 0.8037, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 5.454040597472233e-05, |
|
"loss": 0.8082, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"eval_loss": 0.7695716023445129, |
|
"eval_runtime": 3095.7063, |
|
"eval_samples_per_second": 24.904, |
|
"eval_steps_per_second": 1.557, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 5.406166219839143e-05, |
|
"loss": 0.8102, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 5.358291842206051e-05, |
|
"loss": 0.8067, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 5.310417464572961e-05, |
|
"loss": 0.8103, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 5.2627345844504025e-05, |
|
"loss": 0.8088, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 5.214955955572578e-05, |
|
"loss": 0.8114, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"eval_loss": 0.7694710493087769, |
|
"eval_runtime": 3084.9083, |
|
"eval_samples_per_second": 24.991, |
|
"eval_steps_per_second": 1.562, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 5.167081577939487e-05, |
|
"loss": 0.8073, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 5.119207200306396e-05, |
|
"loss": 0.8019, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 5.071332822673306e-05, |
|
"loss": 0.7971, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 5.0234584450402144e-05, |
|
"loss": 0.8024, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 4.97567981616239e-05, |
|
"loss": 0.8022, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"eval_loss": 0.7612630128860474, |
|
"eval_runtime": 3135.2609, |
|
"eval_samples_per_second": 24.59, |
|
"eval_steps_per_second": 1.537, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 4.927805438529299e-05, |
|
"loss": 0.8071, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 4.8799310608962085e-05, |
|
"loss": 0.793, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 4.832152432018384e-05, |
|
"loss": 0.798, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 4.7842780543852934e-05, |
|
"loss": 0.7976, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 4.7364036767522026e-05, |
|
"loss": 0.7986, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"eval_loss": 0.7558260560035706, |
|
"eval_runtime": 3154.5823, |
|
"eval_samples_per_second": 24.439, |
|
"eval_steps_per_second": 1.528, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 4.688529299119111e-05, |
|
"loss": 0.7918, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 4.6406549214860205e-05, |
|
"loss": 0.7941, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 4.5927805438529304e-05, |
|
"loss": 0.7976, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 4.5449061662198396e-05, |
|
"loss": 0.7953, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 4.497031788586749e-05, |
|
"loss": 0.8018, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"eval_loss": 0.7478091716766357, |
|
"eval_runtime": 3109.9687, |
|
"eval_samples_per_second": 24.79, |
|
"eval_steps_per_second": 1.55, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 4.4491574109536574e-05, |
|
"loss": 0.7939, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 4.401283033320567e-05, |
|
"loss": 0.7844, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 4.3534086556874766e-05, |
|
"loss": 0.7853, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 4.305534278054386e-05, |
|
"loss": 0.7776, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 4.2576599004212944e-05, |
|
"loss": 0.782, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"eval_loss": 0.7434529662132263, |
|
"eval_runtime": 3111.4708, |
|
"eval_samples_per_second": 24.778, |
|
"eval_steps_per_second": 1.549, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 4.209785522788204e-05, |
|
"loss": 0.7812, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 4.161911145155113e-05, |
|
"loss": 0.7816, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 4.1141325162772885e-05, |
|
"loss": 0.7745, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 4.066258138644198e-05, |
|
"loss": 0.7776, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 4.018383761011107e-05, |
|
"loss": 0.7743, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"eval_loss": 0.7367435097694397, |
|
"eval_runtime": 3109.6155, |
|
"eval_samples_per_second": 24.793, |
|
"eval_steps_per_second": 1.55, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 3.970509383378016e-05, |
|
"loss": 0.7762, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 3.9226350057449255e-05, |
|
"loss": 0.7728, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 3.874760628111835e-05, |
|
"loss": 0.7743, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 3.826886250478744e-05, |
|
"loss": 0.7715, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 3.779011872845653e-05, |
|
"loss": 0.774, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"eval_loss": 0.7312998175621033, |
|
"eval_runtime": 3143.7734, |
|
"eval_samples_per_second": 24.523, |
|
"eval_steps_per_second": 1.533, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 3.7311374952125625e-05, |
|
"loss": 0.7656, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 3.683263117579472e-05, |
|
"loss": 0.7653, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 3.635388739946381e-05, |
|
"loss": 0.7619, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 3.587610111068556e-05, |
|
"loss": 0.765, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 3.539735733435465e-05, |
|
"loss": 0.7692, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"eval_loss": 0.7270153164863586, |
|
"eval_runtime": 3083.4104, |
|
"eval_samples_per_second": 25.003, |
|
"eval_steps_per_second": 1.563, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 3.491957104557641e-05, |
|
"loss": 0.7676, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 3.44408272692455e-05, |
|
"loss": 0.7617, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 3.396208349291459e-05, |
|
"loss": 0.765, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 3.3483339716583685e-05, |
|
"loss": 0.7609, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 3.300459594025278e-05, |
|
"loss": 0.7604, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_loss": 0.7200314998626709, |
|
"eval_runtime": 3077.861, |
|
"eval_samples_per_second": 25.049, |
|
"eval_steps_per_second": 1.566, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 3.252585216392187e-05, |
|
"loss": 0.7513, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 3.204710838759096e-05, |
|
"loss": 0.7549, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 3.156932209881271e-05, |
|
"loss": 0.7485, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 3.109057832248181e-05, |
|
"loss": 0.7486, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 3.0611834546150903e-05, |
|
"loss": 0.7468, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"eval_loss": 0.7163689136505127, |
|
"eval_runtime": 3165.721, |
|
"eval_samples_per_second": 24.353, |
|
"eval_steps_per_second": 1.522, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 3.0133090769819993e-05, |
|
"loss": 0.7471, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 2.9654346993489085e-05, |
|
"loss": 0.7473, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 2.9175603217158177e-05, |
|
"loss": 0.747, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 2.8696859440827273e-05, |
|
"loss": 0.743, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"learning_rate": 2.8219073152049026e-05, |
|
"loss": 0.7486, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"eval_loss": 0.7117038369178772, |
|
"eval_runtime": 3102.6798, |
|
"eval_samples_per_second": 24.848, |
|
"eval_steps_per_second": 1.553, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 2.7740329375718115e-05, |
|
"loss": 0.7411, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 2.7261585599387208e-05, |
|
"loss": 0.7418, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 2.6782841823056303e-05, |
|
"loss": 0.7443, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 2.6305055534278056e-05, |
|
"loss": 0.7403, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 2.582631175794715e-05, |
|
"loss": 0.7399, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"eval_loss": 0.7042549252510071, |
|
"eval_runtime": 3110.6014, |
|
"eval_samples_per_second": 24.785, |
|
"eval_steps_per_second": 1.549, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 2.5347567981616238e-05, |
|
"loss": 0.7393, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 7.73, |
|
"learning_rate": 2.4868824205285333e-05, |
|
"loss": 0.7404, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 2.4390080428954426e-05, |
|
"loss": 0.7356, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 2.391133665262352e-05, |
|
"loss": 0.7395, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"learning_rate": 2.3432592876292607e-05, |
|
"loss": 0.7306, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"eval_loss": 0.6955912709236145, |
|
"eval_runtime": 3139.2914, |
|
"eval_samples_per_second": 24.558, |
|
"eval_steps_per_second": 1.535, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 2.2953849099961703e-05, |
|
"loss": 0.735, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 2.2475105323630792e-05, |
|
"loss": 0.7281, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 2.1996361547299888e-05, |
|
"loss": 0.732, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 2.1517617770968977e-05, |
|
"loss": 0.7219, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 2.1039831482190733e-05, |
|
"loss": 0.7243, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"eval_loss": 0.695923924446106, |
|
"eval_runtime": 3092.5373, |
|
"eval_samples_per_second": 24.93, |
|
"eval_steps_per_second": 1.558, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"learning_rate": 2.0561087705859826e-05, |
|
"loss": 0.7243, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 2.0082343929528918e-05, |
|
"loss": 0.7258, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 1.960360015319801e-05, |
|
"loss": 0.7211, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 1.91248563768671e-05, |
|
"loss": 0.7214, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 1.8646112600536196e-05, |
|
"loss": 0.7132, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"eval_loss": 0.6916212439537048, |
|
"eval_runtime": 3163.2312, |
|
"eval_samples_per_second": 24.373, |
|
"eval_steps_per_second": 1.523, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 1.8167368824205285e-05, |
|
"loss": 0.7188, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 1.768862504787438e-05, |
|
"loss": 0.717, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 1.7210838759096133e-05, |
|
"loss": 0.7189, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 1.6732094982765226e-05, |
|
"loss": 0.7206, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 1.625430869398698e-05, |
|
"loss": 0.71, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"eval_loss": 0.6853311061859131, |
|
"eval_runtime": 3146.2403, |
|
"eval_samples_per_second": 24.504, |
|
"eval_steps_per_second": 1.532, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 1.577556491765607e-05, |
|
"loss": 0.7148, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 1.5296821141325163e-05, |
|
"loss": 0.7169, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"learning_rate": 1.4818077364994256e-05, |
|
"loss": 0.7155, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 1.4339333588663348e-05, |
|
"loss": 0.713, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 1.3860589812332439e-05, |
|
"loss": 0.7128, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"eval_loss": 0.6855071783065796, |
|
"eval_runtime": 3110.846, |
|
"eval_samples_per_second": 24.783, |
|
"eval_steps_per_second": 1.549, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 1.3381846036001533e-05, |
|
"loss": 0.7107, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"learning_rate": 1.2903102259670624e-05, |
|
"loss": 0.7142, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"learning_rate": 1.2424358483339716e-05, |
|
"loss": 0.7035, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 1.1945614707008809e-05, |
|
"loss": 0.7061, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 1.1467828418230563e-05, |
|
"loss": 0.7088, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"eval_loss": 0.680884063243866, |
|
"eval_runtime": 3113.4696, |
|
"eval_samples_per_second": 24.762, |
|
"eval_steps_per_second": 1.548, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 1.0989084641899656e-05, |
|
"loss": 0.7039, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 1.0510340865568748e-05, |
|
"loss": 0.6983, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 1.0032554576790503e-05, |
|
"loss": 0.7028, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 9.553810800459595e-06, |
|
"loss": 0.7002, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 9.075067024128686e-06, |
|
"loss": 0.7002, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"eval_loss": 0.6783619523048401, |
|
"eval_runtime": 3133.9738, |
|
"eval_samples_per_second": 24.6, |
|
"eval_steps_per_second": 1.538, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"learning_rate": 8.596323247797778e-06, |
|
"loss": 0.699, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 8.11757947146687e-06, |
|
"loss": 0.6927, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 7.638835695135963e-06, |
|
"loss": 0.6976, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 7.1600919188050565e-06, |
|
"loss": 0.6978, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"learning_rate": 6.681348142474147e-06, |
|
"loss": 0.6953, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"eval_loss": 0.6736627221107483, |
|
"eval_runtime": 3133.4547, |
|
"eval_samples_per_second": 24.604, |
|
"eval_steps_per_second": 1.538, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 6.2026043661432406e-06, |
|
"loss": 0.6892, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"learning_rate": 5.723860589812333e-06, |
|
"loss": 0.6989, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 5.2451168134814254e-06, |
|
"loss": 0.6975, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 4.767330524703179e-06, |
|
"loss": 0.6977, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"learning_rate": 4.288586748372271e-06, |
|
"loss": 0.695, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"eval_loss": 0.6714410185813904, |
|
"eval_runtime": 3139.9991, |
|
"eval_samples_per_second": 24.553, |
|
"eval_steps_per_second": 1.535, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 3.810800459594025e-06, |
|
"loss": 0.6947, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 3.3320566832631176e-06, |
|
"loss": 0.6967, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"learning_rate": 2.85331290693221e-06, |
|
"loss": 0.6904, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"learning_rate": 2.3745691306013025e-06, |
|
"loss": 0.6949, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"learning_rate": 1.8958253542703947e-06, |
|
"loss": 0.6871, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"eval_loss": 0.6687204837799072, |
|
"eval_runtime": 3111.3683, |
|
"eval_samples_per_second": 24.779, |
|
"eval_steps_per_second": 1.549, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"learning_rate": 3.6311394284300384e-05, |
|
"loss": 0.7034, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 3.600210317951256e-05, |
|
"loss": 0.7149, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 3.5692812074724736e-05, |
|
"loss": 0.716, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 3.5383520969936905e-05, |
|
"loss": 0.7236, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"learning_rate": 3.5074229865149074e-05, |
|
"loss": 0.7161, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"eval_loss": 0.6960982084274292, |
|
"eval_runtime": 3127.0335, |
|
"eval_samples_per_second": 24.655, |
|
"eval_steps_per_second": 1.541, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 10.09, |
|
"learning_rate": 3.476493876036126e-05, |
|
"loss": 0.7149, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 10.14, |
|
"learning_rate": 3.4455647655573426e-05, |
|
"loss": 0.7203, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 10.18, |
|
"learning_rate": 3.41463565507856e-05, |
|
"loss": 0.7259, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 10.22, |
|
"learning_rate": 3.383706544599777e-05, |
|
"loss": 0.7213, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 10.27, |
|
"learning_rate": 3.35290115056291e-05, |
|
"loss": 0.7265, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 10.27, |
|
"eval_loss": 0.7006255984306335, |
|
"eval_runtime": 3139.1903, |
|
"eval_samples_per_second": 24.559, |
|
"eval_steps_per_second": 1.535, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 10.31, |
|
"learning_rate": 3.321972040084128e-05, |
|
"loss": 0.7228, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 10.35, |
|
"learning_rate": 3.2910429296053447e-05, |
|
"loss": 0.7214, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 3.260113819126562e-05, |
|
"loss": 0.7309, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"learning_rate": 3.229184708647779e-05, |
|
"loss": 0.7208, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"learning_rate": 3.198255598168997e-05, |
|
"loss": 0.7284, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"eval_loss": 0.6941153407096863, |
|
"eval_runtime": 3132.0931, |
|
"eval_samples_per_second": 24.615, |
|
"eval_steps_per_second": 1.539, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"learning_rate": 3.167326487690214e-05, |
|
"loss": 0.7233, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"learning_rate": 3.136397377211431e-05, |
|
"loss": 0.7219, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 10.62, |
|
"learning_rate": 3.105468266732649e-05, |
|
"loss": 0.7163, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 10.66, |
|
"learning_rate": 3.0745391562538664e-05, |
|
"loss": 0.7223, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"learning_rate": 3.0436100457750836e-05, |
|
"loss": 0.724, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"eval_loss": 0.6886956095695496, |
|
"eval_runtime": 3117.6812, |
|
"eval_samples_per_second": 24.729, |
|
"eval_steps_per_second": 1.546, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 10.75, |
|
"learning_rate": 3.012680935296301e-05, |
|
"loss": 0.7181, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 10.79, |
|
"learning_rate": 2.9817518248175185e-05, |
|
"loss": 0.7215, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 10.84, |
|
"learning_rate": 2.9508845725596935e-05, |
|
"loss": 0.726, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"learning_rate": 2.9199554620809104e-05, |
|
"loss": 0.7205, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 10.92, |
|
"learning_rate": 2.8890882098230854e-05, |
|
"loss": 0.7266, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 10.92, |
|
"eval_loss": 0.6931244134902954, |
|
"eval_runtime": 3180.6149, |
|
"eval_samples_per_second": 24.239, |
|
"eval_steps_per_second": 1.515, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"learning_rate": 2.858159099344303e-05, |
|
"loss": 0.7214, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 2.8272299888655202e-05, |
|
"loss": 0.7165, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 11.05, |
|
"learning_rate": 2.7963627366076955e-05, |
|
"loss": 0.7044, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 11.1, |
|
"learning_rate": 2.7654954843498705e-05, |
|
"loss": 0.7071, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 11.14, |
|
"learning_rate": 2.7345663738710874e-05, |
|
"loss": 0.7051, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 11.14, |
|
"eval_loss": 0.6846074461936951, |
|
"eval_runtime": 3127.4042, |
|
"eval_samples_per_second": 24.652, |
|
"eval_steps_per_second": 1.541, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 11.18, |
|
"learning_rate": 2.703637263392305e-05, |
|
"loss": 0.7139, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 11.23, |
|
"learning_rate": 2.6727081529135223e-05, |
|
"loss": 0.7057, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 11.27, |
|
"learning_rate": 2.64177904243474e-05, |
|
"loss": 0.7104, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"learning_rate": 2.610849931955957e-05, |
|
"loss": 0.7036, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 11.36, |
|
"learning_rate": 2.5799208214771743e-05, |
|
"loss": 0.7106, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 11.36, |
|
"eval_loss": 0.68162602186203, |
|
"eval_runtime": 3071.6142, |
|
"eval_samples_per_second": 25.1, |
|
"eval_steps_per_second": 1.569, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"learning_rate": 2.548991710998392e-05, |
|
"loss": 0.7072, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 11.45, |
|
"learning_rate": 2.5180626005196088e-05, |
|
"loss": 0.7073, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 11.49, |
|
"learning_rate": 2.4871334900408268e-05, |
|
"loss": 0.7109, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 11.53, |
|
"learning_rate": 2.456204379562044e-05, |
|
"loss": 0.7041, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 11.58, |
|
"learning_rate": 2.4252752690832612e-05, |
|
"loss": 0.7011, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 11.58, |
|
"eval_loss": 0.6830089688301086, |
|
"eval_runtime": 3073.7046, |
|
"eval_samples_per_second": 25.082, |
|
"eval_steps_per_second": 1.568, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 11.62, |
|
"learning_rate": 2.3943461586044785e-05, |
|
"loss": 0.7041, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 11.67, |
|
"learning_rate": 2.3634789063466535e-05, |
|
"loss": 0.7057, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 11.71, |
|
"learning_rate": 2.332549795867871e-05, |
|
"loss": 0.7068, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 11.75, |
|
"learning_rate": 2.3016825436100457e-05, |
|
"loss": 0.7073, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"learning_rate": 2.2707534331312633e-05, |
|
"loss": 0.6997, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"eval_loss": 0.6784160137176514, |
|
"eval_runtime": 3076.1607, |
|
"eval_samples_per_second": 25.062, |
|
"eval_steps_per_second": 1.567, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 11.84, |
|
"learning_rate": 2.2398243226524806e-05, |
|
"loss": 0.7062, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 11.88, |
|
"learning_rate": 2.208895212173698e-05, |
|
"loss": 0.6982, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 11.93, |
|
"learning_rate": 2.1779661016949154e-05, |
|
"loss": 0.7, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 11.97, |
|
"learning_rate": 2.147036991216133e-05, |
|
"loss": 0.6981, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 12.02, |
|
"learning_rate": 2.1161697389583076e-05, |
|
"loss": 0.6969, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 12.02, |
|
"eval_loss": 0.673393189907074, |
|
"eval_runtime": 3053.9536, |
|
"eval_samples_per_second": 25.245, |
|
"eval_steps_per_second": 1.578, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 12.06, |
|
"learning_rate": 2.0852406284795252e-05, |
|
"loss": 0.6961, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 12.1, |
|
"learning_rate": 2.0543115180007425e-05, |
|
"loss": 0.6941, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 12.15, |
|
"learning_rate": 2.0233824075219597e-05, |
|
"loss": 0.6938, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 12.19, |
|
"learning_rate": 1.9924532970431773e-05, |
|
"loss": 0.6968, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 12.23, |
|
"learning_rate": 1.9615241865643945e-05, |
|
"loss": 0.6968, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 12.23, |
|
"eval_loss": 0.6708864569664001, |
|
"eval_runtime": 3020.4771, |
|
"eval_samples_per_second": 25.524, |
|
"eval_steps_per_second": 1.595, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 12.28, |
|
"learning_rate": 1.9305950760856118e-05, |
|
"loss": 0.6928, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 12.32, |
|
"learning_rate": 1.899665965606829e-05, |
|
"loss": 0.6975, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 12.36, |
|
"learning_rate": 1.8687368551280466e-05, |
|
"loss": 0.6912, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 12.41, |
|
"learning_rate": 1.837807744649264e-05, |
|
"loss": 0.6923, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 12.45, |
|
"learning_rate": 1.8068786341704814e-05, |
|
"loss": 0.6867, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 12.45, |
|
"eval_loss": 0.6655944585800171, |
|
"eval_runtime": 3033.3682, |
|
"eval_samples_per_second": 25.416, |
|
"eval_steps_per_second": 1.589, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 1.7759495236916987e-05, |
|
"loss": 0.6894, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 12.54, |
|
"learning_rate": 1.745020413212916e-05, |
|
"loss": 0.6861, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 12.58, |
|
"learning_rate": 1.714153160955091e-05, |
|
"loss": 0.6917, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"learning_rate": 1.6832240504763082e-05, |
|
"loss": 0.688, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 12.67, |
|
"learning_rate": 1.6522949399975258e-05, |
|
"loss": 0.6925, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 12.67, |
|
"eval_loss": 0.6661481261253357, |
|
"eval_runtime": 3083.2268, |
|
"eval_samples_per_second": 25.005, |
|
"eval_steps_per_second": 1.563, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 12.71, |
|
"learning_rate": 1.621365829518743e-05, |
|
"loss": 0.6943, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 12.76, |
|
"learning_rate": 1.5904367190399606e-05, |
|
"loss": 0.6891, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 1.559507608561178e-05, |
|
"loss": 0.6848, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 12.85, |
|
"learning_rate": 1.5285784980823954e-05, |
|
"loss": 0.6816, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 12.89, |
|
"learning_rate": 1.49771124582457e-05, |
|
"loss": 0.6795, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 12.89, |
|
"eval_loss": 0.6605859398841858, |
|
"eval_runtime": 3089.4045, |
|
"eval_samples_per_second": 24.955, |
|
"eval_steps_per_second": 1.56, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"learning_rate": 1.4667821353457875e-05, |
|
"loss": 0.6779, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 12.98, |
|
"learning_rate": 1.4358530248670049e-05, |
|
"loss": 0.6846, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 13.02, |
|
"learning_rate": 1.4049239143882223e-05, |
|
"loss": 0.6739, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"learning_rate": 1.3740566621303971e-05, |
|
"loss": 0.6772, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"learning_rate": 1.3431275516516146e-05, |
|
"loss": 0.6774, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"eval_loss": 0.6616868376731873, |
|
"eval_runtime": 3137.3755, |
|
"eval_samples_per_second": 24.573, |
|
"eval_steps_per_second": 1.536, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 13.15, |
|
"learning_rate": 1.3122602993937894e-05, |
|
"loss": 0.6768, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 13.19, |
|
"learning_rate": 1.2813311889150068e-05, |
|
"loss": 0.6794, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 13.24, |
|
"learning_rate": 1.2504020784362242e-05, |
|
"loss": 0.6816, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 13.28, |
|
"learning_rate": 1.2194729679574415e-05, |
|
"loss": 0.6748, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 1.1886057156996165e-05, |
|
"loss": 0.6756, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"eval_loss": 0.6562890410423279, |
|
"eval_runtime": 3110.9217, |
|
"eval_samples_per_second": 24.782, |
|
"eval_steps_per_second": 1.549, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 13.37, |
|
"learning_rate": 1.1576766052208339e-05, |
|
"loss": 0.672, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 13.41, |
|
"learning_rate": 1.1267474947420513e-05, |
|
"loss": 0.6711, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 13.46, |
|
"learning_rate": 1.0958183842632687e-05, |
|
"loss": 0.6731, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"learning_rate": 1.064889273784486e-05, |
|
"loss": 0.6707, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 13.54, |
|
"learning_rate": 1.0339601633057034e-05, |
|
"loss": 0.6728, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 13.54, |
|
"eval_loss": 0.6547101140022278, |
|
"eval_runtime": 3126.1621, |
|
"eval_samples_per_second": 24.662, |
|
"eval_steps_per_second": 1.542, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 13.59, |
|
"learning_rate": 1.0030929110478784e-05, |
|
"loss": 0.6727, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 13.63, |
|
"learning_rate": 9.721638005690958e-06, |
|
"loss": 0.6679, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 13.68, |
|
"learning_rate": 9.41234690090313e-06, |
|
"loss": 0.6626, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 13.72, |
|
"learning_rate": 9.103055796115304e-06, |
|
"loss": 0.6703, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 13.76, |
|
"learning_rate": 8.793764691327477e-06, |
|
"loss": 0.6732, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 13.76, |
|
"eval_loss": 0.6520426273345947, |
|
"eval_runtime": 3118.7072, |
|
"eval_samples_per_second": 24.72, |
|
"eval_steps_per_second": 1.545, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 13.81, |
|
"learning_rate": 8.484473586539651e-06, |
|
"loss": 0.6734, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 13.85, |
|
"learning_rate": 8.175182481751825e-06, |
|
"loss": 0.6707, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 13.89, |
|
"learning_rate": 7.865891376964e-06, |
|
"loss": 0.667, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"learning_rate": 7.557218854385749e-06, |
|
"loss": 0.6667, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 13.98, |
|
"learning_rate": 7.2485463318074974e-06, |
|
"loss": 0.6704, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 13.98, |
|
"eval_loss": 0.6492029428482056, |
|
"eval_runtime": 3114.0251, |
|
"eval_samples_per_second": 24.758, |
|
"eval_steps_per_second": 1.548, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"learning_rate": 6.939255227019672e-06, |
|
"loss": 0.6631, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 14.07, |
|
"learning_rate": 6.629964122231845e-06, |
|
"loss": 0.663, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 14.11, |
|
"learning_rate": 6.320673017444019e-06, |
|
"loss": 0.6661, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 14.16, |
|
"learning_rate": 6.011381912656192e-06, |
|
"loss": 0.6641, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 14.2, |
|
"learning_rate": 5.7020908078683665e-06, |
|
"loss": 0.6666, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 14.2, |
|
"eval_loss": 0.6445870995521545, |
|
"eval_runtime": 3137.0802, |
|
"eval_samples_per_second": 24.576, |
|
"eval_steps_per_second": 1.536, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 14.24, |
|
"learning_rate": 5.392799703080539e-06, |
|
"loss": 0.6612, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"learning_rate": 5.083508598292713e-06, |
|
"loss": 0.6645, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 14.33, |
|
"learning_rate": 4.774217493504887e-06, |
|
"loss": 0.6613, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 14.37, |
|
"learning_rate": 4.464926388717061e-06, |
|
"loss": 0.6608, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 4.155635283929235e-06, |
|
"loss": 0.6615, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"eval_loss": 0.6487849950790405, |
|
"eval_runtime": 3095.1776, |
|
"eval_samples_per_second": 24.908, |
|
"eval_steps_per_second": 1.557, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 14.46, |
|
"learning_rate": 3.846344179141408e-06, |
|
"loss": 0.6598, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 14.51, |
|
"learning_rate": 3.537671656563157e-06, |
|
"loss": 0.6569, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"learning_rate": 3.2289991339849062e-06, |
|
"loss": 0.6587, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 14.59, |
|
"learning_rate": 2.920326611406656e-06, |
|
"loss": 0.6575, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 14.64, |
|
"learning_rate": 2.61103550661883e-06, |
|
"loss": 0.6638, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 14.64, |
|
"eval_loss": 0.6522655487060547, |
|
"eval_runtime": 3117.0073, |
|
"eval_samples_per_second": 24.734, |
|
"eval_steps_per_second": 1.546, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 14.68, |
|
"learning_rate": 2.3017444018310032e-06, |
|
"loss": 0.6553, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 14.72, |
|
"learning_rate": 1.9930718792527527e-06, |
|
"loss": 0.6617, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 14.77, |
|
"learning_rate": 1.6837807744649262e-06, |
|
"loss": 0.6583, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"learning_rate": 1.3744896696771002e-06, |
|
"loss": 0.6579, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 14.85, |
|
"learning_rate": 1.0651985648892737e-06, |
|
"loss": 0.6588, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 14.85, |
|
"eval_loss": 0.6414651274681091, |
|
"eval_runtime": 3135.6411, |
|
"eval_samples_per_second": 24.587, |
|
"eval_steps_per_second": 1.537, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 14.9, |
|
"learning_rate": 7.559074601014475e-07, |
|
"loss": 0.6589, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 14.94, |
|
"learning_rate": 4.4661635531362117e-07, |
|
"loss": 0.6582, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"learning_rate": 1.3732525052579488e-07, |
|
"loss": 0.6569, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 171660, |
|
"total_flos": 2.5739338132512957e+18, |
|
"train_loss": 0.23807151880656507, |
|
"train_runtime": 725915.7505, |
|
"train_samples_per_second": 30.268, |
|
"train_steps_per_second": 0.236 |
|
} |
|
], |
|
"max_steps": 171660, |
|
"num_train_epochs": 15, |
|
"total_flos": 2.5739338132512957e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|