{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 1, "global_step": 70, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14285714285714285, "grad_norm": 1.1953125, "learning_rate": 2.5e-05, "loss": 0.6913, "step": 1 }, { "epoch": 0.14285714285714285, "eval_loss": 0.675564169883728, "eval_matthews_correlation": 0.039478056766886455, "eval_runtime": 1.9051, "eval_samples_per_second": 116.006, "eval_steps_per_second": 2.1, "step": 1 }, { "epoch": 0.2857142857142857, "grad_norm": 3.6875, "learning_rate": 5e-05, "loss": 0.7041, "step": 2 }, { "epoch": 0.2857142857142857, "eval_loss": 0.6751559972763062, "eval_matthews_correlation": 0.018836815775184944, "eval_runtime": 1.9021, "eval_samples_per_second": 116.189, "eval_steps_per_second": 2.103, "step": 2 }, { "epoch": 0.42857142857142855, "grad_norm": 3.25, "learning_rate": 4.9264705882352944e-05, "loss": 0.6404, "step": 3 }, { "epoch": 0.42857142857142855, "eval_loss": 0.672702431678772, "eval_matthews_correlation": 0.02597419770482644, "eval_runtime": 1.8999, "eval_samples_per_second": 116.32, "eval_steps_per_second": 2.105, "step": 3 }, { "epoch": 0.5714285714285714, "grad_norm": 2.84375, "learning_rate": 4.8529411764705885e-05, "loss": 0.7011, "step": 4 }, { "epoch": 0.5714285714285714, "eval_loss": 0.6732006669044495, "eval_matthews_correlation": 0.011067888344390708, "eval_runtime": 1.9011, "eval_samples_per_second": 116.248, "eval_steps_per_second": 2.104, "step": 4 }, { "epoch": 0.7142857142857143, "grad_norm": 2.8125, "learning_rate": 4.7794117647058826e-05, "loss": 0.7219, "step": 5 }, { "epoch": 0.7142857142857143, "eval_loss": 0.6730763912200928, "eval_matthews_correlation": -0.0029297843623512934, "eval_runtime": 1.901, "eval_samples_per_second": 116.252, "eval_steps_per_second": 2.104, "step": 5 }, { "epoch": 0.8571428571428571, "grad_norm": 6.1875, "learning_rate": 4.705882352941177e-05, "loss": 0.7232, "step": 6 }, { "epoch": 0.8571428571428571, "eval_loss": 0.673651397228241, "eval_matthews_correlation": 0.011067888344390708, "eval_runtime": 1.9003, "eval_samples_per_second": 116.297, "eval_steps_per_second": 2.105, "step": 6 }, { "epoch": 1.0, "grad_norm": 4.53125, "learning_rate": 4.632352941176471e-05, "loss": 0.7109, "step": 7 }, { "epoch": 1.0, "eval_loss": 0.6717435121536255, "eval_matthews_correlation": 0.011067888344390708, "eval_runtime": 1.9013, "eval_samples_per_second": 116.234, "eval_steps_per_second": 2.104, "step": 7 }, { "epoch": 1.1428571428571428, "grad_norm": 6.28125, "learning_rate": 4.558823529411765e-05, "loss": 0.6897, "step": 8 }, { "epoch": 1.1428571428571428, "eval_loss": 0.6718308329582214, "eval_matthews_correlation": 0.011067888344390708, "eval_runtime": 1.9012, "eval_samples_per_second": 116.241, "eval_steps_per_second": 2.104, "step": 8 }, { "epoch": 1.2857142857142856, "grad_norm": 3.09375, "learning_rate": 4.485294117647059e-05, "loss": 0.6994, "step": 9 }, { "epoch": 1.2857142857142856, "eval_loss": 0.672776460647583, "eval_matthews_correlation": 0.011067888344390708, "eval_runtime": 1.9005, "eval_samples_per_second": 116.288, "eval_steps_per_second": 2.105, "step": 9 }, { "epoch": 1.4285714285714286, "grad_norm": 8.5, "learning_rate": 4.411764705882353e-05, "loss": 0.6598, "step": 10 }, { "epoch": 1.4285714285714286, "eval_loss": 0.6726637482643127, "eval_matthews_correlation": -0.0029297843623512934, "eval_runtime": 1.9, "eval_samples_per_second": 116.317, "eval_steps_per_second": 2.105, "step": 10 }, { "epoch": 1.5714285714285714, "grad_norm": 1.203125, "learning_rate": 4.3382352941176474e-05, "loss": 0.6885, "step": 11 }, { "epoch": 1.5714285714285714, "eval_loss": 0.675143837928772, "eval_matthews_correlation": -0.028683275619690942, "eval_runtime": 1.8994, "eval_samples_per_second": 116.351, "eval_steps_per_second": 2.106, "step": 11 }, { "epoch": 1.7142857142857144, "grad_norm": 4.59375, "learning_rate": 4.2647058823529415e-05, "loss": 0.708, "step": 12 }, { "epoch": 1.7142857142857144, "eval_loss": 0.6781198978424072, "eval_matthews_correlation": -0.028683275619690942, "eval_runtime": 1.9007, "eval_samples_per_second": 116.275, "eval_steps_per_second": 2.105, "step": 12 }, { "epoch": 1.8571428571428572, "grad_norm": 2.640625, "learning_rate": 4.1911764705882356e-05, "loss": 0.6756, "step": 13 }, { "epoch": 1.8571428571428572, "eval_loss": 0.6830955147743225, "eval_matthews_correlation": 0.03840151481124831, "eval_runtime": 1.9014, "eval_samples_per_second": 116.231, "eval_steps_per_second": 2.104, "step": 13 }, { "epoch": 2.0, "grad_norm": 1.1015625, "learning_rate": 4.11764705882353e-05, "loss": 0.6827, "step": 14 }, { "epoch": 2.0, "eval_loss": 0.6893890500068665, "eval_matthews_correlation": 0.02099447073916433, "eval_runtime": 1.901, "eval_samples_per_second": 116.255, "eval_steps_per_second": 2.104, "step": 14 }, { "epoch": 2.142857142857143, "grad_norm": 2.015625, "learning_rate": 4.044117647058824e-05, "loss": 0.7033, "step": 15 }, { "epoch": 2.142857142857143, "eval_loss": 0.6950197815895081, "eval_matthews_correlation": -0.000624626584919963, "eval_runtime": 1.8984, "eval_samples_per_second": 116.414, "eval_steps_per_second": 2.107, "step": 15 }, { "epoch": 2.2857142857142856, "grad_norm": 1.4921875, "learning_rate": 3.970588235294117e-05, "loss": 0.7001, "step": 16 }, { "epoch": 2.2857142857142856, "eval_loss": 0.6963896155357361, "eval_matthews_correlation": -0.000624626584919963, "eval_runtime": 1.8999, "eval_samples_per_second": 116.323, "eval_steps_per_second": 2.105, "step": 16 }, { "epoch": 2.4285714285714284, "grad_norm": 5.15625, "learning_rate": 3.897058823529412e-05, "loss": 0.6424, "step": 17 }, { "epoch": 2.4285714285714284, "eval_loss": 0.6982244849205017, "eval_matthews_correlation": -0.000624626584919963, "eval_runtime": 1.8994, "eval_samples_per_second": 116.352, "eval_steps_per_second": 2.106, "step": 17 }, { "epoch": 2.571428571428571, "grad_norm": 3.78125, "learning_rate": 3.8235294117647055e-05, "loss": 0.6598, "step": 18 }, { "epoch": 2.571428571428571, "eval_loss": 0.701633632183075, "eval_matthews_correlation": -0.010960055896189614, "eval_runtime": 1.8505, "eval_samples_per_second": 119.425, "eval_steps_per_second": 2.162, "step": 18 }, { "epoch": 2.7142857142857144, "grad_norm": 4.375, "learning_rate": 3.7500000000000003e-05, "loss": 0.7053, "step": 19 }, { "epoch": 2.7142857142857144, "eval_loss": 0.7011630535125732, "eval_matthews_correlation": -0.010960055896189614, "eval_runtime": 1.8994, "eval_samples_per_second": 116.35, "eval_steps_per_second": 2.106, "step": 19 }, { "epoch": 2.857142857142857, "grad_norm": 1.296875, "learning_rate": 3.6764705882352945e-05, "loss": 0.6959, "step": 20 }, { "epoch": 2.857142857142857, "eval_loss": 0.695502519607544, "eval_matthews_correlation": -0.000624626584919963, "eval_runtime": 1.8499, "eval_samples_per_second": 119.465, "eval_steps_per_second": 2.162, "step": 20 }, { "epoch": 3.0, "grad_norm": 3.515625, "learning_rate": 3.6029411764705886e-05, "loss": 0.7014, "step": 21 }, { "epoch": 3.0, "eval_loss": 0.6924899816513062, "eval_matthews_correlation": -0.000624626584919963, "eval_runtime": 1.9, "eval_samples_per_second": 116.318, "eval_steps_per_second": 2.105, "step": 21 }, { "epoch": 3.142857142857143, "grad_norm": 6.34375, "learning_rate": 3.529411764705883e-05, "loss": 0.7011, "step": 22 }, { "epoch": 3.142857142857143, "eval_loss": 0.6877518892288208, "eval_matthews_correlation": 0.01001773397191543, "eval_runtime": 1.8999, "eval_samples_per_second": 116.325, "eval_steps_per_second": 2.105, "step": 22 }, { "epoch": 3.2857142857142856, "grad_norm": 2.984375, "learning_rate": 3.455882352941177e-05, "loss": 0.6765, "step": 23 }, { "epoch": 3.2857142857142856, "eval_loss": 0.682935357093811, "eval_matthews_correlation": 0.007873691885759546, "eval_runtime": 1.9014, "eval_samples_per_second": 116.233, "eval_steps_per_second": 2.104, "step": 23 }, { "epoch": 3.4285714285714284, "grad_norm": 3.171875, "learning_rate": 3.382352941176471e-05, "loss": 0.7059, "step": 24 }, { "epoch": 3.4285714285714284, "eval_loss": 0.6817290186882019, "eval_matthews_correlation": 0.0005753543746001685, "eval_runtime": 1.9018, "eval_samples_per_second": 116.208, "eval_steps_per_second": 2.103, "step": 24 }, { "epoch": 3.571428571428571, "grad_norm": 1.1953125, "learning_rate": 3.308823529411765e-05, "loss": 0.6734, "step": 25 }, { "epoch": 3.571428571428571, "eval_loss": 0.6798145174980164, "eval_matthews_correlation": 0.012938076628071615, "eval_runtime": 1.9008, "eval_samples_per_second": 116.265, "eval_steps_per_second": 2.104, "step": 25 }, { "epoch": 3.7142857142857144, "grad_norm": 1.3203125, "learning_rate": 3.235294117647059e-05, "loss": 0.6776, "step": 26 }, { "epoch": 3.7142857142857144, "eval_loss": 0.6806607246398926, "eval_matthews_correlation": 0.012938076628071615, "eval_runtime": 1.9014, "eval_samples_per_second": 116.231, "eval_steps_per_second": 2.104, "step": 26 }, { "epoch": 3.857142857142857, "grad_norm": 7.5, "learning_rate": 3.161764705882353e-05, "loss": 0.6379, "step": 27 }, { "epoch": 3.857142857142857, "eval_loss": 0.6790522933006287, "eval_matthews_correlation": 0.012938076628071615, "eval_runtime": 1.9049, "eval_samples_per_second": 116.015, "eval_steps_per_second": 2.1, "step": 27 }, { "epoch": 4.0, "grad_norm": 1.1484375, "learning_rate": 3.0882352941176475e-05, "loss": 0.6882, "step": 28 }, { "epoch": 4.0, "eval_loss": 0.6815522313117981, "eval_matthews_correlation": 0.012938076628071615, "eval_runtime": 1.8987, "eval_samples_per_second": 116.395, "eval_steps_per_second": 2.107, "step": 28 }, { "epoch": 4.142857142857143, "grad_norm": 7.75, "learning_rate": 3.0147058823529413e-05, "loss": 0.6601, "step": 29 }, { "epoch": 4.142857142857143, "eval_loss": 0.6804011464118958, "eval_matthews_correlation": 0.012938076628071615, "eval_runtime": 1.9011, "eval_samples_per_second": 116.251, "eval_steps_per_second": 2.104, "step": 29 }, { "epoch": 4.285714285714286, "grad_norm": 4.0, "learning_rate": 2.9411764705882354e-05, "loss": 0.6992, "step": 30 }, { "epoch": 4.285714285714286, "eval_loss": 0.6818273067474365, "eval_matthews_correlation": 0.012938076628071615, "eval_runtime": 1.8995, "eval_samples_per_second": 116.345, "eval_steps_per_second": 2.106, "step": 30 }, { "epoch": 4.428571428571429, "grad_norm": 4.125, "learning_rate": 2.8676470588235295e-05, "loss": 0.6962, "step": 31 }, { "epoch": 4.428571428571429, "eval_loss": 0.6815787553787231, "eval_matthews_correlation": 0.012938076628071615, "eval_runtime": 1.8495, "eval_samples_per_second": 119.492, "eval_steps_per_second": 2.163, "step": 31 }, { "epoch": 4.571428571428571, "grad_norm": 1.0234375, "learning_rate": 2.7941176470588236e-05, "loss": 0.7036, "step": 32 }, { "epoch": 4.571428571428571, "eval_loss": 0.6793671250343323, "eval_matthews_correlation": 0.012938076628071615, "eval_runtime": 1.8986, "eval_samples_per_second": 116.404, "eval_steps_per_second": 2.107, "step": 32 }, { "epoch": 4.714285714285714, "grad_norm": 1.6484375, "learning_rate": 2.7205882352941174e-05, "loss": 0.6784, "step": 33 }, { "epoch": 4.714285714285714, "eval_loss": 0.6809413433074951, "eval_matthews_correlation": 0.012938076628071615, "eval_runtime": 1.9012, "eval_samples_per_second": 116.245, "eval_steps_per_second": 2.104, "step": 33 }, { "epoch": 4.857142857142857, "grad_norm": 2.515625, "learning_rate": 2.647058823529412e-05, "loss": 0.67, "step": 34 }, { "epoch": 4.857142857142857, "eval_loss": 0.6806762218475342, "eval_matthews_correlation": 0.012938076628071615, "eval_runtime": 1.9026, "eval_samples_per_second": 116.155, "eval_steps_per_second": 2.102, "step": 34 }, { "epoch": 5.0, "grad_norm": 5.25, "learning_rate": 2.5735294117647057e-05, "loss": 0.6671, "step": 35 }, { "epoch": 5.0, "eval_loss": 0.6812605857849121, "eval_matthews_correlation": 0.03236368357125948, "eval_runtime": 1.9017, "eval_samples_per_second": 116.211, "eval_steps_per_second": 2.103, "step": 35 }, { "epoch": 5.142857142857143, "grad_norm": 1.6796875, "learning_rate": 2.5e-05, "loss": 0.6736, "step": 36 }, { "epoch": 5.142857142857143, "eval_loss": 0.6816417574882507, "eval_matthews_correlation": 0.03840151481124831, "eval_runtime": 1.8517, "eval_samples_per_second": 119.347, "eval_steps_per_second": 2.16, "step": 36 }, { "epoch": 5.285714285714286, "grad_norm": 2.421875, "learning_rate": 2.4264705882352942e-05, "loss": 0.6884, "step": 37 }, { "epoch": 5.285714285714286, "eval_loss": 0.6835285425186157, "eval_matthews_correlation": 0.014648552723664804, "eval_runtime": 1.9003, "eval_samples_per_second": 116.298, "eval_steps_per_second": 2.105, "step": 37 }, { "epoch": 5.428571428571429, "grad_norm": 3.6875, "learning_rate": 2.3529411764705884e-05, "loss": 0.7176, "step": 38 }, { "epoch": 5.428571428571429, "eval_loss": 0.6828601956367493, "eval_matthews_correlation": 0.014648552723664804, "eval_runtime": 1.8971, "eval_samples_per_second": 116.495, "eval_steps_per_second": 2.109, "step": 38 }, { "epoch": 5.571428571428571, "grad_norm": 1.4375, "learning_rate": 2.2794117647058825e-05, "loss": 0.6654, "step": 39 }, { "epoch": 5.571428571428571, "eval_loss": 0.6825487017631531, "eval_matthews_correlation": 0.014648552723664804, "eval_runtime": 1.8513, "eval_samples_per_second": 119.373, "eval_steps_per_second": 2.161, "step": 39 }, { "epoch": 5.714285714285714, "grad_norm": 6.125, "learning_rate": 2.2058823529411766e-05, "loss": 0.6557, "step": 40 }, { "epoch": 5.714285714285714, "eval_loss": 0.6816793084144592, "eval_matthews_correlation": 0.03840151481124831, "eval_runtime": 1.8502, "eval_samples_per_second": 119.448, "eval_steps_per_second": 2.162, "step": 40 }, { "epoch": 5.857142857142857, "grad_norm": 1.5390625, "learning_rate": 2.1323529411764707e-05, "loss": 0.6842, "step": 41 }, { "epoch": 5.857142857142857, "eval_loss": 0.6825309991836548, "eval_matthews_correlation": 0.014648552723664804, "eval_runtime": 1.8521, "eval_samples_per_second": 119.327, "eval_steps_per_second": 2.16, "step": 41 }, { "epoch": 6.0, "grad_norm": 5.3125, "learning_rate": 2.058823529411765e-05, "loss": 0.6744, "step": 42 }, { "epoch": 6.0, "eval_loss": 0.683228075504303, "eval_matthews_correlation": 0.014648552723664804, "eval_runtime": 1.8994, "eval_samples_per_second": 116.35, "eval_steps_per_second": 2.106, "step": 42 }, { "epoch": 6.142857142857143, "grad_norm": 1.90625, "learning_rate": 1.9852941176470586e-05, "loss": 0.7252, "step": 43 }, { "epoch": 6.142857142857143, "eval_loss": 0.6838776469230652, "eval_matthews_correlation": 0.014648552723664804, "eval_runtime": 1.8046, "eval_samples_per_second": 122.467, "eval_steps_per_second": 2.217, "step": 43 }, { "epoch": 6.285714285714286, "grad_norm": 1.21875, "learning_rate": 1.9117647058823528e-05, "loss": 0.6785, "step": 44 }, { "epoch": 6.285714285714286, "eval_loss": 0.6839317679405212, "eval_matthews_correlation": 0.014648552723664804, "eval_runtime": 1.901, "eval_samples_per_second": 116.253, "eval_steps_per_second": 2.104, "step": 44 }, { "epoch": 6.428571428571429, "grad_norm": 1.3515625, "learning_rate": 1.8382352941176472e-05, "loss": 0.6793, "step": 45 }, { "epoch": 6.428571428571429, "eval_loss": 0.6828281879425049, "eval_matthews_correlation": 0.014648552723664804, "eval_runtime": 1.9004, "eval_samples_per_second": 116.294, "eval_steps_per_second": 2.105, "step": 45 }, { "epoch": 6.571428571428571, "grad_norm": 6.5625, "learning_rate": 1.7647058823529414e-05, "loss": 0.6521, "step": 46 }, { "epoch": 6.571428571428571, "eval_loss": 0.6809899806976318, "eval_matthews_correlation": 0.026300443342391174, "eval_runtime": 1.901, "eval_samples_per_second": 116.257, "eval_steps_per_second": 2.104, "step": 46 }, { "epoch": 6.714285714285714, "grad_norm": 2.0625, "learning_rate": 1.6911764705882355e-05, "loss": 0.6568, "step": 47 }, { "epoch": 6.714285714285714, "eval_loss": 0.6805745959281921, "eval_matthews_correlation": 0.03840151481124831, "eval_runtime": 1.8994, "eval_samples_per_second": 116.353, "eval_steps_per_second": 2.106, "step": 47 }, { "epoch": 6.857142857142857, "grad_norm": 1.1875, "learning_rate": 1.6176470588235296e-05, "loss": 0.6669, "step": 48 }, { "epoch": 6.857142857142857, "eval_loss": 0.6794301271438599, "eval_matthews_correlation": 0.03840151481124831, "eval_runtime": 1.8992, "eval_samples_per_second": 116.364, "eval_steps_per_second": 2.106, "step": 48 }, { "epoch": 7.0, "grad_norm": 3.0625, "learning_rate": 1.5441176470588237e-05, "loss": 0.7096, "step": 49 }, { "epoch": 7.0, "eval_loss": 0.6792279481887817, "eval_matthews_correlation": 0.03840151481124831, "eval_runtime": 1.8999, "eval_samples_per_second": 116.322, "eval_steps_per_second": 2.105, "step": 49 }, { "epoch": 7.142857142857143, "grad_norm": 3.5, "learning_rate": 1.4705882352941177e-05, "loss": 0.698, "step": 50 }, { "epoch": 7.142857142857143, "eval_loss": 0.6769732236862183, "eval_matthews_correlation": 0.019864667834482774, "eval_runtime": 1.8984, "eval_samples_per_second": 116.413, "eval_steps_per_second": 2.107, "step": 50 }, { "epoch": 7.285714285714286, "grad_norm": 2.140625, "learning_rate": 1.3970588235294118e-05, "loss": 0.6773, "step": 51 }, { "epoch": 7.285714285714286, "eval_loss": 0.6770384311676025, "eval_matthews_correlation": 0.03236368357125948, "eval_runtime": 1.9001, "eval_samples_per_second": 116.311, "eval_steps_per_second": 2.105, "step": 51 }, { "epoch": 7.428571428571429, "grad_norm": 1.9453125, "learning_rate": 1.323529411764706e-05, "loss": 0.6603, "step": 52 }, { "epoch": 7.428571428571429, "eval_loss": 0.6764286160469055, "eval_matthews_correlation": 0.03236368357125948, "eval_runtime": 1.9031, "eval_samples_per_second": 116.127, "eval_steps_per_second": 2.102, "step": 52 }, { "epoch": 7.571428571428571, "grad_norm": 1.203125, "learning_rate": 1.25e-05, "loss": 0.6804, "step": 53 }, { "epoch": 7.571428571428571, "eval_loss": 0.6764540076255798, "eval_matthews_correlation": 0.03236368357125948, "eval_runtime": 1.9008, "eval_samples_per_second": 116.264, "eval_steps_per_second": 2.104, "step": 53 }, { "epoch": 7.714285714285714, "grad_norm": 2.171875, "learning_rate": 1.1764705882352942e-05, "loss": 0.681, "step": 54 }, { "epoch": 7.714285714285714, "eval_loss": 0.6770803928375244, "eval_matthews_correlation": 0.03236368357125948, "eval_runtime": 1.8515, "eval_samples_per_second": 119.364, "eval_steps_per_second": 2.16, "step": 54 }, { "epoch": 7.857142857142857, "grad_norm": 4.34375, "learning_rate": 1.1029411764705883e-05, "loss": 0.6653, "step": 55 }, { "epoch": 7.857142857142857, "eval_loss": 0.6761192679405212, "eval_matthews_correlation": 0.03236368357125948, "eval_runtime": 1.8528, "eval_samples_per_second": 119.281, "eval_steps_per_second": 2.159, "step": 55 }, { "epoch": 8.0, "grad_norm": 5.4375, "learning_rate": 1.0294117647058824e-05, "loss": 0.6527, "step": 56 }, { "epoch": 8.0, "eval_loss": 0.6764020919799805, "eval_matthews_correlation": 0.04543029179629692, "eval_runtime": 1.9014, "eval_samples_per_second": 116.233, "eval_steps_per_second": 2.104, "step": 56 }, { "epoch": 8.142857142857142, "grad_norm": 5.96875, "learning_rate": 9.558823529411764e-06, "loss": 0.661, "step": 57 }, { "epoch": 8.142857142857142, "eval_loss": 0.6761281490325928, "eval_matthews_correlation": 0.04543029179629692, "eval_runtime": 1.8981, "eval_samples_per_second": 116.432, "eval_steps_per_second": 2.107, "step": 57 }, { "epoch": 8.285714285714286, "grad_norm": 3.625, "learning_rate": 8.823529411764707e-06, "loss": 0.6332, "step": 58 }, { "epoch": 8.285714285714286, "eval_loss": 0.6764816641807556, "eval_matthews_correlation": 0.04543029179629692, "eval_runtime": 1.9012, "eval_samples_per_second": 116.24, "eval_steps_per_second": 2.104, "step": 58 }, { "epoch": 8.428571428571429, "grad_norm": 3.328125, "learning_rate": 8.088235294117648e-06, "loss": 0.6971, "step": 59 }, { "epoch": 8.428571428571429, "eval_loss": 0.6764915585517883, "eval_matthews_correlation": 0.03236368357125948, "eval_runtime": 1.9012, "eval_samples_per_second": 116.243, "eval_steps_per_second": 2.104, "step": 59 }, { "epoch": 8.571428571428571, "grad_norm": 1.7890625, "learning_rate": 7.3529411764705884e-06, "loss": 0.6819, "step": 60 }, { "epoch": 8.571428571428571, "eval_loss": 0.6767964959144592, "eval_matthews_correlation": 0.04543029179629692, "eval_runtime": 1.9009, "eval_samples_per_second": 116.261, "eval_steps_per_second": 2.104, "step": 60 }, { "epoch": 8.714285714285714, "grad_norm": 4.0625, "learning_rate": 6.61764705882353e-06, "loss": 0.6832, "step": 61 }, { "epoch": 8.714285714285714, "eval_loss": 0.6778514981269836, "eval_matthews_correlation": 0.03236368357125948, "eval_runtime": 1.8991, "eval_samples_per_second": 116.37, "eval_steps_per_second": 2.106, "step": 61 }, { "epoch": 8.857142857142858, "grad_norm": 1.203125, "learning_rate": 5.882352941176471e-06, "loss": 0.6673, "step": 62 }, { "epoch": 8.857142857142858, "eval_loss": 0.6788612008094788, "eval_matthews_correlation": 0.019864667834482774, "eval_runtime": 1.9007, "eval_samples_per_second": 116.27, "eval_steps_per_second": 2.104, "step": 62 }, { "epoch": 9.0, "grad_norm": 6.65625, "learning_rate": 5.147058823529412e-06, "loss": 0.6472, "step": 63 }, { "epoch": 9.0, "eval_loss": 0.6786766648292542, "eval_matthews_correlation": 0.019864667834482774, "eval_runtime": 1.8996, "eval_samples_per_second": 116.34, "eval_steps_per_second": 2.106, "step": 63 }, { "epoch": 9.142857142857142, "grad_norm": 2.78125, "learning_rate": 4.411764705882353e-06, "loss": 0.6997, "step": 64 }, { "epoch": 9.142857142857142, "eval_loss": 0.6784170866012573, "eval_matthews_correlation": 0.03236368357125948, "eval_runtime": 1.8989, "eval_samples_per_second": 116.385, "eval_steps_per_second": 2.107, "step": 64 }, { "epoch": 9.285714285714286, "grad_norm": 7.28125, "learning_rate": 3.6764705882352942e-06, "loss": 0.6486, "step": 65 }, { "epoch": 9.285714285714286, "eval_loss": 0.6794090867042542, "eval_matthews_correlation": 0.019864667834482774, "eval_runtime": 1.9, "eval_samples_per_second": 116.314, "eval_steps_per_second": 2.105, "step": 65 }, { "epoch": 9.428571428571429, "grad_norm": 0.83984375, "learning_rate": 2.9411764705882355e-06, "loss": 0.6702, "step": 66 }, { "epoch": 9.428571428571429, "eval_loss": 0.6790147423744202, "eval_matthews_correlation": 0.019864667834482774, "eval_runtime": 1.8501, "eval_samples_per_second": 119.451, "eval_steps_per_second": 2.162, "step": 66 }, { "epoch": 9.571428571428571, "grad_norm": 8.625, "learning_rate": 2.2058823529411767e-06, "loss": 0.7147, "step": 67 }, { "epoch": 9.571428571428571, "eval_loss": 0.6785043478012085, "eval_matthews_correlation": 0.03840151481124831, "eval_runtime": 1.8499, "eval_samples_per_second": 119.469, "eval_steps_per_second": 2.162, "step": 67 }, { "epoch": 9.714285714285714, "grad_norm": 6.0, "learning_rate": 1.4705882352941177e-06, "loss": 0.6492, "step": 68 }, { "epoch": 9.714285714285714, "eval_loss": 0.6781408786773682, "eval_matthews_correlation": 0.03840151481124831, "eval_runtime": 1.9005, "eval_samples_per_second": 116.283, "eval_steps_per_second": 2.105, "step": 68 }, { "epoch": 9.857142857142858, "grad_norm": 1.9140625, "learning_rate": 7.352941176470589e-07, "loss": 0.6741, "step": 69 }, { "epoch": 9.857142857142858, "eval_loss": 0.6785739660263062, "eval_matthews_correlation": 0.03840151481124831, "eval_runtime": 1.9018, "eval_samples_per_second": 116.205, "eval_steps_per_second": 2.103, "step": 69 }, { "epoch": 10.0, "grad_norm": 1.4921875, "learning_rate": 0.0, "loss": 0.6885, "step": 70 }, { "epoch": 10.0, "eval_loss": 0.6790357232093811, "eval_matthews_correlation": 0.019864667834482774, "eval_runtime": 1.9009, "eval_samples_per_second": 116.259, "eval_steps_per_second": 2.104, "step": 70 }, { "epoch": 10.0, "step": 70, "total_flos": 3.203834021989581e+16, "train_loss": 0.6814145530973162, "train_runtime": 351.2516, "train_samples_per_second": 25.139, "train_steps_per_second": 0.199 } ], "logging_steps": 1, "max_steps": 70, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.203834021989581e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }